Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions include/jemalloc/internal/hpdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,25 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge);
void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz);

typedef struct hpdata_alloc_offset_s hpdata_alloc_offset_t;
/*
 * A candidate allocation position within a hugepage, produced by
 * hpdata_find_alloc_offsets() and later consumed by
 * hpdata_reserve_alloc_offset().  Finding offsets does not reserve any
 * space; reservation happens only when the offset is passed back.
 */
struct hpdata_alloc_offset_s {
/* Start of the candidate range (assumed page index within the hugepage — TODO confirm). */
size_t index;
/* Extent of the candidate range (assumed length in pages — TODO confirm units). */
size_t len;
};

/*
* Given an hpdata which can serve an allocation request of size sz,
* find between one and max_nallocs offsets that can satisfy such
* an allocation request and buffer them in offsets (without actually
* reserving any space or updating hpdata). Return the number
* of offsets discovered.
*/
size_t hpdata_find_alloc_offsets(hpdata_t *hpdata, size_t sz,
hpdata_alloc_offset_t *offsets, size_t max_nallocs);
/* Reserve the allocation for the given offset. */
void *hpdata_reserve_alloc_offset(
hpdata_t *hpdata, size_t sz, hpdata_alloc_offset_t *offset);

/*
* The hpdata_purge_prepare_t allows grabbing the metadata required to purge
* subranges of a hugepage while holding a lock, drop the lock during the actual
Expand Down
10 changes: 10 additions & 0 deletions include/jemalloc/internal/sec.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ sec_size_supported(sec_t *sec, size_t size) {
return sec_is_used(sec) && size <= sec->opts.max_alloc;
}

/* Max number of extents we would allocate out of a single huge page. */
#define MAX_SEC_NALLOCS 4

/*
 * Calculate the number of extents we will try to allocate out of
 * a single huge page for a given allocation size. The result will be
 * in the range [1, MAX_SEC_NALLOCS].
 */
size_t sec_calc_nallocs_for_size(sec_t *sec, size_t size);

/* If sec does not have extent available, it will return NULL. */
edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size);
void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
Expand Down
9 changes: 1 addition & 8 deletions include/jemalloc/internal/sec_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,9 @@ struct sec_opts_s {
* until we are 1/4 below max_bytes.
*/
size_t max_bytes;
/*
* When we can't satisfy an allocation out of the SEC because there are
* no available ones cached, allocator will allocate a batch with extra
* batch_fill_extra extents of the same size.
*/
size_t batch_fill_extra;
};

#define SEC_OPTS_NSHARDS_DEFAULT 2
#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3
#define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024))
#define SEC_OPTS_MAX_BYTES_DEFAULT \
((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \
Expand All @@ -45,6 +38,6 @@ struct sec_opts_s {

#define SEC_OPTS_DEFAULT \
{SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT, \
SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT}
SEC_OPTS_MAX_BYTES_DEFAULT}

#endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */
19 changes: 7 additions & 12 deletions src/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,8 @@ JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function")

JET_EXTERN bool
conf_handle_unsigned(const char *v, size_t vlen,
uintmax_t min, uintmax_t max, bool check_min, bool check_max,
bool clip, uintmax_t *result) {
conf_handle_unsigned(const char *v, size_t vlen, uintmax_t min, uintmax_t max,
bool check_min, bool check_max, bool clip, uintmax_t *result) {
char *end;
set_errno(0);
uintmax_t mv = (uintmax_t)malloc_strtoumax(v, &end, 0);
Expand All @@ -281,9 +280,8 @@ conf_handle_unsigned(const char *v, size_t vlen,
}

JET_EXTERN bool
conf_handle_signed(const char *v, size_t vlen,
intmax_t min, intmax_t max, bool check_min, bool check_max,
bool clip, intmax_t *result) {
conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max,
bool check_min, bool check_max, bool clip, intmax_t *result) {
char *end;
set_errno(0);
intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0);
Expand Down Expand Up @@ -473,11 +471,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
continue;
}

while (*opts != '\0'
&& !conf_next(&opts, &k, &klen, &v, &vlen)) {
while (
*opts != '\0' && !conf_next(&opts, &k, &klen, &v, &vlen)) {
#define CONF_ERROR(msg, k, klen, v, vlen) \
if (!initial_call) { \
conf_error(msg, k, klen, v, vlen); \
conf_error(msg, k, klen, v, vlen); \
cur_opt_valid = false; \
}
#define CONF_CONTINUE \
Expand Down Expand Up @@ -977,9 +975,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
"hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra,
"hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES,
CONF_CHECK_MIN, CONF_CHECK_MAX, true);

if (CONF_MATCH("slab_sizes")) {
if (CONF_MATCH_VALUE("default")) {
Expand Down
4 changes: 0 additions & 4 deletions src/ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
CTL_PROTO(opt_hpa_sec_max_bytes)
CTL_PROTO(opt_hpa_sec_batch_fill_extra)
CTL_PROTO(opt_huge_arena_pac_thp)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
Expand Down Expand Up @@ -488,7 +487,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
{NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)},
{NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
Expand Down Expand Up @@ -2178,8 +2176,6 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
CTL_RO_NL_GEN(
opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t)
CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
CTL_RO_NL_GEN(
opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
Expand Down
112 changes: 57 additions & 55 deletions src/hpa.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,37 +651,18 @@ hpa_shard_maybe_do_deferred_work(
}

static edata_t *
hpa_try_alloc_one_no_grow(
tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
hpdata_t *ps, hpdata_alloc_offset_t *alloc_offset, bool *oom) {
assert(*oom == false);
malloc_mutex_assert_owner(tsdn, &shard->mtx);

bool err;
edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
if (edata == NULL) {
*oom = true;
return NULL;
}

hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
if (ps == NULL) {
edata_cache_fast_put(tsdn, &shard->ecf, edata);
return NULL;
}

psset_update_begin(&shard->psset, ps);

if (hpdata_empty(ps)) {
/*
* If the pageslab used to be empty, treat it as though it's
* brand new for fragmentation-avoidance purposes; what we're
* trying to approximate is the age of the allocations *in* that
* pageslab, and the allocations in the new pageslab are by
* definition the youngest in this hpa shard.
*/
hpdata_age_set(ps, shard->age_counter++);
}

void *addr = hpdata_reserve_alloc(ps, size);
void *addr = hpdata_reserve_alloc_offset(ps, size, alloc_offset);
JE_USDT(hpa_alloc, 5, shard->ind, addr, size, hpdata_nactive_get(ps),
hpdata_age_get(ps));
edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES,
Expand All @@ -693,12 +674,12 @@ hpa_try_alloc_one_no_grow(
/*
* This could theoretically be moved outside of the critical section,
* but that introduces the potential for a race. Without the lock, the
* (initially nonempty, since this is the reuse pathway) pageslab we
* (initially nonempty, since this is the reuse pathway) pageslab we
* allocated out of could become otherwise empty while the lock is
* dropped. This would force us to deal with a pageslab eviction down
* the error pathway, which is a pain.
*/
err = emap_register_boundary(
const bool err = emap_register_boundary(
tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false);
if (err) {
hpdata_unreserve(
Expand All @@ -715,29 +696,61 @@ hpa_try_alloc_one_no_grow(
* principle that we didn't *really* affect shard state (we
* tweaked the stats, but our tweaks weren't really accurate).
*/
psset_update_end(&shard->psset, ps);
edata_cache_fast_put(tsdn, &shard->ecf, edata);
*oom = true;
return NULL;
}

hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
psset_update_end(&shard->psset, ps);

return edata;
}

static size_t
hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard,
size_t size, bool *oom, size_t nallocs, edata_list_active_t *results,
size_t size, bool *oom, edata_list_active_t *results,
bool *deferred_work_generated) {
assert(size <= HUGEPAGE);
assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size));
assert(*oom == false);
malloc_mutex_assert_owner(tsdn, &shard->mtx);

hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
if (ps == NULL) {
return 0;
}

hpdata_alloc_offset_t alloc_offsets[MAX_SEC_NALLOCS];
const size_t max_nallocs = sec_calc_nallocs_for_size(&shard->sec, size);
const size_t nallocs = hpdata_find_alloc_offsets(
ps, size, alloc_offsets, max_nallocs);

psset_update_begin(&shard->psset, ps);

if (hpdata_empty(ps)) {
/*
* If the pageslab used to be empty, treat it as though it's
* brand new for fragmentation-avoidance purposes; what we're
* trying to approximate is the age of the allocations *in* that
* pageslab, and the allocations in the new pageslab are by
* definition the youngest in this hpa shard.
*/
hpdata_age_set(ps, shard->age_counter++);
}

psset_update_end(&shard->psset, ps);

size_t nsuccess = 0;
for (; nsuccess < nallocs; nsuccess++) {
for (; nsuccess < nallocs; nsuccess += 1) {
psset_update_begin(&shard->psset, ps);
edata_t *edata = hpa_try_alloc_one_no_grow(
tsdn, shard, size, oom);
tsdn, shard, size, ps, &alloc_offsets[nsuccess], oom);
psset_update_end(&shard->psset, ps);

if (edata == NULL) {
break;
}

edata_list_active_append(results, edata);
}

Expand All @@ -748,27 +761,22 @@ hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard,

static size_t
hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
bool *oom, size_t nallocs, edata_list_active_t *results,
bool *deferred_work_generated) {
bool *oom, edata_list_active_t *results, bool *deferred_work_generated) {
malloc_mutex_lock(tsdn, &shard->mtx);
size_t nsuccess = hpa_try_alloc_batch_no_grow_locked(
tsdn, shard, size, oom, nallocs, results, deferred_work_generated);
const size_t nsuccess = hpa_try_alloc_batch_no_grow_locked(
tsdn, shard, size, oom, results, deferred_work_generated);
malloc_mutex_unlock(tsdn, &shard->mtx);
return nsuccess;
}

static size_t
hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
size_t nallocs, edata_list_active_t *results,
bool *deferred_work_generated) {
assert(size <= HUGEPAGE);
assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size));
edata_list_active_t *results, bool *deferred_work_generated) {
bool oom = false;

size_t nsuccess = hpa_try_alloc_batch_no_grow(
tsdn, shard, size, &oom, nallocs, results, deferred_work_generated);

if (nsuccess == nallocs || oom) {
tsdn, shard, size, &oom, results, deferred_work_generated);
if (0 < nsuccess || oom) {
return nsuccess;
}

Expand All @@ -777,13 +785,14 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
* try to grow.
*/
malloc_mutex_lock(tsdn, &shard->grow_mtx);

/*
* Check for grow races; maybe some earlier thread expanded the psset
* in between when we dropped the main mutex and grabbed the grow mutex.
*/
nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
nallocs - nsuccess, results, deferred_work_generated);
if (nsuccess == nallocs || oom) {
nsuccess = hpa_try_alloc_batch_no_grow(
tsdn, shard, size, &oom, results, deferred_work_generated);
if (0 < nsuccess || oom) {
malloc_mutex_unlock(tsdn, &shard->grow_mtx);
return nsuccess;
}
Expand All @@ -797,7 +806,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
shard->age_counter++, hpa_is_hugify_eager(shard), &oom);
if (ps == NULL) {
malloc_mutex_unlock(tsdn, &shard->grow_mtx);
return nsuccess;
return 0;
}

/*
Expand All @@ -807,14 +816,10 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
*/
malloc_mutex_lock(tsdn, &shard->mtx);
psset_insert(&shard->psset, ps);
nsuccess += hpa_try_alloc_batch_no_grow_locked(tsdn, shard, size, &oom,
nallocs - nsuccess, results, deferred_work_generated);
nsuccess = hpa_try_alloc_batch_no_grow_locked(
tsdn, shard, size, &oom, results, deferred_work_generated);
malloc_mutex_unlock(tsdn, &shard->mtx);

/*
* Drop grow_mtx before doing deferred work; other threads blocked on it
* should be allowed to proceed while we're working.
*/
malloc_mutex_unlock(tsdn, &shard->grow_mtx);

return nsuccess;
Expand Down Expand Up @@ -886,13 +891,10 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
if (edata != NULL) {
return edata;
}
size_t nallocs = sec_size_supported(&shard->sec, size)
? shard->sec.opts.batch_fill_extra + 1
: 1;
edata_list_active_t results;
edata_list_active_init(&results);
size_t nsuccess = hpa_alloc_batch_psset(
tsdn, shard, size, nallocs, &results, deferred_work_generated);
tsdn, shard, size, &results, deferred_work_generated);
hpa_assert_results(tsdn, shard, &results);
edata = edata_list_active_first(&results);

Expand Down
Loading
Loading