GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/jemalloc/src/base.c
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/*
 * In auto mode, arenas switch to huge pages for the base allocator on the
 * second base block. a0 switches to thp on the 5th block (after 20 megabytes
 * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
 */

#define BASE_AUTO_THP_THRESHOLD    2
#define BASE_AUTO_THP_THRESHOLD_A0 5

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
    "disabled",
    "auto",
    "always"
};

/******************************************************************************/

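/*
 * Whether base metadata should be explicitly madvised into transparent huge
 * pages: requires metadata_thp to be enabled and the system THP mode recorded
 * at init to be thp_mode_default.
 */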
static inline bool
metadata_thp_madvise(void) {
    return (metadata_thp_enabled() &&
        (init_system_thp_mode == thp_mode_default));
}

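/*
 * Map a hugepage-aligned region of the given size for base metadata, either
 * directly via extent_alloc_mmap() (default hooks) or through the supplied
 * extent hooks.
 */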
static void *
base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
    void *addr;
    bool zero = true;
    bool commit = true;

    /* Use huge page sizes and alignment regardless of opt_metadata_thp. */
    assert(size == HUGEPAGE_CEILING(size));
    size_t alignment = HUGEPAGE;
    if (ehooks_are_default(ehooks)) {
        addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
        if (have_madvise_huge && addr) {
            pages_set_thp_state(addr, size);
        }
    } else {
        addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
            &commit);
    }

    return addr;
}

static void
base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
    size_t size) {
    /*
     * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
     * stopping at first success. This cascade is performed for consistency
     * with the cascade in extent_dalloc_wrapper() because an application's
     * custom hooks may not support e.g. dalloc. This function is only ever
     * called as a side effect of arena destruction, so although it might
     * seem pointless to do anything besides dalloc here, the application
     * may in fact want the end state of all associated virtual memory to be
     * in some consistent-but-allocated state.
     */
    if (ehooks_are_default(ehooks)) {
        if (!extent_dalloc_mmap(addr, size)) {
            goto label_done;
        }
        if (!pages_decommit(addr, size)) {
            goto label_done;
        }
        if (!pages_purge_forced(addr, size)) {
            goto label_done;
        }
        if (!pages_purge_lazy(addr, size)) {
            goto label_done;
        }
        /* Nothing worked. This should never happen. */
        not_reached();
    } else {
        if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) {
            goto label_done;
        }
        if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) {
            goto label_done;
        }
        if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) {
            goto label_done;
        }
        if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) {
            goto label_done;
        }
        /* Nothing worked. That's the application's problem. */
    }
label_done:
    if (metadata_thp_madvise()) {
        /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
        assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
            (size & HUGEPAGE_MASK) == 0);
        pages_nohuge(addr, size);
    }
}

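/*
 * Initialize edata to describe [addr, addr + size), assigning it the next
 * extent serial number.
 */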
static void
base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
    size_t size) {
    size_t sn;

    sn = *extent_sn_next;
    (*extent_sn_next)++;

    edata_binit(edata, addr, size, sn);
}

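/*
 * Count the blocks currently linked into the base, optionally counting a
 * block that is about to be added.
 */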
static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
    base_block_t *b = base->blocks;
    assert(b != NULL);

    size_t n_blocks = with_new_block ? 2 : 1;
    while (b->next != NULL) {
        n_blocks++;
        b = b->next;
    }

    return n_blocks;
}

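/*
 * In metadata_thp "auto" mode, switch the base over to huge pages once enough
 * blocks have accumulated (see BASE_AUTO_THP_THRESHOLD*), madvising the
 * already-mapped blocks huge as well.
 */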
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
    assert(opt_metadata_thp == metadata_thp_auto);
    malloc_mutex_assert_owner(tsdn, &base->mtx);
    if (base->auto_thp_switched) {
        return;
    }
    /* Called when adding a new block. */
    bool should_switch;
    if (base_ind_get(base) != 0) {
        should_switch = (base_get_num_blocks(base, true) ==
            BASE_AUTO_THP_THRESHOLD);
    } else {
        should_switch = (base_get_num_blocks(base, true) ==
            BASE_AUTO_THP_THRESHOLD_A0);
    }
    if (!should_switch) {
        return;
    }

    base->auto_thp_switched = true;
    assert(!config_stats || base->n_thp == 0);
    /* Make the initial blocks THP lazily. */
    base_block_t *block = base->blocks;
    while (block != NULL) {
        assert((block->size & HUGEPAGE_MASK) == 0);
        pages_huge(block, block->size);
        if (config_stats) {
            base->n_thp += HUGEPAGE_CEILING(block->size -
                edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
        }
        block = block->next;
        assert(block == NULL || (base_ind_get(base) == 0));
    }
}

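/*
 * Carve a size-byte, aligned allocation off the front of edata, reporting the
 * alignment gap via *gap_size and shrinking edata to the remaining space.
 */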
static void *
base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
    size_t alignment) {
    void *ret;

    assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
    assert(size == ALIGNMENT_CEILING(size, alignment));

    *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
        alignment) - (uintptr_t)edata_addr_get(edata);
    ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
    assert(edata_bsize_get(edata) >= *gap_size + size);
    edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
        *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
        edata_sn_get(edata));
    return ret;
}

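/*
 * After a bump allocation, reinsert any remaining space into the avail heaps
 * and update the allocated/resident/n_thp statistics.
 */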
static void
base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
    void *addr, size_t size) {
    if (edata_bsize_get(edata) > 0) {
        /*
         * Compute the index for the largest size class that does not
         * exceed extent's size.
         */
        szind_t index_floor =
            sz_size2index(edata_bsize_get(edata) + 1) - 1;
        edata_heap_insert(&base->avail[index_floor], edata);
    }

    if (config_stats) {
        base->allocated += size;
        /*
         * Add one PAGE to base_resident for every page boundary that is
         * crossed by the new allocation. Adjust n_thp similarly when
         * metadata_thp is enabled.
         */
        base->resident += PAGE_CEILING((uintptr_t)addr + size) -
            PAGE_CEILING((uintptr_t)addr - gap_size);
        assert(base->allocated <= base->resident);
        assert(base->resident <= base->mapped);
        if (metadata_thp_madvise() && (opt_metadata_thp ==
            metadata_thp_always || base->auto_thp_switched)) {
            base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
                - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
                LG_HUGEPAGE;
            assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
        }
    }
}

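/* Bump-allocate from edata and perform the associated bookkeeping. */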
static void *
base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
    size_t alignment) {
    void *ret;
    size_t gap_size;

    ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
    base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
    return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
    alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
    size_t usize = ALIGNMENT_CEILING(size, alignment);
    size_t header_size = sizeof(base_block_t);
    size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
        header_size;
    /*
     * Create increasingly larger blocks in order to limit the total number
     * of disjoint virtual memory ranges. Choose the next size in the page
     * size class series (skipping size classes that are not a multiple of
     * HUGEPAGE), or a size large enough to satisfy the requested size and
     * alignment, whichever is larger.
     */
    size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
        + usize));
    pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
        *pind_last + 1 : *pind_last;
    size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
    size_t block_size = (min_block_size > next_block_size) ? min_block_size
        : next_block_size;
    base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
        block_size);
    if (block == NULL) {
        return NULL;
    }

    if (metadata_thp_madvise()) {
        void *addr = (void *)block;
        assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
            (block_size & HUGEPAGE_MASK) == 0);
        if (opt_metadata_thp == metadata_thp_always) {
            pages_huge(addr, block_size);
        } else if (opt_metadata_thp == metadata_thp_auto &&
            base != NULL) {
            /* base != NULL indicates this is not a new base. */
            malloc_mutex_lock(tsdn, &base->mtx);
            base_auto_thp_switch(tsdn, base);
            if (base->auto_thp_switched) {
                pages_huge(addr, block_size);
            }
            malloc_mutex_unlock(tsdn, &base->mtx);
        }
    }

    *pind_last = sz_psz2ind(block_size);
    block->size = block_size;
    block->next = NULL;
    assert(block_size >= header_size);
    base_edata_init(extent_sn_next, &block->edata,
        (void *)((uintptr_t)block + header_size), block_size - header_size);
    return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static edata_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
    malloc_mutex_assert_owner(tsdn, &base->mtx);

    ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
    /*
     * Drop mutex during base_block_alloc(), because an extent hook will be
     * called.
     */
    malloc_mutex_unlock(tsdn, &base->mtx);
    base_block_t *block = base_block_alloc(tsdn, base, ehooks,
        base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
        alignment);
    malloc_mutex_lock(tsdn, &base->mtx);
    if (block == NULL) {
        return NULL;
    }
    block->next = base->blocks;
    base->blocks = block;
    if (config_stats) {
        base->allocated += sizeof(base_block_t);
        base->resident += PAGE_CEILING(sizeof(base_block_t));
        base->mapped += block->size;
        if (metadata_thp_madvise() &&
            !(opt_metadata_thp == metadata_thp_auto
            && !base->auto_thp_switched)) {
            assert(base->n_thp > 0);
            base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
                LG_HUGEPAGE;
        }
        assert(base->allocated <= base->resident);
        assert(base->resident <= base->mapped);
        assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
    }
    return &block->edata;
}

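/* b0 is the ind-0 base created by base_boot(); b0get() simply returns it. */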
base_t *
b0get(void) {
    return b0;
}

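/*
 * Construct a base, bootstrapping the base_t itself out of the base's first
 * block; temporary stack ehooks are used until base->ehooks is initialized.
 *
 * Illustrative usage (a sketch, not code from this file):
 *
 *     base_t *b = base_new(tsdn, ind,
 *         (extent_hooks_t *)&ehooks_default_extent_hooks, true);
 *     if (b != NULL) {
 *         void *md = base_alloc(tsdn, b, 64, CACHELINE);
 *     }
 */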
base_t *
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
    bool metadata_use_hooks) {
    pszind_t pind_last = 0;
    size_t extent_sn_next = 0;

    /*
     * The base will contain the ehooks eventually, but it itself is
     * allocated using them. So we use some stack ehooks to bootstrap its
     * memory, and then initialize the ehooks within the base_t.
     */
    ehooks_t fake_ehooks;
    ehooks_init(&fake_ehooks, metadata_use_hooks ?
        (extent_hooks_t *)extent_hooks :
        (extent_hooks_t *)&ehooks_default_extent_hooks, ind);

    base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
        &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
    if (block == NULL) {
        return NULL;
    }

    size_t gap_size;
    size_t base_alignment = CACHELINE;
    size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
    base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
        &gap_size, base_size, base_alignment);
    ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind);
    ehooks_init(&base->ehooks_base, metadata_use_hooks ?
        (extent_hooks_t *)extent_hooks :
        (extent_hooks_t *)&ehooks_default_extent_hooks, ind);
    if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
        malloc_mutex_rank_exclusive)) {
        base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
        return NULL;
    }
    base->pind_last = pind_last;
    base->extent_sn_next = extent_sn_next;
    base->blocks = block;
    base->auto_thp_switched = false;
    for (szind_t i = 0; i < SC_NSIZES; i++) {
        edata_heap_new(&base->avail[i]);
    }
    if (config_stats) {
        base->allocated = sizeof(base_block_t);
        base->resident = PAGE_CEILING(sizeof(base_block_t));
        base->mapped = block->size;
        base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
            metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
            >> LG_HUGEPAGE : 0;
        assert(base->allocated <= base->resident);
        assert(base->resident <= base->mapped);
        assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
    }
    base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
        base_size);

    return base;
}

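/*
 * Unmap every block owned by the base. The base_t itself was allocated from
 * one of these blocks, so it must not be used afterwards.
 */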
void
base_delete(tsdn_t *tsdn, base_t *base) {
    ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
    base_block_t *next = base->blocks;
    do {
        base_block_t *block = next;
        next = block->next;
        base_unmap(tsdn, ehooks, base_ind_get(base), block,
            block->size);
    } while (next != NULL);
}

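/*
 * Accessors for the two ehooks stored in the base: base->ehooks holds the
 * hooks passed to base_new(), while base->ehooks_base holds the hooks used
 * for base metadata (the passed-in hooks when metadata_use_hooks was true,
 * otherwise the defaults).
 */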
ehooks_t *
base_ehooks_get(base_t *base) {
    return &base->ehooks;
}

ehooks_t *
base_ehooks_get_for_metadata(base_t *base) {
    return &base->ehooks_base;
}

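/* Install new extent hooks in base->ehooks, returning the previous hooks. */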
extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
    extent_hooks_t *old_extent_hooks =
        ehooks_get_extent_hooks_ptr(&base->ehooks);
    ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
    return old_extent_hooks;
}

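/*
 * Common path for base_alloc() and base_alloc_edata(): satisfy the request
 * from the first avail heap whose size class can guarantee the alignment, or
 * grow the base with a new block, then bump-allocate from the chosen extent.
 * If esn is non-NULL, the extent's serial number is passed back through it.
 */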
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
    alignment = QUANTUM_CEILING(alignment);
    size_t usize = ALIGNMENT_CEILING(size, alignment);
    size_t asize = usize + alignment - QUANTUM;

    edata_t *edata = NULL;
    malloc_mutex_lock(tsdn, &base->mtx);
    for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
        edata = edata_heap_remove_first(&base->avail[i]);
        if (edata != NULL) {
            /* Use existing space. */
            break;
        }
    }
    if (edata == NULL) {
        /* Try to allocate more space. */
        edata = base_extent_alloc(tsdn, base, usize, alignment);
    }
    void *ret;
    if (edata == NULL) {
        ret = NULL;
        goto label_return;
    }

    ret = base_extent_bump_alloc(base, edata, usize, alignment);
    if (esn != NULL) {
        *esn = (size_t)edata_sn_get(edata);
    }
label_return:
    malloc_mutex_unlock(tsdn, &base->mtx);
    return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage. Upon success a
 * pointer to at least size bytes with specified alignment is returned. Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
    return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

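/*
 * Allocate an edata_t from the base and record the originating extent's
 * serial number in it.
 */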
edata_t *
base_alloc_edata(tsdn_t *tsdn, base_t *base) {
    size_t esn;
    edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
        EDATA_ALIGNMENT, &esn);
    if (edata == NULL) {
        return NULL;
    }
    edata_esn_set(edata, esn);
    return edata;
}

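/* Read a consistent snapshot of the base's statistics under base->mtx. */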
void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
    cassert(config_stats);

    malloc_mutex_lock(tsdn, &base->mtx);
    assert(base->allocated <= base->resident);
    assert(base->resident <= base->mapped);
    *allocated = base->allocated;
    *resident = base->resident;
    *mapped = base->mapped;
    *n_thp = base->n_thp;
    malloc_mutex_unlock(tsdn, &base->mtx);
}

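/* Forward prefork/postfork events to the base mutex. */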
void
base_prefork(tsdn_t *tsdn, base_t *base) {
    malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
    malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
    malloc_mutex_postfork_child(tsdn, &base->mtx);
}

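/* Bootstrap b0; returns true on failure. */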
bool
base_boot(tsdn_t *tsdn) {
    b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks,
        /* metadata_use_hooks */ true);
    return (b0 == NULL);
}