GitHub Repository: emscripten-core/emscripten
Path: blob/main/system/lib/mimalloc/src/os.c
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"


/* -----------------------------------------------------------
  Initialization.
----------------------------------------------------------- */

static mi_os_mem_config_t mi_os_mem_config = {
  4096,   // page size
  0,      // large page size (usually 2MiB)
  4096,   // allocation granularity
  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
};

bool _mi_os_has_overcommit(void) {
  return mi_os_mem_config.has_overcommit;
}

bool _mi_os_has_virtual_reserve(void) {
  return mi_os_mem_config.has_virtual_reserve;
}


// OS (small) page size
size_t _mi_os_page_size(void) {
  return mi_os_mem_config.page_size;
}

// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size(void) {
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}

bool _mi_os_use_large_page(size_t size, size_t alignment) {
  // if we have access, check the size and alignment requirements
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
  return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
}
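// For example, with 2MiB large OS pages enabled, a 4MiB request with 2MiB alignment
// satisfies both divisibility checks above and can use large pages, while a 3MiB
// request (not a multiple of 2MiB) falls back to regular pages.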

// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*MI_KiB) align_size = _mi_os_page_size();
  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
  else align_size = 4*MI_MiB;
  if mi_unlikely(size >= (SIZE_MAX - align_size)) return size;  // possible overflow?
  return _mi_align_up(size, align_size);
}
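// Worked example: a request of 5*MI_MiB + 1 bytes falls in the "< 8*MI_MiB" bucket, so it
// is rounded up to the next multiple of 256KiB (5MiB + 256KiB). The waste is always below
// align_size, and for the larger buckets align_size is 1/8th of the bucket's lower bound,
// which is where the 12.5% bound mentioned above comes from.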

void _mi_os_init(void) {
  _mi_prim_mem_init(&mi_os_mem_config);
}


/* -----------------------------------------------------------
  Util
-------------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);


/* -----------------------------------------------------------
  aligned hinting
-------------------------------------------------------------- */

// On 64-bit systems, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those.
#if (MI_INTPTR_SIZE >= 8)
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))

#define MI_HINT_BASE ((uintptr_t)2 << 40)   // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40)   // up to 6TiB (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX  ((uintptr_t)30 << 40)  // wrap after 30TiB (area after 32TiB is used for huge OS pages)

void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
  #if (MI_SECURE>0)
  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
  #endif

  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
    uintptr_t init = MI_HINT_BASE;
    #if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of aligned allocations unless in debug mode
    uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
    init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
    #endif
    uintptr_t expected = hint + size;
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
    hint = mi_atomic_add_acq_rel(&aligned_base, size);  // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
  }
  if (hint%try_alignment != 0) return NULL;
  return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
  MI_UNUSED(try_alignment); MI_UNUSED(size);
  return NULL;
}
#endif
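// In other words: each hinted request bumps `aligned_base` by its (segment-aligned) size,
// so concurrent callers receive disjoint, MI_SEGMENT_SIZE aligned hint ranges. Once the
// running counter passes MI_HINT_MAX (30TiB), or on first use, the CAS above re-initializes
// it to MI_HINT_BASE (2TiB) plus an optional random offset within MI_HINT_AREA, after which
// hinting simply continues from the new base.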


/* -----------------------------------------------------------
  Free memory
-------------------------------------------------------------- */

static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);

static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return;  // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
  _mi_stat_decrease(&stats->reserved, size);
}

void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    size_t csize = _mi_os_good_alloc_size(size);
    void* base = addr;
    // different base? (due to alignment)
    if (memid.mem.os.base != NULL) {
      mi_assert(memid.mem.os.base <= addr);
      mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
      base = memid.mem.os.base;
      csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
    }
    // free it
    if (memid.memkind == MI_MEM_OS_HUGE) {
      mi_assert(memid.is_pinned);
      mi_os_free_huge_os_pages(base, csize, tld_stats);
    }
    else {
      mi_os_prim_free(base, csize, still_committed, tld_stats);
    }
  }
  else {
    // nothing to do
    mi_assert(memid.memkind < MI_MEM_OS);
  }
}
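// For example, if an aligned allocation returned `addr` 64KiB past the OS base recorded in
// `memid.mem.os.base`, the free above starts at that recorded base and grows `csize` by
// those 64KiB, so the entire originally mapped range is released.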

void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
  _mi_os_free_ex(p, size, true, memid, tld_stats);
}


/* -----------------------------------------------------------
  Primitive allocation from the OS.
-------------------------------------------------------------- */

// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(is_large != NULL);
  if (size == 0) return NULL;
  if (!commit) { allow_large = false; }
  if (try_alignment == 0) { try_alignment = 1; }  // avoid 0 to ensure there will be no divide by zero when aligning
  *is_zero = false;
  void* p = NULL;
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
  if (err != 0) {
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
  }

  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_stat_counter_increase(stats->mmap_calls, 1);
  if (p != NULL) {
    _mi_stat_increase(&stats->reserved, size);
    if (commit) {
      _mi_stat_increase(&stats->committed, size);
      // seems needed for asan (or `mimalloc-test-api` fails)
      #ifdef MI_TRACK_ASAN
      if (*is_zero) { mi_track_mem_defined(p,size); }
      else { mi_track_mem_undefined(p,size); }
      #endif
    }
  }
  return p;
}


// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
  mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_large != NULL);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(base != NULL);
  if (!commit) allow_large = false;
  if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // try first with a hint (this will be aligned directly on Win 10+ or BSD)
  void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // aligned already?
  if (((uintptr_t)p % alignment) == 0) {
    *base = p;
  }
  else {
    // if not aligned, free it, overallocate, and unmap around it
    _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
    mi_os_prim_free(p, size, commit, stats);
    if (size >= (SIZE_MAX - alignment)) return NULL;  // overflow
    const size_t over_size = size + alignment;

    if (!mi_os_mem_config.has_partial_free) {  // win32 VirtualAlloc cannot free parts of an allocated block
      // over-allocate uncommitted (virtual) memory
      p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // set p to the aligned part in the full region
      // note: this is dangerous on Windows as VirtualFree needs the actual base pointer
      // this is handled though by having the `base` field in the memid's
      *base = p;  // remember the base
      p = mi_align_up_ptr(p, alignment);

      // explicitly commit only the aligned part
      if (commit) {
        _mi_os_commit(p, size, NULL, stats);
      }
    }
    else {  // mmap can free inside an allocation
      // overallocate...
      p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // and selectively unmap parts around the over-allocated area.
      void* aligned_p = mi_align_up_ptr(p, alignment);
      size_t pre_size  = (uint8_t*)aligned_p - (uint8_t*)p;
      size_t mid_size  = _mi_align_up(size, _mi_os_page_size());
      size_t post_size = over_size - pre_size - mid_size;
      mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit, stats); }
      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
      // we can return the aligned pointer on `mmap` systems
      p = aligned_p;
      *base = aligned_p;  // since we freed the pre part, `*base == p`.
    }
  }

  mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
  return p;
}
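// Worked example for the mmap path: with size = 8MiB and alignment = 4MiB, over_size is
// 12MiB. If the OS happens to return `p` 1MiB past a 4MiB boundary, then aligned_p lies
// 3MiB above p, so pre_size = 3MiB, mid_size = 8MiB and post_size = 1MiB; the pre and post
// ranges are unmapped and only the aligned 8MiB in the middle is kept.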


/* -----------------------------------------------------------
  OS API: alloc and alloc_aligned
----------------------------------------------------------- */

void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  bool os_is_large = false;
  bool os_is_zero  = false;
  void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
  if (p != NULL) {
    *memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
  }
  return p;
}

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats)
{
  MI_UNUSED(&_mi_os_get_aligned_hint);  // suppress unused warnings
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  alignment = _mi_align_up(alignment, _mi_os_page_size());

  bool os_is_large = false;
  bool os_is_zero  = false;
  void* os_base = NULL;
  void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats);
  if (p != NULL) {
    *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
    memid->mem.os.base = os_base;
    memid->mem.os.alignment = alignment;
  }
  return p;
}
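// Usage sketch (hypothetical internal caller, for illustration only): allocate an aligned,
// committed region and later release it through the recorded memid.
//
//   mi_memid_t memid;
//   void* p = _mi_os_alloc_aligned(2*MI_MiB, 1*MI_MiB, true /* commit */,
//                                  false /* allow_large */, &memid, &_mi_stats_main);
//   if (p != NULL) {
//     // ... use the 2MiB region at p (1MiB aligned) ...
//     _mi_os_free(p, 2*MI_MiB, memid, &_mi_stats_main);
//   }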

/* -----------------------------------------------------------
  OS aligned allocation with an offset. This is used
  for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
  page where the object can be aligned at an offset from the start of the segment.
  As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
  to use the actual start of the memory region.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats);
    if (start == NULL) return NULL;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, stats);
    }
    return p;
  }
}
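// For example, with offset = 1MiB and alignment = 4MiB: extra = align_up(1MiB, 4MiB) - 1MiB
// = 3MiB, so the returned p = start + 3MiB and p + offset = start + 4MiB, which is exactly
// 4MiB aligned (start itself is 4MiB aligned). The unused 3MiB at the start is decommitted.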

/* -----------------------------------------------------------
  OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */

// OS page align within a given area, either conservative (pages inside the area only),
// or not (straddling pages outside the area is possible)
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
  mi_assert(addr != NULL && size > 0);
  if (newsize != NULL) *newsize = 0;
  if (size == 0 || addr == NULL) return NULL;

  // page align conservatively within the range
  void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
                              : mi_align_down_ptr(addr, _mi_os_page_size()));
  void* end   = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
                              : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
  ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
  if (diff <= 0) return NULL;

  mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
  if (newsize != NULL) *newsize = (size_t)diff;
  return start;
}
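// For example, with a 4KiB page size, addr = 0x1010 and size = 0x3000: conservatively the
// range shrinks to [0x2000, 0x4000) (csize 0x2000, only pages fully inside the area), while
// non-conservatively it grows to [0x1000, 0x5000) (csize 0x4000, straddling pages included).
// Commit below uses the non-conservative variant; decommit, reset and protect use the
// conservative one.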

static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
  return mi_os_page_align_areax(true, addr, size, newsize);
}

bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  if (is_zero != NULL) { *is_zero = false; }
  _mi_stat_increase(&stats->committed, size);  // use size for precise commit vs. decommit
  _mi_stat_counter_increase(&stats->commit_calls, 1);

  // page align range
  size_t csize;
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
  if (csize == 0) return true;

  // commit
  bool os_is_zero = false;
  int err = _mi_prim_commit(start, csize, &os_is_zero);
  if (err != 0) {
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
    return false;
  }
  if (os_is_zero && is_zero != NULL) {
    *is_zero = true;
    mi_assert_expensive(mi_mem_is_zero(start, csize));
  }
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
  #ifdef MI_TRACK_ASAN
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
  else            { mi_track_mem_undefined(start,csize); }
  #endif
  return true;
}

static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal(needs_recommit != NULL);
  _mi_stat_decrease(&stats->committed, size);

  // page align
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;

  // decommit
  *needs_recommit = true;
  int err = _mi_prim_decommit(start, csize, needs_recommit);
  if (err != 0) {
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  mi_assert_internal(err == 0);
  return (err == 0);
}

bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
  bool needs_recommit;
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
}


// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
  // page align conservatively within the range
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;  // || _mi_os_is_huge_reserved(addr)
  _mi_stat_increase(&stats->reset, csize);
  _mi_stat_counter_increase(&stats->reset_calls, 1);

  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
  memset(start, 0, csize);  // pretend it is eagerly reset
  #endif

  int err = _mi_prim_reset(start, csize);
  if (err != 0) {
    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  return (err == 0);
}


// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  _mi_stat_counter_increase(&stats->purge_calls, 1);
  _mi_stat_increase(&stats->purged, size);

  if (mi_option_is_enabled(mi_option_purge_decommits) &&  // should decommit?
      !_mi_preloading())                                  // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
    return needs_recommit;
  }
  else {
    if (allow_reset) {  // this can sometimes be not allowed if the range is not fully committed
      _mi_os_reset(p, size, stats);
    }
    return false;  // needs no recommit
  }
}

// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats) {
  return _mi_os_purge_ex(p, size, true, stats);
}

// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
  // page align conservatively within the range
  size_t csize = 0;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return false;
  /*
  if (_mi_os_is_huge_reserved(addr)) {
    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
  }
  */
  int err = _mi_prim_protect(start, csize, protect);
  if (err != 0) {
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
  }
  return (err == 0);
}

bool _mi_os_protect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, true);
}

bool _mi_os_unprotect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, false);
}



/* ----------------------------------------------------------------------------
  Support for allocating huge OS pages (1GiB) that are reserved up-front
  and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)


#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start;  // = 0

// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  if (total_size != NULL) *total_size = 0;
  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;

  uintptr_t start = 0;
  uintptr_t end = 0;
  uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
  do {
    start = huge_start;
    if (start == 0) {
      // Initialize the start address after the 32TiB area
      start = ((uintptr_t)32 << 40);    // 32TiB virtual start address
      #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
      uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
      start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12bits)*1GiB == between 0 to 4TiB
      #endif
    }
    end = start + size;
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));

  if (total_size != NULL) *total_size = size;
  return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  MI_UNUSED(pages);
  if (total_size != NULL) *total_size = 0;
  return NULL;
}
#endif
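// For example, a request for 4 huge pages claims a contiguous 4GiB address range at or
// above 32TiB; the CAS loop advances `mi_huge_start` by that range, so concurrent
// reservations receive disjoint (and MI_SEGMENT_SIZE aligned) address ranges.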

// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
  *memid = _mi_memid_none();
  if (psize != NULL) *psize = 0;
  if (pages_reserved != NULL) *pages_reserved = 0;
  size_t size = 0;
  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
  if (start == NULL) return NULL;  // on 32-bit systems

  // Allocate one page at a time but try to place them contiguously.
  // We allocate one page at a time to be able to abort if it takes too long
  // or to at least allocate as many as available on the system.
  mi_msecs_t start_t = _mi_clock_start();
  size_t page = 0;
  bool all_zero = true;
  while (page < pages) {
    // allocate a page
    bool is_zero = false;
    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
    void* p = NULL;
    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
    if (!is_zero) { all_zero = false; }
    if (err != 0) {
      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
      break;
    }

    // Did we succeed at a contiguous address?
    if (p != addr) {
      // no success, issue a warning and break
      if (p != NULL) {
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
      }
      break;
    }

    // success, record it
    page++;  // increase before timeout check (see issue #711)
    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);

    // check for timeout
    if (max_msecs > 0) {
      mi_msecs_t elapsed = _mi_clock_end(start_t);
      if (page >= 1) {
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
        if (estimate > 2*max_msecs) {  // seems like we are going to timeout, break
          elapsed = max_msecs + 1;
        }
      }
      if (elapsed > max_msecs) {
        _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
        break;
      }
    }
  }
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) { *pages_reserved = page; }
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
  if (page != 0) {
    mi_assert(start != NULL);
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
    memid->memkind = MI_MEM_OS_HUGE;
    mi_assert(memid->is_pinned);
    #ifdef MI_TRACK_ASAN
    if (all_zero) { mi_track_mem_defined(start,size); }
    #endif
  }
  return (page == 0 ? NULL : start);
}
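// Timeout example: if the first huge page takes 500ms and 100 pages are requested with
// max_msecs = 10000, the projected total is (500/2)*100 = 25000ms > 2*max_msecs, so
// `elapsed` is forced past the limit and the loop stops early, returning the pages
// (and the corresponding `pages_reserved`/`psize`) obtained so far.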

// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
}

/* ----------------------------------------------------------------------------
  Support NUMA aware allocation
-----------------------------------------------------------------------------*/

_Atomic(size_t) _mi_numa_node_count;  // = 0  // cache the node count

size_t _mi_os_numa_node_count_get(void) {
  size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
  if (count <= 0) {
    long ncount = mi_option_get(mi_option_use_numa_nodes);  // given explicitly?
    if (ncount > 0) {
      count = (size_t)ncount;
    }
    else {
      count = _mi_prim_numa_node_count();  // or detect dynamically
      if (count == 0) count = 1;
    }
    mi_atomic_store_release(&_mi_numa_node_count, count);  // save it
    _mi_verbose_message("using %zd numa regions\n", count);
  }
  return count;
}

int _mi_os_numa_node_get(mi_os_tld_t* tld) {
  MI_UNUSED(tld);
  size_t numa_count = _mi_os_numa_node_count();
  if (numa_count <= 1) return 0;  // optimize on single numa node systems: always node 0
  // never more than the node count and >= 0
  size_t numa_node = _mi_prim_numa_node();
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
  return (int)numa_node;
}
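// For example, if `mi_option_use_numa_nodes` caps the count at 2 while the primitive layer
// reports the current thread on node 5, the node is folded back as 5 % 2 = 1, so the
// returned value always stays within [0, numa_count).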