Path: system/lib/mimalloc/src/os.c
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"


/* -----------------------------------------------------------
  Initialization.
----------------------------------------------------------- */

static mi_os_mem_config_t mi_os_mem_config = {
  4096,   // page size
  0,      // large page size (usually 2MiB)
  4096,   // allocation granularity
  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
};

bool _mi_os_has_overcommit(void) {
  return mi_os_mem_config.has_overcommit;
}

bool _mi_os_has_virtual_reserve(void) {
  return mi_os_mem_config.has_virtual_reserve;
}


// OS (small) page size
size_t _mi_os_page_size(void) {
  return mi_os_mem_config.page_size;
}

// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
size_t _mi_os_large_page_size(void) {
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}

bool _mi_os_use_large_page(size_t size, size_t alignment) {
  // if we have access, check the size and alignment requirements
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
  return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
}

// round to a good OS allocation size (bounded by max 12.5% waste)
size_t _mi_os_good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*MI_KiB) align_size = _mi_os_page_size();
  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
  else align_size = 4*MI_MiB;
  if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow?
  return _mi_align_up(size, align_size);
}

void _mi_os_init(void) {
  _mi_prim_mem_init(&mi_os_mem_config);
}


/* -----------------------------------------------------------
  Util
-------------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);


/* -----------------------------------------------------------
  aligned hinting
-------------------------------------------------------------- */

// On 64-bit systems, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those.
#if (MI_INTPTR_SIZE >= 8)
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;

// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))

#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40)  // upto 6TiB   (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)

void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
  #if (MI_SECURE>0)
  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
  #endif

  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
    uintptr_t init = MI_HINT_BASE;
    #if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of aligned allocations unless in debug mode
    uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
    init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
    #endif
    uintptr_t expected = hint + size;
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
    hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
  }
  if (hint%try_alignment != 0) return NULL;
  return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
  MI_UNUSED(try_alignment); MI_UNUSED(size);
  return NULL;
}
#endif


/* -----------------------------------------------------------
  Free memory
-------------------------------------------------------------- */

static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);

static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal((size % _mi_os_page_size()) == 0);
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
  int err = _mi_prim_free(addr, size);
  if (err != 0) {
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
  }
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
  _mi_stat_decrease(&stats->reserved, size);
}

void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    size_t csize = _mi_os_good_alloc_size(size);
    void* base = addr;
    // different base? (due to alignment)
    if (memid.mem.os.base != NULL) {
      mi_assert(memid.mem.os.base <= addr);
      mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
      base = memid.mem.os.base;
      csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
    }
    // free it
    if (memid.memkind == MI_MEM_OS_HUGE) {
      mi_assert(memid.is_pinned);
      mi_os_free_huge_os_pages(base, csize, tld_stats);
    }
    else {
      mi_os_prim_free(base, csize, still_committed, tld_stats);
    }
  }
  else {
    // nothing to do
    mi_assert(memid.memkind < MI_MEM_OS);
  }
}

void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
  _mi_os_free_ex(p, size, true, memid, tld_stats);
}


/* -----------------------------------------------------------
  Primitive allocation from the OS.
-------------------------------------------------------------- */

// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(is_large != NULL);
  if (size == 0) return NULL;
  if (!commit) { allow_large = false; }
  if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning
  *is_zero = false;
  void* p = NULL;
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
  if (err != 0) {
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
  }

  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_stat_counter_increase(stats->mmap_calls, 1);
  if (p != NULL) {
    _mi_stat_increase(&stats->reserved, size);
    if (commit) {
      _mi_stat_increase(&stats->committed, size);
      // seems needed for asan (or `mimalloc-test-api` fails)
      #ifdef MI_TRACK_ASAN
      if (*is_zero) { mi_track_mem_defined(p,size); }
               else { mi_track_mem_undefined(p,size); }
      #endif
    }
  }
  return p;
}


// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
  mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
  mi_assert_internal(is_large != NULL);
  mi_assert_internal(is_zero != NULL);
  mi_assert_internal(base != NULL);
  if (!commit) allow_large = false;
  if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // try first with a hint (this will be aligned directly on Win 10+ or BSD)
  void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
  if (p == NULL) return NULL;

  // aligned already?
  if (((uintptr_t)p % alignment) == 0) {
    *base = p;
  }
  else {
    // if not aligned, free it, overallocate, and unmap around it
    _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
    mi_os_prim_free(p, size, commit, stats);
    if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
    const size_t over_size = size + alignment;

    if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block
      // over-allocate uncommitted (virtual) memory
      p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // set p to the aligned part in the full region
      // note: this is dangerous on Windows as VirtualFree needs the actual base pointer
      // this is handled though by having the `base` field in the memid's
      *base = p; // remember the base
      p = mi_align_up_ptr(p, alignment);

      // explicitly commit only the aligned part
      if (commit) {
        _mi_os_commit(p, size, NULL, stats);
      }
    }
    else { // mmap can free inside an allocation
      // overallocate...
      p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
      if (p == NULL) return NULL;

      // and selectively unmap parts around the over-allocated area.
      void* aligned_p = mi_align_up_ptr(p, alignment);
      size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
      size_t mid_size = _mi_align_up(size, _mi_os_page_size());
      size_t post_size = over_size - pre_size - mid_size;
      mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit, stats); }
      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
      // we can return the aligned pointer on `mmap` systems
      p = aligned_p;
      *base = aligned_p; // since we freed the pre part, `*base == p`.
    }
  }

  mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
  return p;
}


/* -----------------------------------------------------------
  OS API: alloc and alloc_aligned
----------------------------------------------------------- */

void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  bool os_is_large = false;
  bool os_is_zero  = false;
  void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
  if (p != NULL) {
    *memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
  }
  return p;
}

void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats)
{
  MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
  *memid = _mi_memid_none();
  if (size == 0) return NULL;
  size = _mi_os_good_alloc_size(size);
  alignment = _mi_align_up(alignment, _mi_os_page_size());

  bool os_is_large = false;
  bool os_is_zero  = false;
  void* os_base = NULL;
  void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats );
  if (p != NULL) {
    *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
    memid->mem.os.base = os_base;
    memid->mem.os.alignment = alignment;
  }
  return p;
}

/* -----------------------------------------------------------
  OS aligned allocation with an offset. This is used
  for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
  page where the object can be aligned at an offset from the start of the segment.
  As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
  to use the actual start of the memory region.
----------------------------------------------------------- */

void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
  mi_assert(offset <= MI_SEGMENT_SIZE);
  mi_assert(offset <= size);
  mi_assert((alignment % _mi_os_page_size()) == 0);
  *memid = _mi_memid_none();
  if (offset > MI_SEGMENT_SIZE) return NULL;
  if (offset == 0) {
    // regular aligned allocation
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
  }
  else {
    // overallocate to align at an offset
    const size_t extra = _mi_align_up(offset, alignment) - offset;
    const size_t oversize = size + extra;
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats);
    if (start == NULL) return NULL;

    void* const p = (uint8_t*)start + extra;
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
    // decommit the overallocation at the start
    if (commit && extra > _mi_os_page_size()) {
      _mi_os_decommit(start, extra, stats);
    }
    return p;
  }
}

/* -----------------------------------------------------------
  OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */

// OS page align within a given area, either conservative (pages inside the area only),
// or not (straddling pages outside the area is possible)
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
  mi_assert(addr != NULL && size > 0);
  if (newsize != NULL) *newsize = 0;
  if (size == 0 || addr == NULL) return NULL;

  // page align conservatively within the range
  void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
                              : mi_align_down_ptr(addr, _mi_os_page_size()));
  void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
                            : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
  ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
  if (diff <= 0) return NULL;

  mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
  if (newsize != NULL) *newsize = (size_t)diff;
  return start;
}

static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
  return mi_os_page_align_areax(true, addr, size, newsize);
}

bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  if (is_zero != NULL) { *is_zero = false; }
  _mi_stat_increase(&stats->committed, size);  // use size for precise commit vs. decommit
  _mi_stat_counter_increase(&stats->commit_calls, 1);

  // page align range
  size_t csize;
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
  if (csize == 0) return true;

  // commit
  bool os_is_zero = false;
  int err = _mi_prim_commit(start, csize, &os_is_zero);
  if (err != 0) {
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
    return false;
  }
  if (os_is_zero && is_zero != NULL) {
    *is_zero = true;
    mi_assert_expensive(mi_mem_is_zero(start, csize));
  }
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
  #ifdef MI_TRACK_ASAN
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
             else { mi_track_mem_undefined(start,csize); }
  #endif
  return true;
}

static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
  MI_UNUSED(tld_stats);
  mi_stats_t* stats = &_mi_stats_main;
  mi_assert_internal(needs_recommit!=NULL);
  _mi_stat_decrease(&stats->committed, size);

  // page align
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;

  // decommit
  *needs_recommit = true;
  int err = _mi_prim_decommit(start,csize,needs_recommit);
  if (err != 0) {
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  mi_assert_internal(err == 0);
  return (err == 0);
}

bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
  bool needs_recommit;
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
}


// Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
  // page align conservatively within the range
  size_t csize;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return true;  // || _mi_os_is_huge_reserved(addr)
  _mi_stat_increase(&stats->reset, csize);
  _mi_stat_counter_increase(&stats->reset_calls, 1);

  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
  memset(start, 0, csize); // pretend it is eagerly reset
  #endif

  int err = _mi_prim_reset(start, csize);
  if (err != 0) {
    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
  }
  return (err == 0);
}


// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
  _mi_stat_counter_increase(&stats->purge_calls, 1);
  _mi_stat_increase(&stats->purged, size);

  if (mi_option_is_enabled(mi_option_purge_decommits) &&  // should decommit?
      !_mi_preloading())                                  // don't decommit during preloading (unsafe)
  {
    bool needs_recommit = true;
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
    return needs_recommit;
  }
  else {
    if (allow_reset) {  // this can sometimes be not allowed if the range is not fully committed
      _mi_os_reset(p, size, stats);
    }
    return false;  // needs no recommit
  }
}

// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) {
  return _mi_os_purge_ex(p, size, true, stats);
}

// Protect a region in memory to be not accessible.
static bool mi_os_protectx(void* addr, size_t size, bool protect) {
  // page align conservatively within the range
  size_t csize = 0;
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
  if (csize == 0) return false;
  /*
  if (_mi_os_is_huge_reserved(addr)) {
    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
  }
  */
  int err = _mi_prim_protect(start,csize,protect);
  if (err != 0) {
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
  }
  return (err == 0);
}

bool _mi_os_protect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, true);
}

bool _mi_os_unprotect(void* addr, size_t size) {
  return mi_os_protectx(addr, size, false);
}



/* ----------------------------------------------------------------------------
Support for allocating huge OS pages (1Gib) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)


#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0

// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  if (total_size != NULL) *total_size = 0;
  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;

  uintptr_t start = 0;
  uintptr_t end = 0;
  uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
  do {
    start = huge_start;
    if (start == 0) {
      // Initialize the start address after the 32TiB area
      start = ((uintptr_t)32 << 40); // 32TiB virtual start address
      #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
      uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
      start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12bits)*1GiB == between 0 to 4TiB
      #endif
    }
    end = start + size;
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));

  if (total_size != NULL) *total_size = size;
  return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
  MI_UNUSED(pages);
  if (total_size != NULL) *total_size = 0;
  return NULL;
}
#endif

// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
  *memid = _mi_memid_none();
  if (psize != NULL) *psize = 0;
  if (pages_reserved != NULL) *pages_reserved = 0;
  size_t size = 0;
  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
  if (start == NULL) return NULL; // or 32-bit systems

  // Allocate one page at the time but try to place them contiguously
  // We allocate one page at the time to be able to abort if it takes too long
  // or to at least allocate as many as available on the system.
  mi_msecs_t start_t = _mi_clock_start();
  size_t page = 0;
  bool all_zero = true;
  while (page < pages) {
    // allocate a page
    bool is_zero = false;
    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
    void* p = NULL;
    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
    if (!is_zero) { all_zero = false; }
    if (err != 0) {
      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
      break;
    }

    // Did we succeed at a contiguous address?
    if (p != addr) {
      // no success, issue a warning and break
      if (p != NULL) {
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
      }
      break;
    }

    // success, record it
    page++;  // increase before timeout check (see issue #711)
    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);

    // check for timeout
    if (max_msecs > 0) {
      mi_msecs_t elapsed = _mi_clock_end(start_t);
      if (page >= 1) {
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
        if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
          elapsed = max_msecs + 1;
        }
      }
      if (elapsed > max_msecs) {
        _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
        break;
      }
    }
  }
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) { *pages_reserved = page; }
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
  if (page != 0) {
    mi_assert(start != NULL);
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
    memid->memkind = MI_MEM_OS_HUGE;
    mi_assert(memid->is_pinned);
    #ifdef MI_TRACK_ASAN
    if (all_zero) { mi_track_mem_defined(start,size); }
    #endif
  }
  return (page == 0 ? NULL : start);
}

// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
  if (p==NULL || size==0) return;
  uint8_t* base = (uint8_t*)p;
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
    size -= MI_HUGE_OS_PAGE_SIZE;
    base += MI_HUGE_OS_PAGE_SIZE;
  }
}

/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/

_Atomic(size_t) _mi_numa_node_count; // = 0   // cache the node count

size_t _mi_os_numa_node_count_get(void) {
  size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
  if (count <= 0) {
    long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
    if (ncount > 0) {
      count = (size_t)ncount;
    }
    else {
      count = _mi_prim_numa_node_count(); // or detect dynamically
      if (count == 0) count = 1;
    }
    mi_atomic_store_release(&_mi_numa_node_count, count); // save it
    _mi_verbose_message("using %zd numa regions\n", count);
  }
  return count;
}

int _mi_os_numa_node_get(mi_os_tld_t* tld) {
  MI_UNUSED(tld);
  size_t numa_count = _mi_os_numa_node_count();
  if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
  // never more than the node count and >= 0
  size_t numa_node = _mi_prim_numa_node();
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
  return (int)numa_node;
}