GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/jemalloc/src/pages.c
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <sys/auxv.h>
#include <vm/vm_param.h>
#include <vm/vm.h>
#endif
#endif
#ifdef __NetBSD__
#include <sys/bitops.h>	/* ilog2 */
#endif
#ifdef JEMALLOC_HAVE_VM_MAKE_TAG
#define PAGES_FD_TAG VM_MAKE_TAG(101U)
#else
#define PAGES_FD_TAG -1
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
# define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
static int madvise_dont_need_zeros_is_faulty = -1;
/**
 * Check that MADV_DONTNEED will actually zero pages on subsequent access.
 *
 * qemu does not support this yet [1], so a program that uses jemalloc
 * under qemu can trip a hard-to-diagnose assertion failure:
 *
 * <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
 *
 * [1]: https://patchwork.kernel.org/patch/10576637/
 */
static int madvise_MADV_DONTNEED_zeroes_pages(void)
{
	int works = -1;
	size_t size = PAGE;

	void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

	if (addr == MAP_FAILED) {
		malloc_write("<jemalloc>: Cannot allocate memory for "
		    "MADV_DONTNEED check\n");
		if (opt_abort) {
			abort();
		}
		/* Cannot probe; do not dereference MAP_FAILED below. */
		return works;
	}

	memset(addr, 'A', size);
	if (madvise(addr, size, MADV_DONTNEED) == 0) {
		works = memchr(addr, 'A', size) == NULL;
	} else {
		/*
		 * If madvise() does not support MADV_DONTNEED, then we can
		 * call it anyway, and use its return code.
		 */
		works = 1;
	}

	if (munmap(addr, size) != 0) {
		malloc_write("<jemalloc>: Cannot deallocate memory for "
		    "MADV_DONTNEED check\n");
		if (opt_abort) {
			abort();
		}
	}

	return works;
}
#endif
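
#if 0
/*
 * Editorial sketch, not upstream jemalloc code: the probe above as a
 * self-contained program, for experimenting outside the allocator (e.g.
 * under qemu user-mode emulation). Assumes a POSIX system that defines
 * MADV_DONTNEED and MAP_ANONYMOUS.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void) {
	size_t size = (size_t)sysconf(_SC_PAGESIZE);
	void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		return 1;
	}
	memset(addr, 'A', size);
	/* After a successful MADV_DONTNEED, reads must observe zeroes. */
	if (madvise(addr, size, MADV_DONTNEED) == 0) {
		printf("MADV_DONTNEED zeroes pages: %s\n",
		    memchr(addr, 'A', size) == NULL ? "yes" : "no");
	} else {
		printf("MADV_DONTNEED not supported here\n");
	}
	munmap(addr, size);
	return 0;
}
#endif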

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
#ifdef __NetBSD__
		/*
		 * On NetBSD PAGE for a platform is defined to the
		 * maximum page size of all machine architectures
		 * for that platform, so that we can use the same
		 * binaries across all machine architectures.
		 */
		if (alignment > os_page || PAGE > os_page) {
			unsigned int a = ilog2(MAX(alignment, PAGE));
			mmap_flags |= MAP_ALIGNED(a);
		}
#endif
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
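
#if 0
/*
 * Editorial sketch, not upstream code: the arithmetic behind
 * pages_map_slow(). Over-allocate size + alignment - os_page bytes, then
 * trim the lead so the result lands on an alignment boundary. All values
 * below, including the fake mmap result, are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void) {
	size_t os_page = 4096, size = 8192, alignment = 65536;
	size_t alloc_size = size + alignment - os_page;	/* 69632 */
	uintptr_t pages = 0x7f0000003000;	/* hypothetical mmap result */
	/* Round up to the next alignment boundary. */
	uintptr_t aligned = (pages + alignment - 1) & ~(uintptr_t)(alignment - 1);
	size_t leadsize = aligned - pages;
	size_t trailsize = alloc_size - leadsize - size;
	/* Prints: lead=53248 result=0x7f0000010000 trail=8192 */
	printf("lead=%zu result=%#lx trail=%zu\n", leadsize,
	    (unsigned long)aligned, trailsize);
	return 0;
}
#endif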

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at a specific address without
	 * touching existing mappings, and to mmap with a specific alignment.
	 */
	{
		if (os_overcommits) {
			*commit = true;
		}

		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 0);
			flags |= MAP_ALIGNED(alignment_bits);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings. The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess. However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
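
#if 0
/*
 * Editorial sketch, not upstream code: how a jemalloc-internal caller
 * might obtain and release a 2 MiB-aligned region via pages_map() and
 * pages_unmap(). The function name and the size/alignment values are
 * hypothetical, chosen for illustration only.
 */
static void
demo_pages_map_usage(void) {
	size_t size = (size_t)2 << 20;
	size_t alignment = (size_t)2 << 20;
	bool commit = true;

	void *p = pages_map(NULL, size, alignment, &commit);
	if (p == NULL) {
		return;	/* Reservation failed. */
	}
	assert(ALIGNMENT_ADDR2BASE(p, alignment) == p);
	pages_unmap(p, size);
}
#endif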

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
os_pages_commit(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    PAGES_FD_TAG, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	if (os_overcommits) {
		return true;
	}

	return os_pages_commit(addr, size, commit);
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
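
#if 0
/*
 * Editorial sketch, not upstream code: what os_pages_commit() does on
 * POSIX systems. Overlaying an existing region with MAP_FIXED swaps its
 * protection and drops its contents in one call. Assumes MAP_ANONYMOUS
 * is available.
 */
#include <assert.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void) {
	size_t sz = (size_t)sysconf(_SC_PAGESIZE);
	void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(p != MAP_FAILED);
	/* "Decommit": replace with an inaccessible mapping at the same spot. */
	void *q = mmap(p, sz, PROT_NONE,
	    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	assert(q == p);
	/* "Commit" it again; contents are demand-zeroed, not preserved. */
	q = mmap(p, sz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	assert(q == p);
	munmap(p, sz);
	return 0;
}
#endif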

void
pages_mark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	if (head != NULL) {
		mprotect(head, PAGE, PROT_NONE);
	}
	if (tail != NULL) {
		mprotect(tail, PAGE, PROT_NONE);
	}
#else
	/* Decommit sets to PROT_NONE / MEM_DECOMMIT. */
	if (head != NULL) {
		os_pages_commit(head, PAGE, false);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, false);
	}
#endif
}

void
pages_unmark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	bool head_and_tail = (head != NULL) && (tail != NULL);
	size_t range = head_and_tail ?
	    (uintptr_t)tail - (uintptr_t)head + PAGE :
	    SIZE_T_MAX;
	/*
	 * The amount of work that the kernel does in mprotect depends on the
	 * range argument. SC_LARGE_MINCLASS is an arbitrary threshold chosen
	 * to prevent the kernel from doing so much extra work that it would
	 * outweigh the savings of performing one less system call.
	 */
	bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS;
	if (ranged_mprotect) {
		mprotect(head, range, PROT_READ | PROT_WRITE);
	} else {
		if (head != NULL) {
			mprotect(head, PAGE, PROT_READ | PROT_WRITE);
		}
		if (tail != NULL) {
			mprotect(tail, PAGE, PROT_READ | PROT_WRITE);
		}
	}
#else
	if (head != NULL) {
		os_pages_commit(head, PAGE, true);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, true);
	}
#endif
}
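
#if 0
/*
 * Editorial sketch, not upstream code: the guard-page technique in
 * isolation. A PROT_NONE page on each side of a buffer turns an overflow
 * into an immediate SIGSEGV instead of silent corruption. POSIX-only;
 * "demo_guarded_buffer" is a hypothetical name.
 */
#include <sys/mman.h>
#include <unistd.h>

static char *
demo_guarded_buffer(size_t usable_pages) {
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t total = (usable_pages + 2) * page;
	char *base = mmap(NULL, total, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		return NULL;
	}
	/* Head and tail guards; any touch of these pages faults. */
	mprotect(base, page, PROT_NONE);
	mprotect(base + total - page, page, PROT_NONE);
	return base + page;	/* First usable byte. */
}
#endif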

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
# ifdef MADV_FREE
	    MADV_FREE
# else
	    JEMALLOC_MADV_FREE
# endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}
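
#if 0
/*
 * Editorial sketch, not upstream code: the observable difference between
 * the two purge flavors on Linux, where both flags exist. MADV_FREE marks
 * pages reclaimable but may keep their contents until memory pressure;
 * MADV_DONTNEED drops them, so the next read is demand-zeroed. Assumes
 * Linux >= 4.5 for MADV_FREE.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void) {
	size_t sz = (size_t)sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		return 1;
	}
	p[0] = 'A';
#ifdef MADV_FREE
	madvise(p, sz, MADV_FREE);
	/* May print 65 ('A') or 0: reclamation is lazy. */
	printf("after MADV_FREE:     %d\n", p[0]);
#endif
	p[0] = 'A';
	madvise(p, sz, MADV_DONTNEED);
	/* Prints 0 on Linux: the purge is forced. */
	printf("after MADV_DONTNEED: %d\n", p[0]);
	munmap(p, sz);
	return 0;
}
#endif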

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	struct memcntl_mha m = {0};
	m.mha_cmd = MHA_MAPSIZE_VA;
	m.mha_pagesize = HUGEPAGE;
	return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_NOCORE) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DODUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_CORE) != 0;
#else
	return false;
#endif
}
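
#if 0
/*
 * Editorial sketch, not upstream code: requesting transparent huge pages
 * for a 2 MiB-aligned region, the primitive pages_huge() wraps on Linux.
 * Assumes Linux with MADV_HUGEPAGE and a 2 MiB THP size.
 */
#include <stdint.h>
#include <sys/mman.h>

int
main(void) {
	size_t huge = (size_t)2 << 20;
	/* Over-map, then use the aligned 2 MiB inside the reservation. */
	size_t len = 2 * huge;
	uintptr_t base = (uintptr_t)mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == (uintptr_t)MAP_FAILED) {
		return 1;
	}
	uintptr_t aligned = (base + huge - 1) & ~(uintptr_t)(huge - 1);
	/* Advisory only: the kernel may or may not back this with a THP. */
	return madvise((void *)aligned, huge, MADV_HUGEPAGE) != 0;
}
#endif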


static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	/*
	 * This returns the value obtained from
	 * the auxv vector, avoiding a syscall.
	 */
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Fall back to the compile-time page size, not its log. */
		return PAGE;
	}
	return (size_t)result;
#endif
}
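
#if 0
/*
 * Editorial sketch, not upstream code: the portable page-size query that
 * os_page_detect() wraps. On FreeBSD, getpagesize(3) reads the value from
 * the ELF auxiliary vector, so no syscall is made.
 */
#include <stdio.h>
#include <unistd.h>

int
main(void) {
	long sz = sysconf(_SC_PAGESIZE);
	printf("OS page size: %ld bytes\n", sz);
	return sz <= 0;
}
#endif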

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;
	int bsdflags;

	if (_elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)) == 0)
		return ((bsdflags & ELF_BSDF_VMNOOVERCOMMIT) == 0);

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & (SWAP_RESERVE_FORCE_ON |
	    SWAP_RESERVE_RLIMIT_ON)) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif
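
#if 0
/*
 * Editorial sketch, not upstream code: the same check with plain stdio,
 * which is fine outside of allocator bootstrap where interposed wrappers
 * are not a concern. Linux-only; the file does not exist elsewhere.
 */
#include <stdio.h>

int
main(void) {
	FILE *f = fopen("/proc/sys/vm/overcommit_memory", "r");
	if (f == NULL) {
		return 1;
	}
	int c = fgetc(f);
	fclose(f);
	if (c == EOF) {
		return 1;
	}
	/* 0 = heuristic, 1 = always, 2 = never. */
	printf("overcommit mode: %c -> %s\n", c,
	    (c == '0' || c == '1') ? "overcommits" : "strict accounting");
	return 0;
}
#endif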

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

static void
init_thp_state(void) {
	if (!have_madvise_huge && !have_memcntl) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	int fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	init_system_thp_mode = thp_mode_default;
	return;
#endif
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
	if (!opt_trust_madvise) {
		madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
		if (madvise_dont_need_zeros_is_faulty) {
			malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
			malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
		}
	} else {
		/*
		 * When opt_trust_madvise is enabled, skip the runtime
		 * check and assume MADV_DONTNEED works.
		 */
		madvise_dont_need_zeros_is_faulty = 0;
	}
#endif

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
# ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
# endif
#elif defined(__NetBSD__)
	os_overcommits = true;
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}