Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp
40971 views
1
/*
2
* Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*/
23
24
#include "precompiled.hpp"
25
#include "gc/shared/gcLogPrecious.hpp"
26
#include "gc/z/zArray.inline.hpp"
27
#include "gc/z/zErrno.hpp"
28
#include "gc/z/zGlobals.hpp"
29
#include "gc/z/zLargePages.inline.hpp"
30
#include "gc/z/zMountPoint_linux.hpp"
31
#include "gc/z/zNUMA.inline.hpp"
32
#include "gc/z/zPhysicalMemoryBacking_linux.hpp"
33
#include "gc/z/zSyscall_linux.hpp"
34
#include "logging/log.hpp"
35
#include "runtime/init.hpp"
36
#include "runtime/os.hpp"
37
#include "runtime/safefetch.inline.hpp"
38
#include "utilities/align.hpp"
39
#include "utilities/debug.hpp"
40
#include "utilities/growableArray.hpp"
41
42
#include <fcntl.h>
43
#include <stdio.h>
44
#include <sys/mman.h>
45
#include <sys/stat.h>
46
#include <sys/statfs.h>
47
#include <sys/types.h>
48
#include <unistd.h>
49
50
//
51
// Support for building on older Linux systems
52
//
53
54
// memfd_create(2) flags
55
#ifndef MFD_CLOEXEC
56
#define MFD_CLOEXEC 0x0001U
57
#endif
58
#ifndef MFD_HUGETLB
59
#define MFD_HUGETLB 0x0004U
60
#endif
61
62
// open(2) flags
63
#ifndef O_CLOEXEC
64
#define O_CLOEXEC 02000000
65
#endif
66
#ifndef O_TMPFILE
67
#define O_TMPFILE (020000000 | O_DIRECTORY)
68
#endif
69
70
// fallocate(2) flags
71
#ifndef FALLOC_FL_KEEP_SIZE
72
#define FALLOC_FL_KEEP_SIZE 0x01
73
#endif
74
#ifndef FALLOC_FL_PUNCH_HOLE
75
#define FALLOC_FL_PUNCH_HOLE 0x02
76
#endif
77
78
// Filesystem types, see statfs(2)
79
#ifndef TMPFS_MAGIC
80
#define TMPFS_MAGIC 0x01021994
81
#endif
82
#ifndef HUGETLBFS_MAGIC
83
#define HUGETLBFS_MAGIC 0x958458f6
84
#endif
85
86
// Filesystem names
87
#define ZFILESYSTEM_TMPFS "tmpfs"
88
#define ZFILESYSTEM_HUGETLBFS "hugetlbfs"
89
90
// Proc file entry for max map mount
91
#define ZFILENAME_PROC_MAX_MAP_COUNT "/proc/sys/vm/max_map_count"
92
93
// Sysfs file for transparent huge page on tmpfs
94
#define ZFILENAME_SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled"
95
96
// Java heap filename
97
#define ZFILENAME_HEAP "java_heap"
98
99
// Preferred tmpfs mount points, ordered by priority
100
static const char* z_preferred_tmpfs_mountpoints[] = {
101
"/dev/shm",
102
"/run/shm",
103
NULL
104
};
105
106
// Preferred hugetlbfs mount points, ordered by priority
107
static const char* z_preferred_hugetlbfs_mountpoints[] = {
108
"/dev/hugepages",
109
"/hugepages",
110
NULL
111
};
112
113
static int z_fallocate_hugetlbfs_attempts = 3;
114
static bool z_fallocate_supported = true;
115
116
// Creates and validates the file that backs the Java heap. On any failure
// the constructor returns early with _initialized left false; callers must
// check is_initialized() afterwards.
ZPhysicalMemoryBacking::ZPhysicalMemoryBacking(size_t max_capacity) :
    _fd(-1),
    _filesystem(0),
    _block_size(0),
    _available(0),
    _initialized(false) {

  // Create backing file
  _fd = create_fd(ZFILENAME_HEAP);
  if (_fd == -1) {
    return;
  }

  // Size the backing file to the maximum heap capacity,
  // retrying if interrupted by a signal
  for (;;) {
    if (ftruncate(_fd, max_capacity) == 0) {
      break;
    }
    if (errno != EINTR) {
      ZErrno err;
      log_error_p(gc)("Failed to truncate backing file (%s)", err.to_string());
      return;
    }
  }

  // Query filesystem type, block size and available space
  struct statfs fs_info;
  if (fstatfs(_fd, &fs_info) == -1) {
    ZErrno err;
    log_error_p(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string());
    return;
  }

  _filesystem = fs_info.f_type;
  _block_size = fs_info.f_bsize;
  _available = fs_info.f_bavail * _block_size;

  log_info_p(gc, init)("Heap Backing Filesystem: %s (0x" UINT64_FORMAT_X ")",
                       is_tmpfs() ? ZFILESYSTEM_TMPFS : is_hugetlbfs() ? ZFILESYSTEM_HUGETLBFS : "other", _filesystem);

  // Reject filesystem/large-page combinations that can't work

  // Transparent huge pages require tmpfs...
  if (ZLargePages::is_transparent() && !is_tmpfs()) {
    log_error_p(gc)("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem",
                    ZFILESYSTEM_TMPFS);
    return;
  }

  // ...and a kernel that supports THP on shmem
  if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
    log_error_p(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
                    ZFILESYSTEM_TMPFS);
    return;
  }

  // Explicit large pages require hugetlbfs
  if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
    log_error_p(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
                    "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
    return;
  }

  // Conversely, hugetlbfs requires explicit large pages to be enabled
  if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
    log_error_p(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem",
                    ZFILESYSTEM_HUGETLBFS);
    return;
  }

  // The configured large page size must match ZGC's granule size
  if (ZLargePages::is_explicit() && os::large_page_size() != ZGranuleSize) {
    log_error_p(gc)("Incompatible large page size configured " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
                    os::large_page_size(), ZGranuleSize);
    return;
  }

  // The filesystem block size must evenly divide the granule size
  if (ZGranuleSize % _block_size != 0) {
    log_error_p(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")",
                    _block_size);
    return;
  }

  // On hugetlbfs the block size is the huge page size, which must equal the granule size
  if (is_hugetlbfs() && _block_size != ZGranuleSize) {
    log_error_p(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
                    ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize);
    return;
  }

  // Successfully initialized
  _initialized = true;
}
200
201
int ZPhysicalMemoryBacking::create_mem_fd(const char* name) const {
202
// Create file name
203
char filename[PATH_MAX];
204
snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
205
206
// Create file
207
const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
208
const int fd = ZSyscall::memfd_create(filename, MFD_CLOEXEC | extra_flags);
209
if (fd == -1) {
210
ZErrno err;
211
log_debug_p(gc, init)("Failed to create memfd file (%s)",
212
((ZLargePages::is_explicit() && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
213
return -1;
214
}
215
216
log_info_p(gc, init)("Heap Backing File: /memfd:%s", filename);
217
218
return fd;
219
}
220
221
int ZPhysicalMemoryBacking::create_file_fd(const char* name) const {
222
const char* const filesystem = ZLargePages::is_explicit()
223
? ZFILESYSTEM_HUGETLBFS
224
: ZFILESYSTEM_TMPFS;
225
const char** const preferred_mountpoints = ZLargePages::is_explicit()
226
? z_preferred_hugetlbfs_mountpoints
227
: z_preferred_tmpfs_mountpoints;
228
229
// Find mountpoint
230
ZMountPoint mountpoint(filesystem, preferred_mountpoints);
231
if (mountpoint.get() == NULL) {
232
log_error_p(gc)("Use -XX:AllocateHeapAt to specify the path to a %s filesystem", filesystem);
233
return -1;
234
}
235
236
// Try to create an anonymous file using the O_TMPFILE flag. Note that this
237
// flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
238
const int fd_anon = os::open(mountpoint.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
239
if (fd_anon == -1) {
240
ZErrno err;
241
log_debug_p(gc, init)("Failed to create anonymous file in %s (%s)", mountpoint.get(),
242
(err == EINVAL ? "Not supported" : err.to_string()));
243
} else {
244
// Get inode number for anonymous file
245
struct stat stat_buf;
246
if (fstat(fd_anon, &stat_buf) == -1) {
247
ZErrno err;
248
log_error_pd(gc)("Failed to determine inode number for anonymous file (%s)", err.to_string());
249
return -1;
250
}
251
252
log_info_p(gc, init)("Heap Backing File: %s/#" UINT64_FORMAT, mountpoint.get(), (uint64_t)stat_buf.st_ino);
253
254
return fd_anon;
255
}
256
257
log_debug_p(gc, init)("Falling back to open/unlink");
258
259
// Create file name
260
char filename[PATH_MAX];
261
snprintf(filename, sizeof(filename), "%s/%s.%d", mountpoint.get(), name, os::current_process_id());
262
263
// Create file
264
const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
265
if (fd == -1) {
266
ZErrno err;
267
log_error_p(gc)("Failed to create file %s (%s)", filename, err.to_string());
268
return -1;
269
}
270
271
// Unlink file
272
if (unlink(filename) == -1) {
273
ZErrno err;
274
log_error_p(gc)("Failed to unlink file %s (%s)", filename, err.to_string());
275
return -1;
276
}
277
278
log_info_p(gc, init)("Heap Backing File: %s", filename);
279
280
return fd;
281
}
282
283
int ZPhysicalMemoryBacking::create_fd(const char* name) const {
284
if (AllocateHeapAt == NULL) {
285
// If the path is not explicitly specified, then we first try to create a memfd file
286
// instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
287
// not be supported at all (requires kernel >= 3.17), or it might not support large
288
// pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
289
// file on an accessible tmpfs or hugetlbfs mount point.
290
const int fd = create_mem_fd(name);
291
if (fd != -1) {
292
return fd;
293
}
294
295
log_debug_p(gc)("Falling back to searching for an accessible mount point");
296
}
297
298
return create_file_fd(name);
299
}
300
301
bool ZPhysicalMemoryBacking::is_initialized() const {
302
return _initialized;
303
}
304
305
// Logs a warning if the backing filesystem's free space is smaller than
// the maximum heap size. A size-unlimited mount reports zero available
// space, in which case the check is skipped.
void ZPhysicalMemoryBacking::warn_available_space(size_t max_capacity) const {
  if (_available == 0) {
    // Mounted without a size limit, nothing to check
    log_info_p(gc, init)("Available space on backing filesystem: N/A");
    return;
  }

  log_info_p(gc, init)("Available space on backing filesystem: " SIZE_FORMAT "M", _available / M);

  // The heap will be capped if this limit is hit later during expansion;
  // warn up front so the operator can fix the filesystem size.
  if (_available < max_capacity) {
    log_warning_p(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
    log_warning_p(gc)("Not enough space available on the backing filesystem to hold the current max Java heap");
    log_warning_p(gc)("size (" SIZE_FORMAT "M). Please adjust the size of the backing filesystem accordingly "
                      "(available", max_capacity / M);
    log_warning_p(gc)("space is currently " SIZE_FORMAT "M). Continuing execution with the current filesystem "
                      "size could", _available / M);
    log_warning_p(gc)("lead to a premature OutOfMemoryError being thrown, due to failure to commit memory.");
  }
}
329
330
void ZPhysicalMemoryBacking::warn_max_map_count(size_t max_capacity) const {
331
const char* const filename = ZFILENAME_PROC_MAX_MAP_COUNT;
332
FILE* const file = fopen(filename, "r");
333
if (file == NULL) {
334
// Failed to open file, skip check
335
log_debug_p(gc, init)("Failed to open %s", filename);
336
return;
337
}
338
339
size_t actual_max_map_count = 0;
340
const int result = fscanf(file, SIZE_FORMAT, &actual_max_map_count);
341
fclose(file);
342
if (result != 1) {
343
// Failed to read file, skip check
344
log_debug_p(gc, init)("Failed to read %s", filename);
345
return;
346
}
347
348
// The required max map count is impossible to calculate exactly since subsystems
349
// other than ZGC are also creating memory mappings, and we have no control over that.
350
// However, ZGC tends to create the most mappings and dominate the total count.
351
// In the worst cases, ZGC will map each granule three times, i.e. once per heap view.
352
// We speculate that we need another 20% to allow for non-ZGC subsystems to map memory.
353
const size_t required_max_map_count = (max_capacity / ZGranuleSize) * 3 * 1.2;
354
if (actual_max_map_count < required_max_map_count) {
355
log_warning_p(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
356
log_warning_p(gc)("The system limit on number of memory mappings per process might be too low for the given");
357
log_warning_p(gc)("max Java heap size (" SIZE_FORMAT "M). Please adjust %s to allow for at",
358
max_capacity / M, filename);
359
log_warning_p(gc)("least " SIZE_FORMAT " mappings (current limit is " SIZE_FORMAT "). Continuing execution "
360
"with the current", required_max_map_count, actual_max_map_count);
361
log_warning_p(gc)("limit could lead to a premature OutOfMemoryError being thrown, due to failure to map memory.");
362
}
363
}
364
365
// Emits startup warnings about system limits that could later prevent
// committing the full heap: filesystem free space and vm.max_map_count.
void ZPhysicalMemoryBacking::warn_commit_limits(size_t max_capacity) const {
  warn_available_space(max_capacity);
  warn_max_map_count(max_capacity);
}
372
373
bool ZPhysicalMemoryBacking::is_tmpfs() const {
374
return _filesystem == TMPFS_MAGIC;
375
}
376
377
bool ZPhysicalMemoryBacking::is_hugetlbfs() const {
378
return _filesystem == HUGETLBFS_MAGIC;
379
}
380
381
bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
382
// If the shmem_enabled file exists and is readable then we
383
// know the kernel supports transparent huge pages for tmpfs.
384
return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
385
}
386
387
// fallocate(2) emulation for hugetlbfs: map the file segment (which fails
// immediately if the huge page pool is too small), optionally touch the
// pages to bind them to the file, then unmap. Returns 0 on success or the
// errno of the failing syscall.
ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(size_t offset, size_t length, bool touch) const {
  // On hugetlbfs the mapping itself fails up front when there aren't
  // enough huge pages to back it — no touching needed to find out.
  void* const mapping = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
  if (mapping == MAP_FAILED) {
    return errno;
  }

  // A fresh mapping only reserves the huge pages. Touching associates
  // them with the file segment, which is required before a hole can be
  // punched in that segment.
  if (touch) {
    char* const first = (char*)mapping;
    char* const last = first + length;
    os::pretouch_memory(first, last, _block_size);
  }

  // Tear the mapping down again. The huge pages now belong to this file,
  // so remapping and touching them later cannot raise SIGBUS.
  if (munmap(mapping, length) == -1) {
    return errno;
  }

  return 0;
}
417
418
static bool safe_touch_mapping(void* addr, size_t length, size_t page_size) {
419
char* const start = (char*)addr;
420
char* const end = start + length;
421
422
// Touching a mapping that can't be backed by memory will generate a
423
// SIGBUS. By using SafeFetch32 any SIGBUS will be safely caught and
424
// handled. On tmpfs, doing a fetch (rather than a store) is enough
425
// to cause backing pages to be allocated (there's no zero-page to
426
// worry about).
427
for (char *p = start; p < end; p += page_size) {
428
if (SafeFetch32((int*)p, -1) == -1) {
429
// Failed
430
return false;
431
}
432
}
433
434
// Success
435
return true;
436
}
437
438
// fallocate(2) emulation for tmpfs with transparent huge pages: map the
// file segment, advise/realign for huge pages, then safely touch every
// page to verify (and force) backing. Returns 0 on success, ENOMEM if
// the pages could not be backed, or the errno of a failing syscall.
ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(size_t offset, size_t length) const {
  // tmpfs only reveals whether enough memory is available once the
  // mapped pages are actually touched.
  void* const mapping = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
  if (mapping == MAP_FAILED) {
    return errno;
  }

  // Ask the kernel to use transparent huge pages for this range
  os::realign_memory((char*)mapping, length, os::large_page_size());

  // Touch all pages, catching SIGBUS, to confirm they can be backed
  const bool touched = safe_touch_mapping(mapping, length, _block_size);

  // Unmap again. Pages that were successfully touched stay allocated to
  // the file, so remapping and touching them later cannot raise SIGBUS.
  if (munmap(mapping, length) == -1) {
    return errno;
  }

  return touched ? 0 : ENOMEM;
}
464
465
// fallocate(2) emulation via pwrite(2): force the filesystem to allocate
// every block in the range by writing a single byte into each one.
// Returns 0 on success or the errno of the failing write.
ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
  const uint8_t byte = 0;
  const size_t end = offset + length;

  for (size_t pos = offset; pos < end; pos += _block_size) {
    if (pwrite(_fd, &byte, sizeof(byte), pos) == -1) {
      return errno;
    }
  }

  return 0;
}
479
480
// Allocates file blocks without using fallocate(2). fallocate(2) only
// works on tmpfs since Linux 3.5 and hugetlbfs since Linux 4.3, so this
// emulates it: mmap/munmap for hugetlbfs and for tmpfs with transparent
// huge pages, pwrite for everything else.
ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) const {
  if (ZLargePages::is_explicit()) {
    return fallocate_compat_mmap_hugetlbfs(offset, length, false /* touch */);
  } else if (ZLargePages::is_transparent()) {
    return fallocate_compat_mmap_tmpfs(offset, length);
  } else {
    return fallocate_compat_pwrite(offset, length);
  }
}
493
494
// Allocates file blocks using the real fallocate(2) syscall (mode 0).
// Returns 0 on success or the errno on failure.
ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) const {
  const int mode = 0; // Allocate
  if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
    return errno;
  }

  return 0;
}
505
506
// Allocates file blocks for [offset, offset+length), preferring the real
// fallocate(2) syscall and falling back to compatibility mode when the
// syscall is unavailable.
//
// Compat mode is used unconditionally with large pages: on hugetlbfs it
// is more efficient (though it only reserves pages — they must be touched
// before a hole can be punched), and with transparent huge pages the
// mapping must be madvise(2)'d before pages are allocated.
ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) const {
  if (z_fallocate_supported && !ZLargePages::is_enabled()) {
    const ZErrno err = fallocate_fill_hole_syscall(offset, length);
    if (!err) {
      // Success
      return 0;
    }

    if (err != ENOSYS && err != EOPNOTSUPP) {
      // A real failure, not a missing syscall
      return err;
    }

    // Syscall unavailable — remember that and use compat mode from now on
    log_debug_p(gc)("Falling back to fallocate() compatibility mode");
    z_fallocate_supported = false;
  }

  return fallocate_fill_hole_compat(offset, length);
}
532
533
// Deallocates file blocks in [offset, offset+length) by punching a hole.
// Returns 0 on success or the errno on failure.
ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) const {
  if (ZLargePages::is_explicit()) {
    // Holes can only be punched in pages that have been touched; merely
    // reserved pages aren't tied to a file segment. Since we can't tell
    // which pages were touched before, touch the whole range now.
    const ZErrno touch_err = fallocate_compat_mmap_hugetlbfs(offset, length, true /* touch */);
    if (touch_err) {
      return touch_err;
    }
  }

  const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
  if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
    return errno;
  }

  return 0;
}
555
556
// Splits the range in two and performs the fallocate operation on each
// half separately. Used to recover from EINTR on large requests.
ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) const {
  // First half (rounded up to the block size)
  const size_t first_length = align_up(length / 2, _block_size);
  const ZErrno first_err = fallocate(punch_hole, offset, first_length);
  if (first_err) {
    return first_err;
  }

  // Remainder
  const ZErrno second_err = fallocate(punch_hole, offset + first_length, length - first_length);
  if (second_err) {
    return second_err;
  }

  return 0;
}
576
577
// Fills or punches a hole in [offset, offset+length), both block-aligned.
// A single fallocate(2) over a large range can take long enough that
// profilers (e.g. VTune) constantly interrupt it with signals; on EINTR
// the range is recursively split into smaller steps.
ZErrno ZPhysicalMemoryBacking::fallocate(bool punch_hole, size_t offset, size_t length) const {
  assert(is_aligned(offset, _block_size), "Invalid offset");
  assert(is_aligned(length, _block_size), "Invalid length");

  const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
  if (err != EINTR || length <= _block_size) {
    return err;
  }

  // Interrupted and still splittable — retry in two halves
  return split_and_fallocate(punch_hole, offset, length);
}
592
593
bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) const {
594
log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
595
offset / M, (offset + length) / M, length / M);
596
597
retry:
598
const ZErrno err = fallocate(false /* punch_hole */, offset, length);
599
if (err) {
600
if (err == ENOSPC && !is_init_completed() && ZLargePages::is_explicit() && z_fallocate_hugetlbfs_attempts-- > 0) {
601
// If we fail to allocate during initialization, due to lack of space on
602
// the hugetlbfs filesystem, then we wait and retry a few times before
603
// giving up. Otherwise there is a risk that running JVMs back-to-back
604
// will fail, since there is a delay between process termination and the
605
// huge pages owned by that process being returned to the huge page pool
606
// and made available for new allocations.
607
log_debug_p(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
608
609
// Wait and retry in one second, in the hope that huge pages will be
610
// available by then.
611
sleep(1);
612
goto retry;
613
}
614
615
// Failed
616
log_error_p(gc)("Failed to commit memory (%s)", err.to_string());
617
return false;
618
}
619
620
// Success
621
return true;
622
}
623
624
// Maps a heap offset to a NUMA node by striping granules
// round-robin across the node-index-to-node table.
static int offset_to_node(size_t offset) {
  const GrowableArray<int>* const index_to_node = os::Linux::numa_nindex_to_node();
  const size_t node_index = (offset >> ZGranuleSizeShift) % index_to_node->length();
  return index_to_node->at((int)node_index);
}
629
630
// Commits the range one granule at a time, steering each granule to a
// different preferred NUMA node. Returns the number of bytes actually
// committed (may be less than length on failure).
size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) const {
  size_t committed = 0;

  // Per-granule commits let each granule be allocated from its own node
  for (; committed < length; committed += ZGranuleSize) {
    const size_t granule_offset = offset + committed;

    // Prefer the node this granule is striped to
    os::Linux::numa_set_preferred(offset_to_node(granule_offset));

    if (!commit_inner(granule_offset, ZGranuleSize)) {
      // Stop at the first failure
      break;
    }
  }

  // Restore the default NUMA policy
  os::Linux::numa_set_preferred(-1);

  return committed;
}
654
655
// Commits the range in one shot if possible; otherwise commits as much
// as it can by repeatedly halving the attempted size (granule-aligned).
// Returns the number of bytes committed from the start of the range.
size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) const {
  // Fast path: the whole region commits at once
  if (commit_inner(offset, length)) {
    return length;
  }

  // Slow path: binary-search for the largest committable prefix
  size_t start = offset;
  size_t end = offset + length;

  while (true) {
    const size_t half = align_down((end - start) / 2, ZGranuleSize);
    if (half < ZGranuleSize) {
      // Nothing granule-sized left to try
      return start - offset;
    }

    if (commit_inner(start, half)) {
      // Committed — advance and try to commit more
      start += half;
    } else {
      // Failed — shrink the window and try to commit less
      end -= half;
    }
  }
}
682
683
// Commits [offset, offset+length), returning the number of bytes
// committed. With NUMA enabled and non-large pages, memory is explicitly
// interleaved per granule at commit/fallocate time to get granule-level
// NUMA interleaving.
size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) const {
  if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
    return commit_numa_interleaved(offset, length);
  }

  return commit_default(offset, length);
}
692
693
// Uncommits [offset, offset+length) by punching a hole in the backing
// file. Returns the number of bytes uncommitted (0 on failure).
size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) const {
  log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
                      offset / M, (offset + length) / M, length / M);

  const ZErrno err = fallocate(true /* punch_hole */, offset, length);
  if (err) {
    log_error(gc)("Failed to uncommit memory (%s)", err.to_string());
    return 0;
  }

  return length;
}
705
706
// Maps the backing file segment at `offset` into the address space at
// `addr` (fixed mapping). Fatal on failure.
void ZPhysicalMemoryBacking::map(uintptr_t addr, size_t size, uintptr_t offset) const {
  const void* const result = mmap((void*)addr, size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, _fd, offset);
  if (result == MAP_FAILED) {
    ZErrno err;
    fatal("Failed to map memory (%s)", err.to_string());
  }
}
713
714
// Detaches the backing memory at [addr, addr+size) while keeping the
// address space reservation intact: instead of munmap'ing, overlay a new
// anonymous, inaccessible, non-reserved mapping. Fatal on failure.
void ZPhysicalMemoryBacking::unmap(uintptr_t addr, size_t size) const {
  const void* const result = mmap((void*)addr, size, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
  if (result == MAP_FAILED) {
    ZErrno err;
    fatal("Failed to map memory (%s)", err.to_string());
  }
}
724
725