GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_alloc.cpp
/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif // KMP_OS_WINDOWS

/* The three modes of operation are fifo search, lifo search, and best fit */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    // 1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
    1 << 21, /* 2MB */
    1 << 22, /* 4MB */
    1 << 23, /* 8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}
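
// [Editor's note, not in the upstream sources] A worked example of the binary
// chop above: with the 20-entry bget_bin_size[] table, a request of 3000
// bytes narrows lo/hi as 0..19 -> 0..8 -> 4..8 -> 4..5 and returns bin 4
// (1 << 10). The chop may settle one bin below the tightest fit
// (bget_bin_size[5] == 2048 also fits 3000), but that is harmless: the
// mapping is monotonic in size and is used both when inserting a free buffer
// and when searching, and bget() scans upward from the returned bin.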

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}
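
// [Editor's note, not in the upstream sources] Buffers are owned by the
// thread whose pool they came from. When another thread calls brel() on such
// a buffer, __kmp_bget_enqueue() below pushes it onto the owner's
// th_local.bget_list (a lock-free singly linked stack when
// USE_CMP_XCHG_FOR_BGET is set, otherwise guarded by a lock), and the owner
// drains that list in __kmp_bget_dequeue() above before it next allocates or
// adds a pool. This keeps each pool effectively single-threaded apart from
// the one shared list head.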

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/* GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}
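
// [Editor's note, not in the upstream sources] In summary, bget() tries, in
// order: (1) the binned free lists, scanned fifo/lifo/best-fit according to
// thr->mode, splitting a free buffer only when more than
// SizeQ + sizeof(bhead_t) bytes would remain; (2) the user compaction
// callback (compfcn), retrying the search for as long as it reports
// progress; (3) the acquisition callback (acqfcn), either as a direct
// allocation for requests larger than an expansion block (tagged with
// bsize == 0 so brel() releases it straight back) or as a fresh expansion
// pool followed by a single recursive retry.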

/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
   the entire contents of the buffer to zero, not just the
   region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
   simply in terms of brel() and bget(). It could be
   enhanced to allow the buffer to grow into adjacent free
   blocks and to avoid moving data unnecessarily. */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free. */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/* BECTL -- Establish automatic pool expansion control */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /* int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1));
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
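
// [Editor's note, not in the upstream sources] Layout of a block right after
// bpool(th, buf, len), with len already rounded down to SizeQuant and
// L = len - sizeof(bhead_t):
//
//   [ bhead_t | free space .................... | bhead_t sentinel ]
//     prevfree = 0                                prevfree = L
//     bsize    = L                                bsize    = ESent
//     bthr     = th | 1 (marked: block start came straight from acqfcn)
//
// The ESent sentinel acts as a permanently "allocated" dummy buffer, so
// brel() never tries to coalesce past the end of the block.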

/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}

void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}

void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

#endif // #if KMP_USE_BGET

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}
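
// [Editor's note, not in the upstream sources] kmpc_malloc() hides the raw
// bget() pointer one word below the address handed to the user:
//
//   raw:  [ void *raw (stores raw itself) | user data ... ]
//                                           ^-- returned to caller
//
// kmpc_free() and kmpc_realloc() recover it as *((void **)ptr - 1); the
// extra word is why sizeof(ptr) is added to every request.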

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}
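
// [Editor's note, not in the upstream sources] A worked example of the
// alignment math above on an LP64 target, assuming bget() returns
// ptr_allocated == 0x1008 and alignment == 64:
//   ptr = (0x1008 + 8 + 0x40) & ~0x3F == 0x1040.
// Adding sizeof(void *) before rounding guarantees ptr - 8 >= ptr_allocated,
// so the word just below the aligned address can always hold the raw pointer
// for kmpc_free() to find.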

void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before.
    // So it should be safe to call __kmp_get_thread(), not
    // __kmp_entry_thread().
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}

// NOTE: the library must have already been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* OMP 5.0 Memory Management support */
static const char *kmp_mk_lib_name;
static void *h_memkind;
/* memkind experimental API: */
// memkind_alloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
static void *(*kmp_target_unlock_mem)(void *ptr, int device);
static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
static bool __kmp_target_mem_available;
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  (MS == llvm_omp_target_host_mem_space ||                                     \
   MS == llvm_omp_target_shared_mem_space ||                                   \
   MS == llvm_omp_target_device_mem_space)
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  (MA == llvm_omp_target_host_mem_alloc ||                                     \
   MA == llvm_omp_target_shared_mem_alloc ||                                   \
   MA == llvm_omp_target_device_mem_alloc)

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // symbol found
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif

void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  // use of statically linked memkind is problematic, as it depends on libnuma
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
      chk_kind(&mk_dax_kmem);
      mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
      chk_kind(&mk_dax_kmem_all);
      mk_dax_kmem_preferred =
          (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
      chk_kind(&mk_dax_kmem_preferred);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
  }
#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  kmp_mk_lib_name = "";
#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}
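
// [Editor's note, not in the upstream sources] memkind is an optional,
// dlopen()ed dependency: each entry point and kind object is looked up with
// dlsym(), every kind is vetted through chk_kind() /
// memkind_check_available(), and any failure falls through to the tail of
// __kmp_init_memkind(), which resets all the pointers to NULL and leaves
// __kmp_memkind_available at its zero-initialized default, so the allocators
// silently fall back to the internal heap.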

void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}

void __kmp_init_target_mem() {
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");
  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
  *(void **)(&kmp_target_free_shared) =
      KMP_DLSYM("llvm_omp_target_free_shared");
  *(void **)(&kmp_target_free_device) =
      KMP_DLSYM("llvm_omp_target_free_device");
  __kmp_target_mem_available =
      kmp_target_alloc_host && kmp_target_alloc_shared &&
      kmp_target_alloc_device && kmp_target_free_host &&
      kmp_target_free_shared && kmp_target_free_device;
  // lock/pin and unlock/unpin target calls
  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
}

omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  // OpenMP 5.0 only allows predefined memspaces
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms; // not used currently
  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
      break;
    case omp_atk_pinned:
      al->pinned = true;
      break;
    case omp_atk_alignment:
      __kmp_type_convert(traits[i].value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      al->memkind = RCAST(void **, traits[i].value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }
  if (al->fb == 0) {
    // set default allocator
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Let's use memkind library if available
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        // AC: do not try to use MEMKIND_HBW for now, because memkind library
        // cannot reliably detect exhaustion of HBW memory.
        // It could be possible using hbw_verify_memory_region() but memkind
        // manual says: "Using this function in production code may result in
        // serious performance penalty".
        al->memkind = mk_hbw_preferred;
      } else {
        // HBW is requested but not available --> return NULL allocator
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        // All pmem nodes are visited
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        // Only closest pmem node is visited
        al->memkind = mk_dax_kmem;
      } else {
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
    __kmp_free(al);
    return omp_null_allocator;
  } else {
    if (ms == omp_high_bw_mem_space) {
      // cannot detect HBW memory presence without memkind library
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}
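
// [Editor's note, not in the upstream sources] User code reaches
// __kmpc_init_allocator() through the standard OpenMP 5.0 API; a minimal
// usage sketch:
//
//   omp_alloctrait_t traits[] = {{omp_atk_alignment, 64},
//                                {omp_atk_fallback, omp_atv_null_fb}};
//   omp_allocator_handle_t a =
//       omp_init_allocator(omp_default_mem_space, 2, traits);
//   double *v = (double *)omp_alloc(1024 * sizeof(double), a);
//   // ... v is 64-byte aligned, or NULL if the allocation failed ...
//   omp_free(v, a);
//   omp_destroy_allocator(a);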

void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator > kmp_max_mem_alloc)
    __kmp_free(allocator);
}

void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator == omp_null_allocator)
    allocator = omp_default_mem_alloc;
  __kmp_threads[gtid]->th.th_def_allocator = allocator;
}

omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  return __kmp_threads[gtid]->th.th_def_allocator;
}

typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by allocator
  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
  size_t size_orig; // Original size requested
  void *ptr_align; // Pointer to aligned memory, returned
  kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // align to pointer size by default

// external interfaces are wrappers over internal implementation
void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
                           omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
                (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
                    omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
                (int)size, allocator));
  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_realloc(int gtid, void *ptr, size_t size,
                     omp_allocator_handle_t allocator,
                     omp_allocator_handle_t free_allocator) {
  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
                allocator, free_allocator));
  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
  return nptr;
}

void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
  ___kmpc_free(gtid, ptr, allocator);
  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
  return;
}

// internal implementation, called from inside the library
void *__kmp_alloc(int gtid, size_t algn, size_t size,
                  omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (size == 0)
    return NULL;
  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;
  kmp_int32 default_device =
      __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;

  al = RCAST(kmp_allocator_t *, allocator);

  int sz_desc = sizeof(kmp_mem_desc_t);
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr; // address returned by allocator
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  size_t align = alignment; // default alignment
  if (allocator > kmp_max_mem_alloc && al->alignment > align)
    align = al->alignment; // alignment required by allocator trait
  if (align < algn)
    align = algn; // max of allocator trait, parameter and sizeof(void*)
  desc.size_orig = size;
  desc.size_a = size + sz_desc + align;
  bool is_pinned = false;
  if (allocator > kmp_max_mem_alloc)
    is_pinned = al->pinned;

  // Use default allocator if libmemkind is not available
  int use_default_allocator = (__kmp_memkind_available) ? false : true;

  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
    // Use size input directly as the memory may not be accessible on host.
    // Use default device for now.
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (allocator == llvm_omp_target_host_mem_alloc)
        ptr = kmp_target_alloc_host(size, device);
      else if (allocator == llvm_omp_target_shared_mem_alloc)
        ptr = kmp_target_alloc_shared(size, device);
      else // allocator == llvm_omp_target_device_mem_alloc
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (al->memspace == llvm_omp_target_host_mem_space)
        ptr = kmp_target_alloc_host(size, device);
      else if (al->memspace == llvm_omp_target_shared_mem_space)
        ptr = kmp_target_alloc_shared(size, device);
      else // al->memspace == llvm_omp_target_device_mem_space
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (__kmp_memkind_available) {
    if (allocator < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
      } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
      } else {
        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
      }
    } else if (al->pool_size > 0) {
      // custom allocator with pool size requested
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
      if (used + desc.size_a > al->pool_size) {
        // not enough space, need to go fallback path
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        } // else ptr == NULL;
      } else {
        // pool has enough space
        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
            if (is_pinned && kmp_target_lock_mem)
              kmp_target_lock_mem(ptr, size, default_device);
            return ptr;
          }
        }
      }
    } else {
      // custom allocator, pool size not requested
      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
      if (ptr == NULL) {
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        }
      }
    }
  } else if (allocator < kmp_max_mem_alloc) {
    // pre-defined allocator
    if (allocator == omp_high_bw_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
    } else if (allocator == omp_large_cap_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
    } else if (allocator == omp_const_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
    } else if (allocator == omp_low_lat_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
    } else if (allocator == omp_cgroup_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
    } else if (allocator == omp_pteam_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
    } else if (allocator == omp_thread_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
    } else { // default allocator requested
      use_default_allocator = true;
    }
    if (use_default_allocator) {
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      use_default_allocator = false;
    }
  } else if (al->pool_size > 0) {
    // custom allocator with pool size requested
    kmp_uint64 used =
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
    if (used + desc.size_a > al->pool_size) {
      // not enough space, need to go fallback path
      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      if (al->fb == omp_atv_default_mem_fb) {
        al = (kmp_allocator_t *)omp_default_mem_alloc;
        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      } else if (al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } else if (al->fb == omp_atv_allocator_fb) {
        KMP_ASSERT(al != al->fb_data);
        al = al->fb_data;
        ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
        if (is_pinned && kmp_target_lock_mem)
          kmp_target_lock_mem(ptr, size, default_device);
        return ptr;
      } // else ptr == NULL;
    } else {
      // pool has enough space
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } // no sense to look for another fallback because of same internal alloc
    }
  } else {
    // custom allocator, pool size not requested
    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0); // abort fallback requested
    } // no sense to look for another fallback because of same internal alloc
  }
  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
  if (ptr == NULL)
    return NULL;

  if (is_pinned && kmp_target_lock_mem)
    kmp_target_lock_mem(ptr, desc.size_a, default_device);

  addr = (kmp_uintptr_t)ptr;
  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
  addr_descr = addr_align - sz_desc;

  desc.ptr_alloc = ptr;
  desc.ptr_align = (void *)addr_align;
1751
desc.allocator = al;
1752
*((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
1753
KMP_MB();
1754
1755
return desc.ptr_align;
1756
}
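
// Worked example of the placement arithmetic above (illustrative numbers, not
// part of the runtime): if the underlying allocation returns ptr == 0x1008,
// the descriptor takes sz_desc == 32 bytes and align == 64, then
//   addr_align = (0x1008 + 32 + 64 - 1) & ~(64 - 1) = 0x1040
//   addr_descr = 0x1040 - 32 = 0x1020
// so the descriptor sits immediately below the aligned pointer handed back to
// the caller, where ___kmpc_free() can locate it again.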

void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
                   omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;

  al = RCAST(kmp_allocator_t *, allocator);

  if (nmemb == 0 || size == 0)
    return ptr;

  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
    if (al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0);
    }
    return ptr;
  }

  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);

  if (ptr) {
    memset(ptr, 0x00, nmemb * size);
  }
  return ptr;
}
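
// The division-based guard above rejects requests where nmemb * size plus the
// descriptor would overflow size_t. A minimal standalone sketch of the same
// check (illustrative; checked_mul is not part of this file), kept under
// #if 0 so it does not affect the build:
#if 0
#include <stddef.h>
#include <stdint.h>
static int checked_mul(size_t nmemb, size_t size, size_t overhead,
                       size_t *out) {
  if (size != 0 && (SIZE_MAX - overhead) / size < nmemb)
    return 0; // nmemb * size + overhead would not fit in size_t
  *out = nmemb * size;
  return 1;
}
#endif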

void *__kmp_realloc(int gtid, void *ptr, size_t size,
                    omp_allocator_handle_t allocator,
                    omp_allocator_handle_t free_allocator) {
  void *nptr = NULL;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (size == 0) {
    if (ptr != NULL)
      ___kmpc_free(gtid, ptr, free_allocator);
    return nptr;
  }

  nptr = __kmp_alloc(gtid, 0, size, allocator);

  if (nptr != NULL && ptr != NULL) {
    kmp_mem_desc_t desc;
    kmp_uintptr_t addr_align; // address to return to caller
    kmp_uintptr_t addr_descr; // address of memory block descriptor

    addr_align = (kmp_uintptr_t)ptr;
    addr_descr = addr_align - sizeof(kmp_mem_desc_t);
    desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

    KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
    KMP_DEBUG_ASSERT(desc.size_orig > 0);
    KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
    KMP_MEMCPY((char *)nptr, (char *)ptr,
               (size_t)((size < desc.size_orig) ? size : desc.size_orig));
  }

  if (nptr != NULL) {
    ___kmpc_free(gtid, ptr, free_allocator);
  }

  return nptr;
}
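
// __kmp_realloc above follows the classic allocate-copy-free pattern: get a
// new block, copy min(new size, old size) bytes, and release the old block
// only once the new one exists. A minimal generic sketch using plain
// malloc/free (illustrative; my_realloc is not part of this file), kept under
// #if 0:
#if 0
#include <stdlib.h>
#include <string.h>
static void *my_realloc(void *old_ptr, size_t old_size, size_t new_size) {
  void *new_ptr = malloc(new_size);
  if (new_ptr != NULL && old_ptr != NULL)
    memcpy(new_ptr, old_ptr, new_size < old_size ? new_size : old_size);
  if (new_ptr != NULL)
    free(old_ptr); // old block freed only if the new allocation succeeded
  return new_ptr;
}
#endif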

void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
                                     (allocator > kmp_max_mem_alloc &&
                                      KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    if (allocator == llvm_omp_target_host_mem_alloc) {
      kmp_target_free_host(ptr, device);
    } else if (allocator == llvm_omp_target_shared_mem_alloc) {
      kmp_target_free_shared(ptr, device);
    } else if (allocator == llvm_omp_target_device_mem_alloc) {
      kmp_target_free_device(ptr, device);
    }
    return;
  }

  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  if (allocator) {
    KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  }
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
  KMP_DEBUG_ASSERT(al);

  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    kmp_target_unlock_mem(desc.ptr_alloc, device);
  }

  if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        (void)used; // to suppress compiler warning
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      (void)used; // to suppress compiler warning
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
}
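
// A minimal sketch of the pool accounting pattern used by __kmp_alloc and
// ___kmpc_free: reserve with a fetch-and-add, undo the reservation when the
// budget is exceeded, and subtract again on free. Illustrative only; it uses
// GCC __atomic builtins instead of the runtime's KMP_TEST_THEN_ADD64 wrapper
// and is kept under #if 0:
#if 0
static int pool_reserve(unsigned long long *pool_used,
                        unsigned long long pool_size, size_t request) {
  unsigned long long prev =
      __atomic_fetch_add(pool_used, request, __ATOMIC_SEQ_CST);
  if (prev + request > pool_size) { // over budget: roll back and fail
    __atomic_fetch_sub(pool_used, request, __ATOMIC_SEQ_CST);
    return 0;
  }
  return 1; // reservation holds; the matching fetch_sub happens on free
}
#endif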

/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, use of
   freed pointers, etc. */
/* #define LEAK_MEMORY */
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) a bigger memory block
     than requested in order to return a properly aligned pointer. The original
     pointer returned by malloc() and the size of the allocated block are saved
     in a descriptor just before the aligned pointer. This information is used
     by __kmp_free() -- it has to pass to free() the original pointer, not the
     aligned one.

        +---------+------------+-----------------------------+---------+
        | padding | descriptor |        aligned block        | padding |
        +---------+------------+-----------------------------+---------+
        ^                      ^
        |                      |
        |                      +- Aligned pointer returned to caller
        +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, ("   ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under the KMP_DEBUG condition! Many
  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
  // the debugging library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align
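
// A self-contained sketch of the same round-up-then-step-back scheme, storing
// only the raw malloc() pointer below the aligned block (illustrative;
// my_aligned_alloc/my_aligned_free are not part of this file). Kept under
// #if 0 so it does not affect the build:
#if 0
#include <stdint.h>
#include <stdlib.h>
static void *my_aligned_alloc(size_t size, size_t align) { // align: power of 2
  size_t sz_desc = sizeof(void *); // "descriptor" is just the raw pointer here
  char *raw = (char *)malloc(size + sz_desc + align);
  if (raw == NULL)
    return NULL;
  uintptr_t aligned = ((uintptr_t)raw + sz_desc + align - 1) &
                      ~(uintptr_t)(align - 1);
  ((void **)aligned)[-1] = raw; // remember malloc() pointer just below
  return (void *)aligned;
}
static void my_aligned_free(void *p) { free(((void **)p)[-1]); }
#endif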

/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
} // func ___kmp_allocate

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
#if KMP_DEBUG
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
#endif
  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));
#if KMP_DEBUG
  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
// If a chunk the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128

void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  size_t num_lines, idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only)
    // pop the head of the sync free list, push NULL instead
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of chain into no-sync free list (can be NULL if there was
    // the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in bget buffer,
  // but we may want to use another allocator in future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
} // func __kmp_fast_allocate
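
// A minimal sketch of the size-class mapping implemented above (illustrative;
// size_to_bucket is not part of this file, and size is assumed > 0). Requests
// are rounded up to 2, 4, 16 or 64 cache lines of DCACHE_LINE bytes; anything
// larger bypasses the free lists. Kept under #if 0:
#if 0
static int size_to_bucket(size_t size) { // returns -1 for "no free list"
  size_t idx = (size + DCACHE_LINE - 1) / DCACHE_LINE - 1;
  if (idx < 2)
    return 0; // rounded up to 2 cache lines
  if ((idx >>= 2) == 0)
    return 1; // rounded up to 4 cache lines
  if ((idx >>= 2) == 0)
    return 2; // rounded up to 16 cache lines
  if ((idx >>= 2) == 0)
    return 3; // rounded up to 64 cache lines
  return -1; // > 64 cache lines: allocate directly via bget()
}
#endif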

// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size in case we add current task
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add current task to "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either queue blocks owner is changing or size limit exceeded
        // return old queue to allocating thread (q_th) synchronously,
        // and start new list for alloc_thr's tasks
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to avoid
           exposing a broken list to other threads, even for an instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start new list of not-self tasks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));

} // func __kmp_fast_free
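
// A minimal sketch of the lock-free LIFO push used above to hand a chain of
// blocks back to the owning thread's sync free list. Illustrative only; it
// uses GCC __atomic builtins rather than the runtime's TCR_PTR /
// KMP_COMPARE_AND_STORE_PTR wrappers, and is kept under #if 0:
#if 0
typedef struct node { struct node *next; } node_t;
static void lifo_push_chain(node_t **list, node_t *head, node_t *tail) {
  node_t *old = __atomic_load_n(list, __ATOMIC_ACQUIRE);
  do {
    // link the chain before publishing it, so other threads never observe a
    // broken list, even for an instant (same rule as the comment above)
    tail->next = old;
  } while (!__atomic_compare_exchange_n(list, &old, head, /*weak=*/true,
                                        __ATOMIC_RELEASE, __ATOMIC_ACQUIRE));
}
#endif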

// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // Assuming the BGET underlying allocator, walk through its structures...
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through free lists and extract all allocated blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is an allocated
        // address
        *((void **)b) =
            lst; // link the list (override bthr, but keep flink yet)
        lst = (void **)b; // push b into lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
#if BufStats
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
#endif
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}

#endif // USE_FAST_MEMORY