// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 * Copyright © 2022 Maíra Canal <[email protected]>
 */

#include <kunit/test.h>

#include <linux/prime_numbers.h>
#include <linux/sched/signal.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

#include "gpu_random.h"

static unsigned int random_seed;

static inline u64 get_size(int order, u64 chunk_size)
{
	return (1 << order) * chunk_size;
}

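/*
 * Every gpu_buddy_alloc_blocks() call in this file uses the same argument
 * order: (mm, range start, range end, total size, min block size /
 * alignment, result list, flags).
 */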
static void gpu_test_buddy_subtree_offset_alignment_stress(struct kunit *test)
{
	struct gpu_buddy_block *block;
	struct rb_node *node = NULL;
	const u64 mm_size = SZ_2M;
	const u64 alignments[] = {
		SZ_1M,
		SZ_512K,
		SZ_256K,
		SZ_128K,
		SZ_64K,
		SZ_32K,
		SZ_16K,
		SZ_8K,
	};
	struct list_head allocated[ARRAY_SIZE(alignments)];
	unsigned int i, max_subtree_align = 0;
	int ret, tree, order;
	struct gpu_buddy mm;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	for (i = 0; i < ARRAY_SIZE(allocated); i++)
		INIT_LIST_HEAD(&allocated[i]);

	/*
	 * Exercise subtree_max_alignment tracking by allocating blocks with descending
	 * alignment constraints and freeing them in reverse order. This verifies that
	 * free-tree augmentation correctly propagates the maximum offset alignment
	 * present in each subtree at every stage.
	 */

	for (i = 0; i < ARRAY_SIZE(alignments); i++) {
		struct gpu_buddy_block *root = NULL;
		unsigned int expected;
		u64 align;

		align = alignments[i];
		expected = ilog2(align) - 1;

		for (;;) {
			ret = gpu_buddy_alloc_blocks(&mm,
						     0, mm_size,
						     SZ_4K, align,
						     &allocated[i],
						     0);
			if (ret)
				break;

			block = list_last_entry(&allocated[i],
						struct gpu_buddy_block,
						link);
			KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), align));
		}

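		/*
		 * Walk the free trees from the highest order down and grab
		 * the root block of the first non-empty tree.
		 */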
		for (order = mm.max_order; order >= 0 && !root; order--) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (node) {
					root = container_of(node,
							    struct gpu_buddy_block,
							    rb);
					break;
				}
			}
		}

		KUNIT_ASSERT_NOT_NULL(test, root);
		KUNIT_EXPECT_EQ(test, root->subtree_max_alignment, expected);
	}

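	/*
	 * Free in reverse allocation order and, after each free, check that
	 * the free-tree roots still advertise at least the alignment that
	 * was just returned to the allocator.
	 */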
	for (i = ARRAY_SIZE(alignments); i-- > 0; ) {
		gpu_buddy_free_list(&mm, &allocated[i], 0);

		for (order = 0; order <= mm.max_order; order++) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (!node)
					continue;

				block = container_of(node, struct gpu_buddy_block, rb);
				max_subtree_align = max(max_subtree_align,
							block->subtree_max_alignment);
			}
		}

		KUNIT_EXPECT_GE(test, max_subtree_align, ilog2(alignments[i]));
	}

	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_offset_aligned_allocation(struct kunit *test)
{
	struct gpu_buddy_block *block, *tmp;
	int num_blocks, i, count = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	u64 mm_size = SZ_4M;
	LIST_HEAD(freed);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	num_blocks = mm_size / SZ_256K;
	/*
	 * Allocate multiple sizes under a fixed offset alignment.
	 * Ensures alignment handling is independent of allocation size and
	 * exercises subtree max-alignment pruning for small requests.
	 */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 8 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_8K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}
	gpu_buddy_free_list(&mm, &allocated, 0);

	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_16K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 16 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_16K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}

	/*
	 * Free alternating aligned blocks to introduce fragmentation.
	 * Ensures offset-aligned allocations remain valid after frees and
	 * verifies subtree max-alignment metadata is correctly maintained.
	 */
	list_for_each_entry_safe(block, tmp, &allocated, link) {
		if (count % 2 == 0)
			list_move_tail(&block->link, &freed);
		count++;
	}
	gpu_buddy_free_list(&mm, &freed, 0);

	for (i = 0; i < num_blocks / 2; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_16K);

	/*
	 * Allocate with offset alignment after all slots are used; must fail.
	 * Confirms that no aligned offsets remain.
	 */
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
							   &allocated, 0),
			      "buddy_alloc unexpectedly succeeded size=%u\n", SZ_16K);
	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_fragmentation_performance(struct kunit *test)
{
	struct gpu_buddy_block *block, *tmp;
	int num_blocks, i, ret, count = 0;
	LIST_HEAD(allocated_blocks);
	unsigned long elapsed_ms;
	LIST_HEAD(reverse_list);
	LIST_HEAD(test_blocks);
	LIST_HEAD(clear_list);
	LIST_HEAD(dirty_list);
	LIST_HEAD(free_list);
	struct gpu_buddy mm;
	u64 mm_size = SZ_4G;
	ktime_t start, end;

	/*
	 * Allocation under severe fragmentation
	 *
	 * Create severe fragmentation by allocating the entire 4 GiB address space
	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
	 * leaves many scattered holes. Split the allocations into two groups and
	 * return them with different flags to block coalescing, then repeatedly
	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
	 * quickly the allocator can satisfy larger, aligned requests from a pool of
	 * highly fragmented space.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	num_blocks = mm_size / SZ_64K;

	start = ktime_get();
	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
								    &allocated_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
		if (count % 4 == 0 || count % 4 == 3)
			list_move_tail(&block->link, &clear_list);
		else
			list_move_tail(&block->link, &dirty_list);
		count++;
	}

	/* Free with different flags to ensure no coalescing */
	gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty_list, 0);

	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
								    &test_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_64K);
	gpu_buddy_free_list(&mm, &test_blocks, 0);

	end = ktime_get();
	elapsed_ms = ktime_to_ms(ktime_sub(end, start));

	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);

	gpu_buddy_fini(&mm);

	/*
	 * Reverse free order under fragmentation
	 *
	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
	 * blocks are selectively freed to form sparse gaps, while the remaining
	 * allocations are preserved, reordered in reverse, and released back with
	 * the cleared flag. This models a pathological reverse-ordered free pattern
	 * and measures how quickly the allocator can merge and reclaim space when
	 * deallocation occurs in the opposite order of allocation, exposing the
	 * cost difference between a linear freelist scan and an ordered tree lookup.
	 */
	ret = gpu_buddy_init(&mm, mm_size, SZ_4K);
	KUNIT_ASSERT_EQ(test, ret, 0);

	start = ktime_get();
	/* Allocate maximum fragmentation */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
								    &allocated_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
		if (count % 2 == 0)
			list_move_tail(&block->link, &free_list);
		count++;
	}
	gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED);

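	/*
	 * Move the surviving blocks so that reverse_list holds them in
	 * reverse allocation order, then release them all at once.
	 */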
	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
		list_move_tail(&block->link, &reverse_list);
	gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED);

	end = ktime_get();
	elapsed_ms = ktime_to_ms(ktime_sub(end, start));

	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);

	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_range_bias(struct kunit *test)
{
	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
	GPU_RND_STATE(prng, random_seed);
	unsigned int i, count, *order;
	struct gpu_buddy_block *block;
	unsigned long flags;
	struct gpu_buddy mm;
	LIST_HEAD(allocated);

	bias_size = SZ_1M;
	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
	ps = max(SZ_4K, ps);
	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */

	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	count = mm_size / bias_size;
	order = gpu_random_order(count, &prng);
	KUNIT_EXPECT_TRUE(test, order);

	/*
	 * Idea is to split the address space into uniform bias ranges, and then
	 * in some random order allocate within each bias, using various
	 * patterns within. This should detect if allocations leak out from a
	 * given bias, for example.
	 */

	for (i = 0; i < count; i++) {
		LIST_HEAD(tmp);
		u32 size;

		bias_start = order[i] * bias_size;
		bias_end = bias_start + bias_size;
		bias_rem = bias_size;

		/* internal round_up too big */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_size + ps, bias_size,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_size + ps, bias_size);

		/* size too big */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_size + ps, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_size + ps, ps);

		/* bias range too small for size */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
							     bias_end, bias_size, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start + ps, bias_end, bias_size, ps);

		/* bias misaligned */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
							     bias_end - ps,
							     bias_size >> 1, bias_size >> 1,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);

		/* single big page */
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, bias_size, bias_size,
							      &tmp,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, bias_size, bias_size);
		gpu_buddy_free_list(&mm, &tmp, 0);

		/* single page with internal round_up */
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, ps, bias_size,
							      &tmp,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, ps, bias_size);
		gpu_buddy_free_list(&mm, &tmp, 0);

		/* random size within */
		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
		if (size)
			KUNIT_ASSERT_FALSE_MSG(test,
					       gpu_buddy_alloc_blocks(&mm, bias_start,
								      bias_end, size, ps,
								      &tmp,
								      GPU_BUDDY_RANGE_ALLOCATION),
					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
					       bias_start, bias_end, size, ps);

		bias_rem -= size;
		/* too big for current avail */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_rem + ps, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_rem + ps, ps);

		if (bias_rem) {
			/* random fill of the remainder */
			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
			size = max(size, ps);

			KUNIT_ASSERT_FALSE_MSG(test,
					       gpu_buddy_alloc_blocks(&mm, bias_start,
								      bias_end, size, ps,
								      &allocated,
								      GPU_BUDDY_RANGE_ALLOCATION),
					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
					       bias_start, bias_end, size, ps);
			/*
			 * Intentionally allow some space to be left
			 * unallocated, and ideally not always on the bias
			 * boundaries.
			 */
			gpu_buddy_free_list(&mm, &tmp, 0);
		} else {
			list_splice_tail(&tmp, &allocated);
		}
	}

	kfree(order);
	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Something more free-form. Idea is to pick a random starting bias
	 * range within the address space and then start filling it up. Also
	 * randomly grow the bias range in both directions as we go along. This
	 * should give us bias start/end which is not always uniform like above,
	 * and in some cases will require the allocator to jump over already
	 * allocated nodes in the middle of the address space.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	do {
		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, size, ps,
							      &allocated,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, size, ps);
		bias_rem -= size;

		/*
		 * Try to randomly grow the bias range in both directions, or
		 * only one, or perhaps don't grow at all.
		 */
		do {
			u32 old_bias_start = bias_start;
			u32 old_bias_end = bias_end;

			if (bias_start)
				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
			if (bias_end != mm_size)
				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);

			bias_rem += old_bias_start - bias_start;
			bias_rem += bias_end - old_bias_end;
		} while (!bias_rem && (bias_start || bias_end != mm_size));
	} while (bias_rem);

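	/*
	 * The grow loop can only terminate once the bias range spans the
	 * whole mm and every page in it has been allocated, so a final
	 * single-page allocation must fail.
	 */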
	KUNIT_ASSERT_EQ(test, bias_start, 0);
	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
	KUNIT_ASSERT_TRUE_MSG(test,
			      gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
						     ps, ps,
						     &allocated,
						     GPU_BUDDY_RANGE_ALLOCATION),
			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
			      bias_start, bias_end, ps);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
	 * zero. This will validate the bias range allocation in scenarios like system boot
	 * when no cleared blocks are available and exercise the fallback path too. The resulting
	 * blocks should always be dirty.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

	KUNIT_ASSERT_FALSE_MSG(test,
			       gpu_buddy_alloc_blocks(&mm, bias_start,
						      bias_end, size, ps,
						      &allocated,
						      flags),
			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
			       bias_start, bias_end, size, ps);

	list_for_each_entry(block, &allocated, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_range(struct kunit *test)
{
	GPU_RND_STATE(prng, random_seed);
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	u32 mm_size, total;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	u32 ps = SZ_4K;
	int ret;

	mm_size = SZ_16M;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	/*
	 * Basic exact-range allocation.
	 * Allocate the entire mm as one exact range (start + size == end).
	 * This is the simplest case exercising __gpu_buddy_alloc_range.
	 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ_MSG(test, ret, 0,
			    "exact-range alloc of full mm failed\n");

	total = 0;
	list_for_each_entry(block, &blocks, link) {
		u64 offset = gpu_buddy_block_offset(block);
		u64 bsize = gpu_buddy_block_size(&mm, block);

		KUNIT_EXPECT_TRUE_MSG(test, offset + bsize <= (u64)mm_size,
				      "block [%llx, %llx) outside mm\n", offset, offset + bsize);
		total += (u32)bsize;
	}
	KUNIT_EXPECT_EQ(test, total, mm_size);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	/* Full mm should be exhausted */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0, "alloc should fail when mm is full\n");

	gpu_buddy_free_list(&mm, &blocks, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	gpu_buddy_fini(&mm);

	/*
	 * Exact-range allocation of sub-ranges.
	 * Split the mm into four equal quarters and allocate each as an exact
	 * range. Validates splitting and non-overlapping exact allocations.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 quarter = mm_size / 4;
		int i;

		for (i = 0; i < 4; i++) {
			u32 start = i * quarter;
			u32 end = start + quarter;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, quarter, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "exact-range alloc quarter %d [%x, %x) failed\n",
					    i, start, end);
		}
		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Minimum chunk-size exact range at various offsets.
	 * Allocate single-page exact ranges at the start, middle and end.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size / 2 + ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size - ps, mm_size, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, 3 * ps);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Non power-of-two mm size (multiple roots).
	 * Exact-range allocations that span root boundaries must still work.
	 */
	mm_size = SZ_4M + SZ_2M + SZ_1M; /* 7 MiB, three roots */

	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_GT(test, mm.n_roots, 1U);

	/* Allocate first 4M root exactly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate second root (4M-6M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M, SZ_2M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate third root (6M-7M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M + SZ_2M, mm_size, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Cross-root exact-range: the entire non-pot mm */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Randomized exact-range allocations.
	 * Divide the mm into N random-sized, contiguous, page-aligned slices
	 * and allocate each as an exact range in random order.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
#define N_RAND_RANGES 16
		u32 ranges[N_RAND_RANGES + 1]; /* boundaries */
		u32 order_arr[N_RAND_RANGES];
		u32 remaining = mm_size;
		int i;

		ranges[0] = 0;
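		/*
		 * Pick random page-aligned boundaries, always leaving at
		 * least one page for each of the remaining slices.
		 */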
		for (i = 0; i < N_RAND_RANGES - 1; i++) {
			u32 max_chunk = remaining - (N_RAND_RANGES - 1 - i) * ps;
			u32 sz = max(round_up(prandom_u32_state(&prng) % max_chunk, ps), ps);

			ranges[i + 1] = ranges[i] + sz;
			remaining -= sz;
		}
		ranges[N_RAND_RANGES] = mm_size;

		/* Create a random order */
		for (i = 0; i < N_RAND_RANGES; i++)
			order_arr[i] = i;
		for (i = N_RAND_RANGES - 1; i > 0; i--) {
			u32 j = prandom_u32_state(&prng) % (i + 1);
			u32 tmp_val = order_arr[i];

			order_arr[i] = order_arr[j];
			order_arr[j] = tmp_val;
		}

		for (i = 0; i < N_RAND_RANGES; i++) {
			u32 idx = order_arr[i];
			u32 start = ranges[idx];
			u32 end = ranges[idx + 1];
			u32 sz = end - start;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, sz, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "random exact-range [%x, %x) sz=%x failed\n",
					    start, end, sz);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
#undef N_RAND_RANGES
	}

	gpu_buddy_fini(&mm);

	/*
	 * Negative case - partially allocated range.
	 * Allocate the first half, then try to exact-range allocate the full
	 * mm. This must fail because the first half is already occupied.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "exact-range alloc should fail when range is partially used\n");

	/* Also try the already-occupied sub-range directly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "double alloc of same exact range should fail\n");

	/* The free second half should still be allocatable */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Negative case - checkerboard partial allocation.
	 * Allocate every other page-sized chunk in a small mm, then try to
	 * exact-range allocate a range covering two pages (one allocated, one
	 * free). This must fail.
	 */
	mm_size = SZ_64K;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 off;

		for (off = 0; off < mm_size; off += 2 * ps) {
			ret = gpu_buddy_alloc_blocks(&mm, off, off + ps, ps, ps, &blocks, 0);
			KUNIT_ASSERT_EQ(test, ret, 0);
		}

		/* Try exact range over a pair [allocated, free] */
		ret = gpu_buddy_alloc_blocks(&mm, 0, 2 * ps, 2 * ps, ps, &tmp, 0);
		KUNIT_EXPECT_NE_MSG(test, ret, 0,
				    "exact-range over partially allocated pair should fail\n");

		/* The free pages individually should still work */
		ret = gpu_buddy_alloc_blocks(&mm, ps, 2 * ps, ps, ps, &blocks, 0);
		KUNIT_ASSERT_EQ(test, ret, 0);

		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/* Negative case - misaligned start/end/size */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* start not aligned to chunk_size */
	ret = gpu_buddy_alloc_blocks(&mm, ps / 2, ps / 2 + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* size not aligned */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps + 1, ps + 1, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* end exceeds mm size */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size, mm_size + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	gpu_buddy_fini(&mm);

	/*
	 * Free and re-allocate the same exact range.
	 * This exercises merge-on-free followed by exact-range re-split.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		int i;

		for (i = 0; i < 5; i++) {
			ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M,
						     SZ_2M, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "re-alloc iteration %d failed\n", i);

			total = 0;
			list_for_each_entry(block, &blocks, link) {
				u64 offset = gpu_buddy_block_offset(block);
				u64 bsize = gpu_buddy_block_size(&mm, block);

				KUNIT_EXPECT_GE(test, offset, (u64)SZ_4M);
				KUNIT_EXPECT_LE(test, offset + bsize, (u64)(SZ_4M + SZ_2M));
				total += (u32)bsize;
			}
			KUNIT_EXPECT_EQ(test, total, SZ_2M);

			gpu_buddy_free_list(&mm, &blocks, 0);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Various power-of-two exact ranges within a large mm.
	 * Allocate non-overlapping power-of-two exact ranges at their natural
	 * alignment, validating that the allocator handles different orders.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* Allocate 4K at offset 0 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4K, SZ_4K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 64K at offset 64K */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_64K, SZ_64K + SZ_64K, SZ_64K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 1M at offset 1M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_1M, SZ_1M + SZ_1M, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 4M at offset 4M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, SZ_4K + SZ_64K + SZ_1M + SZ_4M);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_clear(struct kunit *test)
{
	unsigned long n_pages, total, i = 0;
	const unsigned long ps = SZ_4K;
	struct gpu_buddy_block *block;
	const int max_order = 12;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	unsigned int order;
	u32 mm_size, size;
	LIST_HEAD(dirty);
	LIST_HEAD(clean);

	mm_size = SZ_4K << max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/*
	 * Idea is to allocate and free some random portion of the address space,
	 * returning those pages as non-dirty and randomly alternate between
	 * requesting dirty and non-dirty pages (not going over the limit
	 * we freed as non-dirty), putting that into two separate lists.
	 * Loop over both lists at the end checking that the dirty list
	 * is indeed all dirty pages and vice versa. Free it all again,
	 * keeping the dirty/clear status.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    5 * ps, ps, &allocated,
							    GPU_BUDDY_TOPDOWN_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 5 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);

	n_pages = 10;
	do {
		unsigned long flags;
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0) {
			list = &dirty;
			flags = 0;
		} else {
			list = &clean;
			flags = GPU_BUDDY_CLEAR_ALLOCATION;
		}

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list,
								    flags),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	list_for_each_entry(block, &clean, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);

	list_for_each_entry(block, &dirty, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);

	/*
	 * Trying to go over the clear limit for some allocation.
	 * The allocation should never fail with reasonable page-size.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    10 * ps, ps, &clean,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 10 * ps);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);
	gpu_buddy_fini(&mm);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Create a new mm. Intentionally fragment the address space by creating
	 * two alternating lists. Free both lists, one as dirty the other as clean.
	 * Try to allocate double the previous size with matching min_page_size. The
	 * allocation should never fail as it calls the force_merge. Also check that
	 * the page is always dirty after force_merge. Free the page as dirty, then
	 * repeat the whole thing, increment the order until we hit the max_order.
	 */

	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0)
			list = &dirty;
		else
			list = &clean;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);

	order = 1;
	do {
		size = SZ_4K << order;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    size, size, &allocated,
								    GPU_BUDDY_CLEAR_ALLOCATION),
				       "buddy_alloc hit an error size=%u\n", size);
		total = 0;
		list_for_each_entry(block, &allocated, link) {
			if (size != mm_size)
				KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
			total += gpu_buddy_block_size(&mm, block);
		}
		KUNIT_EXPECT_EQ(test, total, size);

		gpu_buddy_free_list(&mm, &allocated, 0);
	} while (++order <= max_order);

	gpu_buddy_fini(&mm);

	/*
	 * Create a new mm with a non power-of-two size. Allocate a random size from each
	 * root, free as cleared and then call fini. This will ensure the multi-root
	 * force merge during fini.
	 */
	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    4 * ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 4 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
							    ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
{
	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
	unsigned long i, n_pages, total;
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	LIST_HEAD(left);
	LIST_HEAD(middle);
	LIST_HEAD(right);
	LIST_HEAD(allocated);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Idea is to fragment the address space by alternating block
	 * allocations between three different lists; one for left, middle and
	 * right. We can then free a list to simulate fragmentation. In
	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
	 * including the try_harder path.
	 */

	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 3;

		if (slot == 0)
			list = &left;
		else if (slot == 1)
			list = &middle;
		else
			list = &right;
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							      ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n",
				       ps);
	} while (++i < n_pages);

	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);

	gpu_buddy_free_list(&mm, &middle, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   2 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 2 * ps);

	gpu_buddy_free_list(&mm, &right, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	/*
	 * At this point we should have enough contiguous space for 2 blocks,
	 * however they are never buddies (since we freed middle and right) so
	 * will require the try_harder logic to find them.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);

	gpu_buddy_free_list(&mm, &left, 0);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    3 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 3 * ps);

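	/*
	 * Only the successful 2-page and 3-page contiguous allocations above
	 * should have ended up on the allocated list.
	 */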
	total = 0;
	list_for_each_entry(block, &allocated, link)
		total += gpu_buddy_block_size(&mm, block);

	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	const int max_order = 3;
	unsigned long flags = 0;
	int order, top;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(holes);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left. Free the largest block, then whittle down again.
	 * Eventually we will have a fully 50% fragmented mm.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (top = max_order; top; top--) {
		/* Make room by freeing the largest allocated block */
		block = list_first_entry_or_null(&blocks, typeof(*block), link);
		if (block) {
			list_del(&block->link);
			gpu_buddy_free_block(&mm, block);
		}

		for (order = top; order--;) {
			size = get_size(order, mm.chunk_size);
			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
									    mm_size, size, size,
									    &tmp, flags),
					       "buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
					       order, top);

			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

			list_move_tail(&block->link, &blocks);
		}

		/* There should be one final page for this sub-allocation */
		size = get_size(0, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM for hole\n");

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &holes);

		size = get_size(top, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
				      top, max_order);
	}

	gpu_buddy_free_list(&mm, &holes, 0);

	/* Nothing larger than blocks of chunk_size now available */
	for (order = 1; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
				      order);
	}

	list_splice_tail(&holes, &blocks);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block, *bn;
	const unsigned int max_order = 16;
	unsigned long flags = 0;
	struct gpu_buddy mm;
	unsigned int order;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (order = 0; order < max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* And now the last remaining block available */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc hit -ENOMEM on final alloc\n");

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_move_tail(&block->link, &blocks);

	/* Should be completely full! */
	for (order = max_order; order--;) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded, it should be full!");
	}

	block = list_last_entry(&blocks, typeof(*block), link);
	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);

	/* As we free in increasing size, we make available larger blocks */
	order = 1;
	list_for_each_entry_safe(block, bn, &blocks, link) {
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);

		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);
		order++;
	}

	/* To confirm, now the whole mm should be available */
	size = get_size(max_order, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
			       max_order);

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	const int max_order = 16;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	int order;

	/*
	 * Create a mm with one block of each order available, and
	 * try to allocate them all.
	 */

	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (order = 0; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* Should be completely full! */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							   size, size, &tmp, flags),
			      "buddy_alloc unexpectedly succeeded, it should be full!");

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_limit(struct kunit *test)
{
	u64 size = U64_MAX, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));

	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
			    "mm.max_order(%d) != %d\n", mm.max_order,
			    GPU_BUDDY_MAX_ORDER);

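	/* The largest possible single allocation is one block of max_order. */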
	size = mm.chunk_size << mm.max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
							mm.chunk_size, &allocated, flags));

	block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
	KUNIT_EXPECT_TRUE(test, block);

	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
			    "block order(%d) != %d\n",
			    gpu_buddy_block_order(block), mm.max_order);

	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size,
			    "block size(%llu) != %llu\n",
			    gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
{
	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	int err;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	/* CONTIGUOUS allocation should succeed via try_harder fallback */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
							    SZ_4K, &blocks,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%llu\n", size);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
				     GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
				     GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	gpu_buddy_fini(&mm);
}

static int gpu_buddy_suite_init(struct kunit_suite *suite)
{
	while (!random_seed)
		random_seed = get_random_u32();

	kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
		   random_seed);

	return 0;
}

static struct kunit_case gpu_buddy_tests[] = {
	KUNIT_CASE(gpu_test_buddy_alloc_limit),
	KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pathological),
	KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
	KUNIT_CASE(gpu_test_buddy_alloc_clear),
	KUNIT_CASE(gpu_test_buddy_alloc_range),
	KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
	KUNIT_CASE_SLOW(gpu_test_buddy_fragmentation_performance),
	KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
	KUNIT_CASE(gpu_test_buddy_offset_aligned_allocation),
	KUNIT_CASE(gpu_test_buddy_subtree_offset_alignment_stress),
	{}
};

static struct kunit_suite gpu_buddy_test_suite = {
	.name = "gpu_buddy",
	.suite_init = gpu_buddy_suite_init,
	.test_cases = gpu_buddy_tests,
};

kunit_test_suite(gpu_buddy_test_suite);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("KUnit test for gpu_buddy functions");
MODULE_LICENSE("GPL");

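/*
 * Example invocation (an assumption, not part of this file): with a
 * kunitconfig that enables this test, something like
 *
 *   ./tools/testing/kunit/kunit.py run 'gpu_buddy'
 *
 * should build and run the gpu_buddy suite.
 */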