// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 * Copyright © 2022 Maíra Canal <[email protected]>
 */

#include <kunit/test.h>

#include <linux/prime_numbers.h>
#include <linux/sched/signal.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

#include "gpu_random.h"

static unsigned int random_seed;

static inline u64 get_size(int order, u64 chunk_size)
{
	return (1 << order) * chunk_size;
}

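/*
 * Every gpu_buddy_alloc_blocks() call in this file uses the same argument
 * order: (mm, range start, range end, total size, min block size /
 * alignment, result list, flags).
 */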
static void gpu_test_buddy_subtree_offset_alignment_stress(struct kunit *test)
{
	struct gpu_buddy_block *block;
	struct rb_node *node = NULL;
	const u64 mm_size = SZ_2M;
	const u64 alignments[] = {
		SZ_1M,
		SZ_512K,
		SZ_256K,
		SZ_128K,
		SZ_64K,
		SZ_32K,
		SZ_16K,
		SZ_8K,
	};
	struct list_head allocated[ARRAY_SIZE(alignments)];
	unsigned int i, max_subtree_align = 0;
	int ret, tree, order;
	struct gpu_buddy mm;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	for (i = 0; i < ARRAY_SIZE(allocated); i++)
		INIT_LIST_HEAD(&allocated[i]);

	/*
	 * Exercise subtree_max_alignment tracking by allocating blocks with descending
	 * alignment constraints and freeing them in reverse order. This verifies that
	 * free-tree augmentation correctly propagates the maximum offset alignment
	 * present in each subtree at every stage.
	 */

	for (i = 0; i < ARRAY_SIZE(alignments); i++) {
		struct gpu_buddy_block *root = NULL;
		unsigned int expected;
		u64 align;

		align = alignments[i];
		expected = ilog2(align) - 1;

		for (;;) {
			ret = gpu_buddy_alloc_blocks(&mm,
						     0, mm_size,
						     SZ_4K, align,
						     &allocated[i],
						     0);
			if (ret)
				break;

			block = list_last_entry(&allocated[i],
						struct gpu_buddy_block,
						link);
			KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), align));
		}

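		/*
		 * Walk the free trees from the highest order down and grab
		 * the root block of the first non-empty tree.
		 */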
		for (order = mm.max_order; order >= 0 && !root; order--) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (node) {
					root = container_of(node,
							    struct gpu_buddy_block,
							    rb);
					break;
				}
			}
		}

		KUNIT_ASSERT_NOT_NULL(test, root);
		KUNIT_EXPECT_EQ(test, root->subtree_max_alignment, expected);
	}

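	/*
	 * Free in reverse allocation order and, after each free, check that
	 * the free-tree roots still advertise at least the alignment that
	 * was just returned to the allocator.
	 */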
	for (i = ARRAY_SIZE(alignments); i-- > 0; ) {
		gpu_buddy_free_list(&mm, &allocated[i], 0);

		for (order = 0; order <= mm.max_order; order++) {
			for (tree = 0; tree < 2; tree++) {
				node = mm.free_trees[tree][order].rb_node;
				if (!node)
					continue;

				block = container_of(node, struct gpu_buddy_block, rb);
				max_subtree_align = max(max_subtree_align,
							block->subtree_max_alignment);
			}
		}

		KUNIT_EXPECT_GE(test, max_subtree_align, ilog2(alignments[i]));
	}

	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_offset_aligned_allocation(struct kunit *test)
{
	struct gpu_buddy_block *block, *tmp;
	int num_blocks, i, count = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	u64 mm_size = SZ_4M;
	LIST_HEAD(freed);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	num_blocks = mm_size / SZ_256K;
	/*
	 * Allocate multiple sizes under a fixed offset alignment.
	 * Ensures alignment handling is independent of allocation size and
	 * exercises subtree max-alignment pruning for small requests.
	 */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 8 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_8K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}
	gpu_buddy_free_list(&mm, &allocated, 0);

	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_16K);

	list_for_each_entry(block, &allocated, link) {
		/* Ensure the allocated block uses the expected 16 KB size */
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_size(&mm, block), SZ_16K);
		/* Ensure the block starts at a 256 KB-aligned offset for proper alignment */
		KUNIT_EXPECT_TRUE(test, IS_ALIGNED(gpu_buddy_block_offset(block), SZ_256K));
	}

	/*
	 * Free alternating aligned blocks to introduce fragmentation.
	 * Ensures offset-aligned allocations remain valid after frees and
	 * verifies subtree max-alignment metadata is correctly maintained.
	 */
	list_for_each_entry_safe(block, tmp, &allocated, link) {
		if (count % 2 == 0)
			list_move_tail(&block->link, &freed);
		count++;
	}
	gpu_buddy_free_list(&mm, &freed, 0);

	for (i = 0; i < num_blocks / 2; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
								    &allocated, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_16K);

	/*
	 * Allocate with offset alignment after all slots are used; must fail.
	 * Confirms that no aligned offsets remain.
	 */
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_16K, SZ_256K,
							   &allocated, 0),
			      "buddy_alloc unexpectedly succeeded size=%u\n", SZ_16K);
	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_fragmentation_performance(struct kunit *test)
{
	struct gpu_buddy_block *block, *tmp;
	int num_blocks, i, ret, count = 0;
	LIST_HEAD(allocated_blocks);
	unsigned long elapsed_ms;
	LIST_HEAD(reverse_list);
	LIST_HEAD(test_blocks);
	LIST_HEAD(clear_list);
	LIST_HEAD(dirty_list);
	LIST_HEAD(free_list);
	struct gpu_buddy mm;
	u64 mm_size = SZ_4G;
	ktime_t start, end;

	/*
	 * Allocation under severe fragmentation
	 *
	 * Create severe fragmentation by allocating the entire 4 GiB address space
	 * as tiny 8 KiB blocks but forcing a 64 KiB alignment. The resulting pattern
	 * leaves many scattered holes. Split the allocations into two groups and
	 * return them with different flags to block coalescing, then repeatedly
	 * allocate and free 64 KiB blocks while timing the loop. This stresses how
	 * quickly the allocator can satisfy larger, aligned requests from a pool of
	 * highly fragmented space.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	num_blocks = mm_size / SZ_64K;

	start = ktime_get();
	/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
								    &allocated_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
		if (count % 4 == 0 || count % 4 == 3)
			list_move_tail(&block->link, &clear_list);
		else
			list_move_tail(&block->link, &dirty_list);
		count++;
	}

	/* Free with different flags to ensure no coalescing */
	gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty_list, 0);

	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
								    &test_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_64K);
	gpu_buddy_free_list(&mm, &test_blocks, 0);

	end = ktime_get();
	elapsed_ms = ktime_to_ms(ktime_sub(end, start));

	kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);

	gpu_buddy_fini(&mm);

	/*
	 * Reverse free order under fragmentation
	 *
	 * Construct a fragmented 4 GiB space by allocating every 8 KiB block with
	 * 64 KiB alignment, creating a dense scatter of small regions. Half of the
	 * blocks are selectively freed to form sparse gaps, while the remaining
	 * allocations are preserved, reordered in reverse, and released back with
	 * the cleared flag. This models a pathological reverse-ordered free pattern
	 * and measures how quickly the allocator can merge and reclaim space when
	 * deallocation occurs in the opposite order of allocation, exposing the
	 * cost difference between a linear freelist scan and an ordered tree lookup.
	 */
	ret = gpu_buddy_init(&mm, mm_size, SZ_4K);
	KUNIT_ASSERT_EQ(test, ret, 0);

	start = ktime_get();
	/* Allocate maximum fragmentation */
	for (i = 0; i < num_blocks; i++)
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
								    &allocated_blocks, 0),
				       "buddy_alloc hit an error size=%u\n", SZ_8K);

	list_for_each_entry_safe(block, tmp, &allocated_blocks, link) {
		if (count % 2 == 0)
			list_move_tail(&block->link, &free_list);
		count++;
	}
	gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED);

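	/*
	 * Move the surviving blocks so that reverse_list holds them in
	 * reverse allocation order, then release them all at once.
	 */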
	list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
		list_move_tail(&block->link, &reverse_list);
	gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED);

	end = ktime_get();
	elapsed_ms = ktime_to_ms(ktime_sub(end, start));

	kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);

	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_range_bias(struct kunit *test)
{
	u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
	GPU_RND_STATE(prng, random_seed);
	unsigned int i, count, *order;
	struct gpu_buddy_block *block;
	unsigned long flags;
	struct gpu_buddy mm;
	LIST_HEAD(allocated);

	bias_size = SZ_1M;
	ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
	ps = max(SZ_4K, ps);
	mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */

	kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	count = mm_size / bias_size;
	order = gpu_random_order(count, &prng);
	KUNIT_EXPECT_TRUE(test, order);

	/*
	 * Idea is to split the address space into uniform bias ranges, and then
	 * in some random order allocate within each bias, using various
	 * patterns within. This should detect if allocations leak out from a
	 * given bias, for example.
	 */

	for (i = 0; i < count; i++) {
		LIST_HEAD(tmp);
		u32 size;

		bias_start = order[i] * bias_size;
		bias_end = bias_start + bias_size;
		bias_rem = bias_size;

		/* internal round_up too big */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_size + ps, bias_size,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_size + ps, bias_size);

		/* size too big */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_size + ps, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_size + ps, ps);

		/* bias range too small for size */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
							     bias_end, bias_size, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start + ps, bias_end, bias_size, ps);

		/* bias misaligned */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start + ps,
							     bias_end - ps,
							     bias_size >> 1, bias_size >> 1,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);

		/* single big page */
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, bias_size, bias_size,
							      &tmp,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, bias_size, bias_size);
		gpu_buddy_free_list(&mm, &tmp, 0);

		/* single page with internal round_up */
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, ps, bias_size,
							      &tmp,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, ps, bias_size);
		gpu_buddy_free_list(&mm, &tmp, 0);

		/* random size within */
		size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
		if (size)
			KUNIT_ASSERT_FALSE_MSG(test,
					       gpu_buddy_alloc_blocks(&mm, bias_start,
								      bias_end, size, ps,
								      &tmp,
								      GPU_BUDDY_RANGE_ALLOCATION),
					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
					       bias_start, bias_end, size, ps);

		bias_rem -= size;
		/* too big for current avail */
		KUNIT_ASSERT_TRUE_MSG(test,
				      gpu_buddy_alloc_blocks(&mm, bias_start,
							     bias_end, bias_rem + ps, ps,
							     &allocated,
							     GPU_BUDDY_RANGE_ALLOCATION),
				      "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
				      bias_start, bias_end, bias_rem + ps, ps);

		if (bias_rem) {
			/* random fill of the remainder */
			size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
			size = max(size, ps);

			KUNIT_ASSERT_FALSE_MSG(test,
					       gpu_buddy_alloc_blocks(&mm, bias_start,
								      bias_end, size, ps,
								      &allocated,
								      GPU_BUDDY_RANGE_ALLOCATION),
					       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
					       bias_start, bias_end, size, ps);
			/*
			 * Intentionally allow some space to be left
			 * unallocated, and ideally not always on the bias
			 * boundaries.
			 */
			gpu_buddy_free_list(&mm, &tmp, 0);
		} else {
			list_splice_tail(&tmp, &allocated);
		}
	}

	kfree(order);
	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Something more free-form. Idea is to pick a random starting bias
	 * range within the address space and then start filling it up. Also
	 * randomly grow the bias range in both directions as we go along. This
	 * should give us bias start/end which is not always uniform like above,
	 * and in some cases will require the allocator to jump over already
	 * allocated nodes in the middle of the address space.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	do {
		u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, bias_start,
							      bias_end, size, ps,
							      &allocated,
							      GPU_BUDDY_RANGE_ALLOCATION),
				       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
				       bias_start, bias_end, size, ps);
		bias_rem -= size;

		/*
		 * Try to randomly grow the bias range in both directions, or
		 * only one, or perhaps don't grow at all.
		 */
		do {
			u32 old_bias_start = bias_start;
			u32 old_bias_end = bias_end;

			if (bias_start)
				bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
			if (bias_end != mm_size)
				bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);

			bias_rem += old_bias_start - bias_start;
			bias_rem += bias_end - old_bias_end;
		} while (!bias_rem && (bias_start || bias_end != mm_size));
	} while (bias_rem);

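	/*
	 * The grow loop can only terminate once the bias range spans the
	 * whole mm and every page in it has been allocated, so a final
	 * single-page allocation must fail.
	 */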
	KUNIT_ASSERT_EQ(test, bias_start, 0);
	KUNIT_ASSERT_EQ(test, bias_end, mm_size);
	KUNIT_ASSERT_TRUE_MSG(test,
			      gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
						     ps, ps,
						     &allocated,
						     GPU_BUDDY_RANGE_ALLOCATION),
			      "buddy_alloc passed with bias(%x-%x), size=%u\n",
			      bias_start, bias_end, ps);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
	 * zero. This will validate the bias range allocation in scenarios like system boot
	 * when no cleared blocks are available and exercise the fallback path too. The resulting
	 * blocks should always be dirty.
	 */

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
	bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
	bias_end = max(bias_end, bias_start + ps);
	bias_rem = bias_end - bias_start;

	flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
	size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);

	KUNIT_ASSERT_FALSE_MSG(test,
			       gpu_buddy_alloc_blocks(&mm, bias_start,
						      bias_end, size, ps,
						      &allocated,
						      flags),
			       "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
			       bias_start, bias_end, size, ps);

	list_for_each_entry(block, &allocated, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_range(struct kunit *test)
{
	GPU_RND_STATE(prng, random_seed);
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	u32 mm_size, total;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	u32 ps = SZ_4K;
	int ret;

	mm_size = SZ_16M;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
			       "buddy_init failed\n");

	/*
	 * Basic exact-range allocation.
	 * Allocate the entire mm as one exact range (start + size == end).
	 * This is the simplest case exercising __gpu_buddy_alloc_range.
	 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ_MSG(test, ret, 0,
			    "exact-range alloc of full mm failed\n");

	total = 0;
	list_for_each_entry(block, &blocks, link) {
		u64 offset = gpu_buddy_block_offset(block);
		u64 bsize = gpu_buddy_block_size(&mm, block);

		KUNIT_EXPECT_TRUE_MSG(test, offset + bsize <= (u64)mm_size,
				      "block [%llx, %llx) outside mm\n", offset, offset + bsize);
		total += (u32)bsize;
	}
	KUNIT_EXPECT_EQ(test, total, mm_size);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	/* Full mm should be exhausted */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0, "alloc should fail when mm is full\n");

	gpu_buddy_free_list(&mm, &blocks, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	gpu_buddy_fini(&mm);

	/*
	 * Exact-range allocation of sub-ranges.
	 * Split the mm into four equal quarters and allocate each as an exact
	 * range. Validates splitting and non-overlapping exact allocations.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 quarter = mm_size / 4;
		int i;

		for (i = 0; i < 4; i++) {
			u32 start = i * quarter;
			u32 end = start + quarter;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, quarter, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "exact-range alloc quarter %d [%x, %x) failed\n",
					    i, start, end);
		}
		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Minimum chunk-size exact range at various offsets.
	 * Allocate single-page exact ranges at the start, middle and end.
	 */
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size / 2 + ps, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, mm_size - ps, mm_size, ps, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, 3 * ps);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Non power-of-two mm size (multiple roots).
	 * Exact-range allocations that span root boundaries must still work.
	 */
	mm_size = SZ_4M + SZ_2M + SZ_1M; /* 7 MiB, three roots */

	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_GT(test, mm.n_roots, 1U);

	/* Allocate first 4M root exactly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate second root (4M-6M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M, SZ_2M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate third root (6M-7M) exactly */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M + SZ_2M, mm_size, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Cross-root exact-range: the entire non-pot mm */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);
	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Randomized exact-range allocations.
	 * Divide the mm into N random-sized, contiguous, page-aligned slices
	 * and allocate each as an exact range in random order.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
#define N_RAND_RANGES 16
		u32 ranges[N_RAND_RANGES + 1]; /* boundaries */
		u32 order_arr[N_RAND_RANGES];
		u32 remaining = mm_size;
		int i;

		ranges[0] = 0;
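		/*
		 * Pick random page-aligned boundaries, always leaving at
		 * least one page for each of the remaining slices.
		 */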
		for (i = 0; i < N_RAND_RANGES - 1; i++) {
			u32 max_chunk = remaining - (N_RAND_RANGES - 1 - i) * ps;
			u32 sz = max(round_up(prandom_u32_state(&prng) % max_chunk, ps), ps);

			ranges[i + 1] = ranges[i] + sz;
			remaining -= sz;
		}
		ranges[N_RAND_RANGES] = mm_size;

		/* Create a random order */
		for (i = 0; i < N_RAND_RANGES; i++)
			order_arr[i] = i;
		for (i = N_RAND_RANGES - 1; i > 0; i--) {
			u32 j = prandom_u32_state(&prng) % (i + 1);
			u32 tmp_val = order_arr[i];

			order_arr[i] = order_arr[j];
			order_arr[j] = tmp_val;
		}

		for (i = 0; i < N_RAND_RANGES; i++) {
			u32 idx = order_arr[i];
			u32 start = ranges[idx];
			u32 end = ranges[idx + 1];
			u32 sz = end - start;

			ret = gpu_buddy_alloc_blocks(&mm, start, end, sz, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "random exact-range [%x, %x) sz=%x failed\n",
					    start, end, sz);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
		gpu_buddy_free_list(&mm, &blocks, 0);
#undef N_RAND_RANGES
	}

	gpu_buddy_fini(&mm);

	/*
	 * Negative case - partially allocated range.
	 * Allocate the first half, then try to exact-range allocate the full
	 * mm. This must fail because the first half is already occupied.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size, mm_size, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "exact-range alloc should fail when range is partially used\n");

	/* Also try the already-occupied sub-range directly */
	ret = gpu_buddy_alloc_blocks(&mm, 0, mm_size / 2, mm_size / 2, ps, &tmp, 0);
	KUNIT_EXPECT_NE_MSG(test, ret, 0,
			    "double alloc of same exact range should fail\n");

	/* The free second half should still be allocatable */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size / 2, mm_size, mm_size / 2, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	KUNIT_EXPECT_EQ(test, mm.avail, 0ULL);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);

	/*
	 * Negative case - checkerboard partial allocation.
	 * Allocate every other page-sized chunk in a small mm, then try to
	 * exact-range allocate a range covering two pages (one allocated, one
	 * free). This must fail.
	 */
	mm_size = SZ_64K;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		u32 off;

		for (off = 0; off < mm_size; off += 2 * ps) {
			ret = gpu_buddy_alloc_blocks(&mm, off, off + ps, ps, ps, &blocks, 0);
			KUNIT_ASSERT_EQ(test, ret, 0);
		}

		/* Try exact range over a pair [allocated, free] */
		ret = gpu_buddy_alloc_blocks(&mm, 0, 2 * ps, 2 * ps, ps, &tmp, 0);
		KUNIT_EXPECT_NE_MSG(test, ret, 0,
				    "exact-range over partially allocated pair should fail\n");

		/* The free pages individually should still work */
		ret = gpu_buddy_alloc_blocks(&mm, ps, 2 * ps, ps, ps, &blocks, 0);
		KUNIT_ASSERT_EQ(test, ret, 0);

		gpu_buddy_free_list(&mm, &blocks, 0);
	}

	gpu_buddy_fini(&mm);

	/* Negative case - misaligned start/end/size */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* start not aligned to chunk_size */
	ret = gpu_buddy_alloc_blocks(&mm, ps / 2, ps / 2 + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* size not aligned */
	ret = gpu_buddy_alloc_blocks(&mm, 0, ps + 1, ps + 1, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	/* end exceeds mm size */
	ret = gpu_buddy_alloc_blocks(&mm, mm_size, mm_size + ps, ps, ps, &tmp, 0);
	KUNIT_EXPECT_NE(test, ret, 0);

	gpu_buddy_fini(&mm);

	/*
	 * Free and re-allocate the same exact range.
	 * This exercises merge-on-free followed by exact-range re-split.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	{
		int i;

		for (i = 0; i < 5; i++) {
			ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_2M,
						     SZ_2M, ps, &blocks, 0);
			KUNIT_ASSERT_EQ_MSG(test, ret, 0,
					    "re-alloc iteration %d failed\n", i);

			total = 0;
			list_for_each_entry(block, &blocks, link) {
				u64 offset = gpu_buddy_block_offset(block);
				u64 bsize = gpu_buddy_block_size(&mm, block);

				KUNIT_EXPECT_GE(test, offset, (u64)SZ_4M);
				KUNIT_EXPECT_LE(test, offset + bsize, (u64)(SZ_4M + SZ_2M));
				total += (u32)bsize;
			}
			KUNIT_EXPECT_EQ(test, total, SZ_2M);

			gpu_buddy_free_list(&mm, &blocks, 0);
		}

		KUNIT_EXPECT_EQ(test, mm.avail, (u64)mm_size);
	}

	gpu_buddy_fini(&mm);

	/*
	 * Various power-of-two exact ranges within a large mm.
	 * Allocate non-overlapping power-of-two exact ranges at their natural
	 * alignment, validating that the allocator handles different orders.
	 */
	mm_size = SZ_16M;
	KUNIT_ASSERT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/* Allocate 4K at offset 0 */
	ret = gpu_buddy_alloc_blocks(&mm, 0, SZ_4K, SZ_4K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 64K at offset 64K */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_64K, SZ_64K + SZ_64K, SZ_64K, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 1M at offset 1M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_1M, SZ_1M + SZ_1M, SZ_1M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	/* Allocate 4M at offset 4M */
	ret = gpu_buddy_alloc_blocks(&mm, SZ_4M, SZ_4M + SZ_4M, SZ_4M, ps, &blocks, 0);
	KUNIT_ASSERT_EQ(test, ret, 0);

	total = 0;
	list_for_each_entry(block, &blocks, link)
		total += (u32)gpu_buddy_block_size(&mm, block);
	KUNIT_EXPECT_EQ(test, total, SZ_4K + SZ_64K + SZ_1M + SZ_4M);

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_clear(struct kunit *test)
{
	unsigned long n_pages, total, i = 0;
	const unsigned long ps = SZ_4K;
	struct gpu_buddy_block *block;
	const int max_order = 12;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;
	unsigned int order;
	u32 mm_size, size;
	LIST_HEAD(dirty);
	LIST_HEAD(clean);

	mm_size = SZ_4K << max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	/*
	 * Idea is to allocate and free some random portion of the address space,
	 * returning those pages as non-dirty and randomly alternate between
	 * requesting dirty and non-dirty pages (not going over the limit
	 * we freed as non-dirty), putting that into two separate lists.
	 * Loop over both lists at the end checking that the dirty list
	 * is indeed all dirty pages and vice versa. Free it all again,
	 * keeping the dirty/clear status.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    5 * ps, ps, &allocated,
							    GPU_BUDDY_TOPDOWN_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 5 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);

	n_pages = 10;
	do {
		unsigned long flags;
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0) {
			list = &dirty;
			flags = 0;
		} else {
			list = &clean;
			flags = GPU_BUDDY_CLEAR_ALLOCATION;
		}

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list,
								    flags),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	list_for_each_entry(block, &clean, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);

	list_for_each_entry(block, &dirty, link)
		KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);

	/*
	 * Trying to go over the clear limit for some allocation.
	 * The allocation should never fail with reasonable page-size.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    10 * ps, ps, &clean,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 10 * ps);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);
	gpu_buddy_fini(&mm);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Create a new mm. Intentionally fragment the address space by creating
	 * two alternating lists. Free both lists, one as dirty the other as clean.
	 * Try to allocate double the previous size with matching min_page_size. The
	 * allocation should never fail as it calls the force_merge. Also check that
	 * the page is always dirty after force_merge. Free the page as dirty, then
	 * repeat the whole thing, increment the order until we hit the max_order.
	 */

	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 2;

		if (slot == 0)
			list = &dirty;
		else
			list = &clean;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n", ps);
	} while (++i < n_pages);

	gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
	gpu_buddy_free_list(&mm, &dirty, 0);

	order = 1;
	do {
		size = SZ_4K << order;

		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
								    size, size, &allocated,
								    GPU_BUDDY_CLEAR_ALLOCATION),
				       "buddy_alloc hit an error size=%u\n", size);
		total = 0;
		list_for_each_entry(block, &allocated, link) {
			if (size != mm_size)
				KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
			total += gpu_buddy_block_size(&mm, block);
		}
		KUNIT_EXPECT_EQ(test, total, size);

		gpu_buddy_free_list(&mm, &allocated, 0);
	} while (++order <= max_order);

	gpu_buddy_fini(&mm);

	/*
	 * Create a new mm with a non power-of-two size. Allocate a random size from each
	 * root, free as cleared and then call fini. This will ensure the multi-root
	 * force merge during fini.
	 */
	mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    4 * ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 4 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CLEAR_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
							    ps, ps, &allocated,
							    GPU_BUDDY_RANGE_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", ps);
	gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
{
	const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
	unsigned long i, n_pages, total;
	struct gpu_buddy_block *block;
	struct gpu_buddy mm;
	LIST_HEAD(left);
	LIST_HEAD(middle);
	LIST_HEAD(right);
	LIST_HEAD(allocated);

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));

	/*
	 * Idea is to fragment the address space by alternating block
	 * allocations between three different lists; one for left, middle and
	 * right. We can then free a list to simulate fragmentation. In
	 * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
	 * including the try_harder path.
	 */

	i = 0;
	n_pages = mm_size / ps;
	do {
		struct list_head *list;
		int slot = i % 3;

		if (slot == 0)
			list = &left;
		else if (slot == 1)
			list = &middle;
		else
			list = &right;
		KUNIT_ASSERT_FALSE_MSG(test,
				       gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							      ps, ps, list, 0),
				       "buddy_alloc hit an error size=%lu\n",
				       ps);
	} while (++i < n_pages);

	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);

	gpu_buddy_free_list(&mm, &middle, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   2 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 2 * ps);

	gpu_buddy_free_list(&mm, &right, 0);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							   3 * ps, ps, &allocated,
							   GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			      "buddy_alloc didn't error size=%lu\n", 3 * ps);
	/*
	 * At this point we should have enough contiguous space for 2 blocks,
	 * however they are never buddies (since we freed middle and right) so
	 * will require the try_harder logic to find them.
	 */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    2 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 2 * ps);

	gpu_buddy_free_list(&mm, &left, 0);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
							    3 * ps, ps, &allocated,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%lu\n", 3 * ps);

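	/*
	 * Only the successful 2-page and 3-page contiguous allocations above
	 * should have ended up on the allocated list.
	 */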
	total = 0;
	list_for_each_entry(block, &allocated, link)
		total += gpu_buddy_block_size(&mm, block);

	KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	const int max_order = 3;
	unsigned long flags = 0;
	int order, top;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(holes);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left. Free the largest block, then whittle down again.
	 * Eventually we will have a fully 50% fragmented mm.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (top = max_order; top; top--) {
		/* Make room by freeing the largest allocated block */
		block = list_first_entry_or_null(&blocks, typeof(*block), link);
		if (block) {
			list_del(&block->link);
			gpu_buddy_free_block(&mm, block);
		}

		for (order = top; order--;) {
			size = get_size(order, mm.chunk_size);
			KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
									    mm_size, size, size,
									    &tmp, flags),
					       "buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
					       order, top);

			block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
			KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

			list_move_tail(&block->link, &blocks);
		}

		/* There should be one final page for this sub-allocation */
		size = get_size(0, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM for hole\n");

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &holes);

		size = get_size(top, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
				      top, max_order);
	}

	gpu_buddy_free_list(&mm, &holes, 0);

	/* Nothing larger than blocks of chunk_size now available */
	for (order = 1; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded at order %d, it should be full!",
				      order);
	}

	list_splice_tail(&holes, &blocks);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block, *bn;
	const unsigned int max_order = 16;
	unsigned long flags = 0;
	struct gpu_buddy mm;
	unsigned int order;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);

	/*
	 * Create a pot-sized mm, then allocate one of each possible
	 * order within. This should leave the mm with exactly one
	 * page left.
	 */

	mm_size = SZ_4K << max_order;
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (order = 0; order < max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* And now the last remaining block available */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc hit -ENOMEM on final alloc\n");

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_move_tail(&block->link, &blocks);

	/* Should be completely full! */
	for (order = max_order; order--;) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								   size, size, &tmp, flags),
				      "buddy_alloc unexpectedly succeeded, it should be full!");
	}

	block = list_last_entry(&blocks, typeof(*block), link);
	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);

	/* As we free in increasing size, we make available larger blocks */
	order = 1;
	list_for_each_entry_safe(block, bn, &blocks, link) {
		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);

		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_del(&block->link);
		gpu_buddy_free_block(&mm, block);
		order++;
	}

	/* To confirm, now the whole mm should be available */
	size = get_size(max_order, mm.chunk_size);
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							    size, size, &tmp, flags),
			       "buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
			       max_order);

	block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
	KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

	list_del(&block->link);
	gpu_buddy_free_block(&mm, block);
	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
{
	u64 mm_size, size, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	const int max_order = 16;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	LIST_HEAD(tmp);
	int order;

	/*
	 * Create a mm with one block of each order available, and
	 * try to allocate them all.
	 */

	mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	KUNIT_EXPECT_EQ(test, mm.max_order, max_order);

	for (order = 0; order <= max_order; order++) {
		size = get_size(order, mm.chunk_size);
		KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
								    size, size, &tmp, flags),
				       "buddy_alloc hit -ENOMEM with order=%d\n",
				       order);

		block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
		KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");

		list_move_tail(&block->link, &blocks);
	}

	/* Should be completely full! */
	size = get_size(0, mm.chunk_size);
	KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
							   size, size, &tmp, flags),
			      "buddy_alloc unexpectedly succeeded, it should be full!");

	gpu_buddy_free_list(&mm, &blocks, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_limit(struct kunit *test)
{
	u64 size = U64_MAX, start = 0;
	struct gpu_buddy_block *block;
	unsigned long flags = 0;
	LIST_HEAD(allocated);
	struct gpu_buddy mm;

	KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));

	KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
			    "mm.max_order(%d) != %d\n", mm.max_order,
			    GPU_BUDDY_MAX_ORDER);

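	/* The largest possible single allocation is one block of max_order. */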
	size = mm.chunk_size << mm.max_order;
	KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
							mm.chunk_size, &allocated, flags));

	block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
	KUNIT_EXPECT_TRUE(test, block);

	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
			    "block order(%d) != %d\n",
			    gpu_buddy_block_order(block), mm.max_order);

	KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size,
			    "block size(%llu) != %llu\n",
			    gpu_buddy_block_size(&mm, block),
			    BIT_ULL(mm.max_order) * mm.chunk_size);

	gpu_buddy_free_list(&mm, &allocated, 0);
	gpu_buddy_fini(&mm);
}

static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
{
	u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
	struct gpu_buddy mm;
	LIST_HEAD(blocks);
	int err;

	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
			       "buddy_init failed\n");

	/* CONTIGUOUS allocation should succeed via try_harder fallback */
	KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
							    SZ_4K, &blocks,
							    GPU_BUDDY_CONTIGUOUS_ALLOCATION),
			       "buddy_alloc hit an error size=%llu\n", size);
	gpu_buddy_free_list(&mm, &blocks, 0);

	/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
				     GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
	err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
				     GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
	KUNIT_EXPECT_EQ(test, err, -EINVAL);

	gpu_buddy_fini(&mm);
}

static int gpu_buddy_suite_init(struct kunit_suite *suite)
{
	while (!random_seed)
		random_seed = get_random_u32();

	kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
		   random_seed);

	return 0;
}

static struct kunit_case gpu_buddy_tests[] = {
	KUNIT_CASE(gpu_test_buddy_alloc_limit),
	KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
	KUNIT_CASE(gpu_test_buddy_alloc_pathological),
	KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
	KUNIT_CASE(gpu_test_buddy_alloc_clear),
	KUNIT_CASE(gpu_test_buddy_alloc_range),
	KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
	KUNIT_CASE_SLOW(gpu_test_buddy_fragmentation_performance),
	KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
	KUNIT_CASE(gpu_test_buddy_offset_aligned_allocation),
	KUNIT_CASE(gpu_test_buddy_subtree_offset_alignment_stress),
	{}
};

static struct kunit_suite gpu_buddy_test_suite = {
	.name = "gpu_buddy",
	.suite_init = gpu_buddy_suite_init,
	.test_cases = gpu_buddy_tests,
};

kunit_test_suite(gpu_buddy_test_suite);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("KUnit test for gpu_buddy functions");
MODULE_LICENSE("GPL");

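/*
 * Example invocation (an assumption, not part of this file): with a
 * kunitconfig that enables this test, something like
 *
 *   ./tools/testing/kunit/kunit.py run 'gpu_buddy'
 *
 * should build and run the gpu_buddy suite.
 */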