CoCalc -- kfd_packet

GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
²⁶⁵¹⁶ views
1
// SPDX-License-Identifier: GPL-2.0 OR MIT
2
/*
3
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be included in
13
 * all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
 * OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 */
24

25
#include <linux/slab.h>
26
#include <linux/mutex.h>
27
#include "kfd_device_queue_manager.h"
28
#include "kfd_kernel_queue.h"
29
#include "kfd_priv.h"
30

31
#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
32
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
33
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
34
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
35

36
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
37
				unsigned int buffer_size_bytes)
38
{
39
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
40

41
	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
42
	     "Runlist IB overflow");
43
	*wptr = temp;
44
}
45

46
static void pm_calc_rlib_size(struct packet_manager *pm,
47
				unsigned int *rlib_size,
48
				int *over_subscription,
49
				int xnack_conflict)
50
{
51
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
52
	unsigned int map_queue_size;
53
	unsigned int max_proc_per_quantum = 1;
54
	struct kfd_node *node = pm->dqm->dev;
55
	struct device *dev = node->adev->dev;
56

57
	process_count = pm->dqm->processes_count;
58
	queue_count = pm->dqm->active_queue_count;
59
	compute_queue_count = pm->dqm->active_cp_queue_count;
60
	gws_queue_count = pm->dqm->gws_queue_count;
61

62
	/* check if there is over subscription
63
	 * Note: the arbitration between the number of VMIDs and
64
	 * hws_max_conc_proc has been done in
65
	 * kgd2kfd_device_init().
66
	 */
67
	*over_subscription = 0;
68

69
	if (node->max_proc_per_quantum > 1)
70
		max_proc_per_quantum = node->max_proc_per_quantum;
71

72
	if (process_count > max_proc_per_quantum)
73
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
74
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
75
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
76
	if (gws_queue_count > 1)
77
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
78
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
79
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
80

81
	if (*over_subscription)
82
		dev_dbg(dev, "Over subscribed runlist\n");
83

84
	map_queue_size = pm->pmf->map_queues_size;
85
	/* calculate run list ib allocation size */
86
	*rlib_size = process_count * pm->pmf->map_process_size +
87
		     queue_count * map_queue_size;
88

89
	/*
90
	 * Increase the allocation size in case we need a chained run list
91
	 * when over subscription
92
	 */
93
	if (*over_subscription)
94
		*rlib_size += pm->pmf->runlist_size;
95

96
	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
97
}
98

99
static int pm_allocate_runlist_ib(struct packet_manager *pm,
100
				unsigned int **rl_buffer,
101
				uint64_t *rl_gpu_buffer,
102
				unsigned int *rl_buffer_size,
103
				int *is_over_subscription,
104
				int xnack_conflict)
105
{
106
	struct kfd_node *node = pm->dqm->dev;
107
	struct device *dev = node->adev->dev;
108
	int retval;
109

110
	if (WARN_ON(pm->allocated))
111
		return -EINVAL;
112

113
	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
114
				xnack_conflict);
115

116
	mutex_lock(&pm->lock);
117

118
	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
119

120
	if (retval) {
121
		dev_err(dev, "Failed to allocate runlist IB\n");
122
		goto out;
123
	}
124

125
	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
126
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
127

128
	memset(*rl_buffer, 0, *rl_buffer_size);
129
	pm->allocated = true;
130

131
out:
132
	mutex_unlock(&pm->lock);
133
	return retval;
134
}
135

136
static int pm_create_runlist_ib(struct packet_manager *pm,
137
				struct list_head *queues,
138
				uint64_t *rl_gpu_addr,
139
				size_t *rl_size_bytes)
140
{
141
	unsigned int alloc_size_bytes;
142
	unsigned int *rl_buffer, rl_wptr, i;
143
	struct kfd_node *node = pm->dqm->dev;
144
	struct device *dev = node->adev->dev;
145
	int retval, processes_mapped;
146
	struct device_process_node *cur;
147
	struct qcm_process_device *qpd;
148
	struct queue *q;
149
	struct kernel_queue *kq;
150
	int is_over_subscription;
151
	int xnack_enabled = -1;
152
	bool xnack_conflict = 0;
153

154
	rl_wptr = retval = processes_mapped = 0;
155

156
	/* Check if processes set different xnack modes */
157
	list_for_each_entry(cur, queues, list) {
158
		qpd = cur->qpd;
159
		if (xnack_enabled < 0)
160
			/* First process */
161
			xnack_enabled = qpd->pqm->process->xnack_enabled;
162
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
163
			/* Found a process with a different xnack mode */
164
			xnack_conflict = 1;
165
			break;
166
		}
167
	}
168

169
	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
170
				&alloc_size_bytes, &is_over_subscription,
171
				xnack_conflict);
172
	if (retval)
173
		return retval;
174

175
	*rl_size_bytes = alloc_size_bytes;
176
	pm->ib_size_bytes = alloc_size_bytes;
177

178
	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
179
		pm->dqm->processes_count, pm->dqm->active_queue_count);
180

181
build_runlist_ib:
182
	/* build the run list ib packet */
183
	list_for_each_entry(cur, queues, list) {
184
		qpd = cur->qpd;
185
		/* group processes with the same xnack mode together */
186
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
187
			continue;
188
		/* build map process packet */
189
		if (processes_mapped >= pm->dqm->processes_count) {
190
			dev_dbg(dev, "Not enough space left in runlist IB\n");
191
			pm_release_ib(pm);
192
			return -ENOMEM;
193
		}
194

195
		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
196
		if (retval)
197
			return retval;
198

199
		processes_mapped++;
200
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
201
				alloc_size_bytes);
202

203
		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
204
			if (!kq->queue->properties.is_active)
205
				continue;
206

207
			dev_dbg(dev,
208
				"static_queue, mapping kernel q %d, is debug status %d\n",
209
				kq->queue->queue, qpd->is_debug);
210

211
			retval = pm->pmf->map_queues(pm,
212
						&rl_buffer[rl_wptr],
213
						kq->queue,
214
						qpd->is_debug);
215
			if (retval)
216
				return retval;
217

218
			inc_wptr(&rl_wptr,
219
				pm->pmf->map_queues_size,
220
				alloc_size_bytes);
221
		}
222

223
		list_for_each_entry(q, &qpd->queues_list, list) {
224
			if (!q->properties.is_active)
225
				continue;
226

227
			dev_dbg(dev,
228
				"static_queue, mapping user queue %d, is debug status %d\n",
229
				q->queue, qpd->is_debug);
230

231
			retval = pm->pmf->map_queues(pm,
232
						&rl_buffer[rl_wptr],
233
						q,
234
						qpd->is_debug);
235

236
			if (retval)
237
				return retval;
238

239
			inc_wptr(&rl_wptr,
240
				pm->pmf->map_queues_size,
241
				alloc_size_bytes);
242
		}
243
	}
244
	if (xnack_conflict) {
245
		/* pick up processes with the other xnack mode */
246
		xnack_enabled = !xnack_enabled;
247
		xnack_conflict = 0;
248
		goto build_runlist_ib;
249
	}
250

251
	dev_dbg(dev, "Finished map process and queues to runlist\n");
252

253
	if (is_over_subscription) {
254
		if (!pm->is_over_subscription)
255
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
256
				is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
257
				" too many processes" : "",
258
				is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
259
				" too many queues" : "",
260
				is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
261
				" multiple processes using cooperative launch" : "",
262
				is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
263
				" xnack on/off processes mixed on gfx9" : "");
264

265
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
266
					*rl_gpu_addr,
267
					alloc_size_bytes / sizeof(uint32_t),
268
					true);
269
	}
270
	pm->is_over_subscription = !!is_over_subscription;
271

272
	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
273
		pr_debug("0x%2X ", rl_buffer[i]);
274
	pr_debug("\n");
275

276
	return retval;
277
}
278

279
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
280
{
281
	switch (dqm->dev->adev->asic_type) {
282
	case CHIP_KAVERI:
283
	case CHIP_HAWAII:
284
		/* PM4 packet structures on CIK are the same as on VI */
285
	case CHIP_CARRIZO:
286
	case CHIP_TONGA:
287
	case CHIP_FIJI:
288
	case CHIP_POLARIS10:
289
	case CHIP_POLARIS11:
290
	case CHIP_POLARIS12:
291
	case CHIP_VEGAM:
292
		pm->pmf = &kfd_vi_pm_funcs;
293
		break;
294
	default:
295
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
296
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
297
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
298
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
299
			pm->pmf = &kfd_aldebaran_pm_funcs;
300
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
301
			pm->pmf = &kfd_v9_pm_funcs;
302
		else {
303
			WARN(1, "Unexpected ASIC family %u",
304
			     dqm->dev->adev->asic_type);
305
			return -EINVAL;
306
		}
307
	}
308

309
	pm->dqm = dqm;
310
	mutex_init(&pm->lock);
311
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
312
	if (!pm->priv_queue) {
313
		mutex_destroy(&pm->lock);
314
		return -ENOMEM;
315
	}
316
	pm->allocated = false;
317

318
	return 0;
319
}
320

321
void pm_uninit(struct packet_manager *pm)
322
{
323
	mutex_destroy(&pm->lock);
324
	kernel_queue_uninit(pm->priv_queue);
325
	pm->priv_queue = NULL;
326
}
327

328
int pm_send_set_resources(struct packet_manager *pm,
329
				struct scheduling_resources *res)
330
{
331
	struct kfd_node *node = pm->dqm->dev;
332
	struct device *dev = node->adev->dev;
333
	uint32_t *buffer, size;
334
	int retval = 0;
335

336
	size = pm->pmf->set_resources_size;
337
	mutex_lock(&pm->lock);
338
	kq_acquire_packet_buffer(pm->priv_queue,
339
					size / sizeof(uint32_t),
340
					(unsigned int **)&buffer);
341
	if (!buffer) {
342
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
343
		retval = -ENOMEM;
344
		goto out;
345
	}
346

347
	retval = pm->pmf->set_resources(pm, buffer, res);
348
	if (!retval)
349
		retval = kq_submit_packet(pm->priv_queue);
350
	else
351
		kq_rollback_packet(pm->priv_queue);
352

353
out:
354
	mutex_unlock(&pm->lock);
355

356
	return retval;
357
}
358

359
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
360
{
361
	uint64_t rl_gpu_ib_addr;
362
	uint32_t *rl_buffer;
363
	size_t rl_ib_size, packet_size_dwords;
364
	int retval;
365

366
	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
367
					&rl_ib_size);
368
	if (retval)
369
		goto fail_create_runlist_ib;
370

371
	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
372

373
	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
374
	mutex_lock(&pm->lock);
375

376
	retval = kq_acquire_packet_buffer(pm->priv_queue,
377
					packet_size_dwords, &rl_buffer);
378
	if (retval)
379
		goto fail_acquire_packet_buffer;
380

381
	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
382
					rl_ib_size / sizeof(uint32_t), false);
383
	if (retval)
384
		goto fail_create_runlist;
385

386
	retval = kq_submit_packet(pm->priv_queue);
387

388
	mutex_unlock(&pm->lock);
389

390
	return retval;
391

392
fail_create_runlist:
393
	kq_rollback_packet(pm->priv_queue);
394
fail_acquire_packet_buffer:
395
	mutex_unlock(&pm->lock);
396
fail_create_runlist_ib:
397
	pm_release_ib(pm);
398
	return retval;
399
}
400

401
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
402
			uint64_t fence_value)
403
{
404
	struct kfd_node *node = pm->dqm->dev;
405
	struct device *dev = node->adev->dev;
406
	uint32_t *buffer, size;
407
	int retval = 0;
408

409
	if (WARN_ON(!fence_address))
410
		return -EFAULT;
411

412
	size = pm->pmf->query_status_size;
413
	mutex_lock(&pm->lock);
414
	kq_acquire_packet_buffer(pm->priv_queue,
415
			size / sizeof(uint32_t), (unsigned int **)&buffer);
416
	if (!buffer) {
417
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
418
		retval = -ENOMEM;
419
		goto out;
420
	}
421

422
	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
423
	if (!retval)
424
		retval = kq_submit_packet(pm->priv_queue);
425
	else
426
		kq_rollback_packet(pm->priv_queue);
427

428
out:
429
	mutex_unlock(&pm->lock);
430
	return retval;
431
}
432

433
/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
434
 *  by writing to CP_IQ_WAIT_TIME2 registers.
435
 *
436
 *  @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
437
 *  @value: Depends on the cmd. This parameter is unused for
438
 *    KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
439
 *    KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
440
 *
441
 */
442
int pm_config_dequeue_wait_counts(struct packet_manager *pm,
443
		enum kfd_config_dequeue_wait_counts_cmd cmd,
444
		uint32_t value)
445
{
446
	struct kfd_node *node = pm->dqm->dev;
447
	struct device *dev = node->adev->dev;
448
	int retval = 0;
449
	uint32_t *buffer, size;
450

451
	if (!pm->pmf->config_dequeue_wait_counts ||
452
	    !pm->pmf->config_dequeue_wait_counts_size)
453
		return 0;
454

455
	if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
456
	   KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
457
		return 0;
458

459
	size = pm->pmf->config_dequeue_wait_counts_size;
460

461
	mutex_lock(&pm->lock);
462

463
	if (size) {
464
		kq_acquire_packet_buffer(pm->priv_queue,
465
			size / sizeof(uint32_t),
466
			(unsigned int **)&buffer);
467

468
		if (!buffer) {
469
			dev_err(dev,
470
				"Failed to allocate buffer on kernel queue\n");
471
			retval = -ENOMEM;
472
			goto out;
473
		}
474

475
		retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
476
							     cmd, value);
477
		if (!retval) {
478
			retval = kq_submit_packet(pm->priv_queue);
479

480
			/* If default value is modified, cache that in dqm->wait_times */
481
			if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
482
				update_dqm_wait_times(pm->dqm);
483
		} else {
484
			kq_rollback_packet(pm->priv_queue);
485
		}
486
	}
487
out:
488
	mutex_unlock(&pm->lock);
489
	return retval;
490
}
491

492
int pm_send_unmap_queue(struct packet_manager *pm,
493
			enum kfd_unmap_queues_filter filter,
494
			uint32_t filter_param, bool reset)
495
{
496
	struct kfd_node *node = pm->dqm->dev;
497
	struct device *dev = node->adev->dev;
498
	uint32_t *buffer, size;
499
	int retval = 0;
500

501
	size = pm->pmf->unmap_queues_size;
502
	mutex_lock(&pm->lock);
503
	kq_acquire_packet_buffer(pm->priv_queue,
504
			size / sizeof(uint32_t), (unsigned int **)&buffer);
505
	if (!buffer) {
506
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
507
		retval = -ENOMEM;
508
		goto out;
509
	}
510

511
	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
512
	if (!retval)
513
		retval = kq_submit_packet(pm->priv_queue);
514
	else
515
		kq_rollback_packet(pm->priv_queue);
516

517
out:
518
	mutex_unlock(&pm->lock);
519
	return retval;
520
}
521

522
void pm_release_ib(struct packet_manager *pm)
523
{
524
	mutex_lock(&pm->lock);
525
	if (pm->allocated) {
526
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
527
		pm->allocated = false;
528
	}
529
	mutex_unlock(&pm->lock);
530
}
531

532
#if defined(CONFIG_DEBUG_FS)
533

534
int pm_debugfs_runlist(struct seq_file *m, void *data)
535
{
536
	struct packet_manager *pm = data;
537

538
	mutex_lock(&pm->lock);
539

540
	if (!pm->allocated) {
541
		seq_puts(m, "  No active runlist\n");
542
		goto out;
543
	}
544

545
	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
546
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
547

548
out:
549
	mutex_unlock(&pm->lock);
550
	return 0;
551
}
552

553
int pm_debugfs_hang_hws(struct packet_manager *pm)
554
{
555
	struct kfd_node *node = pm->dqm->dev;
556
	struct device *dev = node->adev->dev;
557
	uint32_t *buffer, size;
558
	int r = 0;
559

560
	if (!pm->priv_queue)
561
		return -EAGAIN;
562

563
	size = pm->pmf->query_status_size;
564
	mutex_lock(&pm->lock);
565
	kq_acquire_packet_buffer(pm->priv_queue,
566
			size / sizeof(uint32_t), (unsigned int **)&buffer);
567
	if (!buffer) {
568
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
569
		r = -ENOMEM;
570
		goto out;
571
	}
572
	memset(buffer, 0x55, size);
573
	kq_submit_packet(pm->priv_queue);
574

575
	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
576
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
577
		 buffer[5], buffer[6]);
578
out:
579
	mutex_unlock(&pm->lock);
580
	return r;
581
}
582

583

584
#endif
585

586
Product

Resources

Company