GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)

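/*
 * The OVER_SUBSCRIPTION_* flags above are ORed into a single bitmask by
 * pm_calc_rlib_size() and reported in the warning printed from
 * pm_create_runlist_ib(); e.g. too many processes combined with more than
 * one GWS queue yields (OVER_SUBSCRIPTION_PROCESS_COUNT |
 * OVER_SUBSCRIPTION_GWS_QUEUE_COUNT).
 */

/* Advance the runlist write pointer (counted in dwords) by increment_bytes
 * and warn if that would run past the end of the runlist IB.
 */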
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

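/*
 * Work out how big the runlist IB needs to be: one map_process packet per
 * process plus one map_queues packet per active queue, with room for an
 * extra runlist packet when the list is over-subscribed and must be chained.
 */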
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				int *over_subscription,
				int xnack_conflict)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = 0;

	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if (process_count > max_proc_per_quantum)
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
	if (gws_queue_count > 1)
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;

	if (*over_subscription)
		dev_dbg(dev, "Over subscribed runlist\n");

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over-subscribed
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

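/*
 * Allocate the runlist IB from the device's GTT sub-allocator and hand back
 * its CPU and GPU addresses; the buffer is freed again via pm_release_ib().
 */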
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				int *is_over_subscription,
				int xnack_conflict)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
			  xnack_conflict);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

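/*
 * Build the runlist IB: one map_process packet per process followed by
 * map_queues packets for its active kernel and user queues. When processes
 * with different XNACK modes share the runlist, they are grouped by XNACK
 * mode, and an extra runlist packet is appended to chain the IB when it is
 * over-subscribed.
 */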
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	int is_over_subscription;
	int xnack_enabled = -1;
	bool xnack_conflict = false;

	rl_wptr = retval = processes_mapped = 0;

	/* Check if processes set different xnack modes */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		if (xnack_enabled < 0)
			/* First process */
			xnack_enabled = qpd->pqm->process->xnack_enabled;
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
			/* Found a process with a different xnack mode */
			xnack_conflict = true;
			break;
		}
	}

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription,
					xnack_conflict);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

build_runlist_ib:
	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* group processes with the same xnack mode together */
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
			continue;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
			 alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     kq->queue,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     q,
						     qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}
	}
	if (xnack_conflict) {
		/* pick up processes with the other xnack mode */
		xnack_enabled = !xnack_enabled;
		xnack_conflict = false;
		goto build_runlist_ib;
	}

	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
				 is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
				 " too many processes" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
				 " too many queues" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
				 " multiple processes using cooperative launch" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
				 " xnack on/off processes mixed on gfx9" : "");

		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					  *rl_gpu_addr,
					  alloc_size_bytes / sizeof(uint32_t),
					  true);
	}
	pm->is_over_subscription = !!is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

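/*
 * Pick the packet writer functions for this ASIC family and create the HIQ
 * kernel queue that the packet manager uses to submit packets to the HWS.
 */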
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

void pm_uninit(struct packet_manager *pm)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
	pm->priv_queue = NULL;
}

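/* Write a set_resources packet into the HIQ and submit it to the HWS. */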
int pm_send_set_resources(struct packet_manager *pm,
			  struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

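/*
 * Build the runlist IB for the given queue list and submit a runlist packet
 * over the HIQ that points at it; the IB is released again on failure.
 */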
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
				      &rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					  packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
				  rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	retval = kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

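/*
 * Submit a query_status packet that makes the HWS write fence_value to
 * fence_address, so completion of earlier packets can be detected by
 * polling the fence.
 */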
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			 uint64_t fence_value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
 * by writing to CP_IQ_WAIT_TIME2 registers.
 *
 * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
 * @value: Depends on the cmd. This parameter is unused for
 * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
 * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds the value to be set
 *
 */
int pm_config_dequeue_wait_counts(struct packet_manager *pm,
		enum kfd_config_dequeue_wait_counts_cmd cmd,
		uint32_t value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval = 0;
	uint32_t *buffer, size;

	if (!pm->pmf->config_dequeue_wait_counts ||
	    !pm->pmf->config_dequeue_wait_counts_size)
		return 0;

	if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
	    KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
		return 0;

	size = pm->pmf->config_dequeue_wait_counts_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
					 size / sizeof(uint32_t),
					 (unsigned int **)&buffer);

		if (!buffer) {
			dev_err(dev,
				"Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
							     cmd, value);
		if (!retval) {
			retval = kq_submit_packet(pm->priv_queue);

			/* If default value is modified, cache that in dqm->wait_times */
			if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
				update_dqm_wait_times(pm->dqm);
		} else {
			kq_rollback_packet(pm->priv_queue);
		}
	}
out:
	mutex_unlock(&pm->lock);
	return retval;
}

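/*
 * Submit an unmap_queues packet that unmaps (or, when reset is true, resets)
 * the queues selected by filter and filter_param.
 */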
int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

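/* Free the runlist IB allocated by pm_allocate_runlist_ib(), if any. */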
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, "  No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

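/*
 * Debugfs helper: submit a packet of 0x55 filler dwords to the HIQ to
 * deliberately hang the HWS for testing.
 */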
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
		 buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}

#endif