GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/amdxdna/aie2_ctx.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
#include "amdxdna_pm.h"

static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
MODULE_PARM_DESC(force_cmdlist, "Force use of command list (Default false)");

#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */

static void aie2_job_release(struct kref *ref)
{
	struct amdxdna_sched_job *job;

	job = container_of(ref, struct amdxdna_sched_job, refcnt);
	amdxdna_sched_job_cleanup(job);
	atomic64_inc(&job->hwctx->job_free_cnt);
	wake_up(&job->hwctx->priv->job_free_wq);
	if (job->out_fence)
		dma_fence_put(job->out_fence);
	kfree(job);
}

static void aie2_job_put(struct amdxdna_sched_job *job)
{
	kref_put(&job->refcnt, aie2_job_release);
}

/* bad_job is only set by aie2_sched_job_timedout(); all other callers pass NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
			    struct drm_sched_job *bad_job)
{
	drm_sched_stop(&hwctx->priv->sched, bad_job);
	aie2_destroy_context(xdna->dev_handle, hwctx);
}

static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
	int ret;

	ret = aie2_create_context(xdna->dev_handle, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
		goto out;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
		goto out;
	}

	ret = aie2_config_cu(hwctx, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
		goto out;
	}

out:
	drm_sched_start(&hwctx->priv->sched, 0);
	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
	return ret;
}

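/*
 * Look up the completion fence for sequence number @seq on the hardware
 * context's syncobj timeline. Returns a fence reference that the caller
 * must drop with dma_fence_put(), or NULL if it cannot be found.
 */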
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *fence, *out_fence = NULL;
	int ret;

	fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!fence)
		return NULL;

	ret = dma_fence_chain_find_seqno(&fence, seq);
	if (ret)
		goto out;

	out_fence = dma_fence_get(dma_fence_chain_contained(fence));

out:
	dma_fence_put(fence);
	return out_fence;
}

static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
{
	struct dma_fence *fence;

	fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
	if (!fence)
		return;

	/* Wait up to 2 seconds for fw to finish all pending requests */
	dma_fence_wait_timeout(fence, false, msecs_to_jiffies(2000));
	dma_fence_put(fence);
}

static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);

	return 0;
}

void aie2_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;

	/*
	 * Command timeout is unlikely. But if it happens, it doesn't
	 * break the system: aie2_hwctx_stop() will destroy the mailbox
	 * and abort all commands.
	 */
	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
}

static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	return aie2_hwctx_restart(xdna, hwctx);
}

int aie2_hwctx_resume(struct amdxdna_client *client)
{
	/*
	 * The resume path cannot guarantee that the mailbox channel is
	 * regenerated. If that happens, submitting a message to the
	 * mailbox channel will return an error.
	 */
	return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}

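/*
 * Common completion path for all response handlers: drop the PM reference
 * taken at submission, signal the job fence, release the per-context job
 * slot and drop the references held while the command was in flight.
 */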
static void
aie2_sched_notify(struct amdxdna_sched_job *job)
{
	struct dma_fence *fence = job->fence;

	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);

	amdxdna_pm_suspend_put(job->hwctx->client->xdna);
	job->hwctx->priv->completed++;
	dma_fence_signal(fence);

	up(&job->hwctx->priv->job_sem);
	job->job_done = true;
	mmput_async(job->mm);
	aie2_job_put(job);
}

static int
aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	int ret = 0;
	u32 status;

	cmd_abo = job->cmd_bo;

	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	status = readl(data);
	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
	return ret;
}

static int
aie2_sched_drvcmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	int ret = 0;

	if (unlikely(!data))
		goto out;

	if (unlikely(size != sizeof(u32))) {
		ret = -EINVAL;
		goto out;
	}

	job->drv_cmd->result = readl(data);

out:
	aie2_sched_notify(job);
	return ret;
}

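/*
 * Response handler for command-list (chained) submissions. On failure,
 * read back which command in the chain failed and record it in the
 * command chain payload's error_index field.
 */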
static int
aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_sched_job *job = handle;
	struct amdxdna_gem_obj *cmd_abo;
	struct amdxdna_dev *xdna;
	u32 fail_cmd_status;
	u32 fail_cmd_idx;
	u32 cmd_status;
	int ret = 0;

	cmd_abo = job->cmd_bo;

	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}

	cmd_status = readl(data + offsetof(struct cmd_chain_resp, status));
	xdna = job->hwctx->client->xdna;
	XDNA_DBG(xdna, "Status 0x%x", cmd_status);
	if (cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
		goto out;
	}

	/* Slow path to handle error, read from ringbuf on BAR */
	fail_cmd_idx = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_idx));
	fail_cmd_status = readl(data + offsetof(struct cmd_chain_resp, fail_cmd_status));
	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
}

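/*
 * drm_sched run_job callback: take the references that the response handler
 * will drop, wake the device, then send the command (or driver command) to
 * firmware through the mailbox. On error the references are released and an
 * error pointer is returned to the scheduler in place of the fence.
 */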
static struct dma_fence *
aie2_sched_job_run(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct dma_fence *fence;
	int ret;

	if (!hwctx->priv->mbox_chann)
		return NULL;

	if (!mmget_not_zero(job->mm))
		return ERR_PTR(-ESRCH);

	kref_get(&job->refcnt);
	fence = dma_fence_get(job->fence);

	ret = amdxdna_pm_resume_get(hwctx->client->xdna);
	if (ret)
		goto out;

	if (job->drv_cmd) {
		switch (job->drv_cmd->opcode) {
		case SYNC_DEBUG_BO:
			ret = aie2_sync_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		case ATTACH_DEBUG_BO:
			ret = aie2_config_debug_bo(hwctx, job, aie2_sched_drvcmd_resp_handler);
			break;
		default:
			ret = -EINVAL;
			break;
		}
		goto out;
	}

	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else if (force_cmdlist)
		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
	else
		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);

out:
	if (ret) {
		amdxdna_pm_suspend_put(hwctx->client->xdna);
		dma_fence_put(job->fence);
		aie2_job_put(job);
		mmput(job->mm);
		fence = ERR_PTR(ret);
	}
	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);

	return fence;
}

static void aie2_sched_job_free(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;

	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
	if (!job->job_done)
		up(&hwctx->priv->job_sem);

	drm_sched_job_cleanup(sched_job);
	aie2_job_put(job);
}

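/*
 * drm_sched timeout handling: mark the job as timed out, then stop and
 * restart the hardware context under dev_lock, which destroys and
 * recreates the firmware context.
 */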
static enum drm_gpu_sched_stat
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
{
	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
	struct amdxdna_hwctx *hwctx = job->hwctx;
	struct amdxdna_dev *xdna;

	xdna = hwctx->client->xdna;
	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
	job->job_timeout = true;
	mutex_lock(&xdna->dev_lock);
	aie2_hwctx_stop(xdna, hwctx, sched_job);

	aie2_hwctx_restart(xdna, hwctx);
	mutex_unlock(&xdna->dev_lock);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct drm_sched_backend_ops sched_ops = {
	.run_job = aie2_sched_job_run,
	.free_job = aie2_sched_job_free,
	.timedout_job = aie2_sched_job_timedout,
};

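/*
 * Build the list of candidate start columns for this context. The column
 * count is derived from the tile count and the core-tile row count, and
 * the candidates are spaced by the alignment width the device requires.
 */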
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_dev_hdl *ndev;
	int start, end, first, last;
	u32 width = 1, entries = 0;
	int i;

	if (!hwctx->num_tiles) {
		XDNA_ERR(xdna, "Number of tiles is zero");
		return -EINVAL;
	}

	ndev = xdna->dev_handle;
	if (unlikely(!ndev->metadata.core.row_count)) {
		XDNA_WARN(xdna, "Core tile row count is zero");
		return -EINVAL;
	}

	hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
	if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
		XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
		return -EINVAL;
	}

	if (ndev->priv->col_align == COL_ALIGN_NATURE)
		width = hwctx->num_col;

	/*
	 * In the range [start, end], find the columns that are multiples of
	 * 'width'.
	 * 'first' is the first such column,
	 * 'last' is the last such column,
	 * 'entries' is the total number of such columns.
	 */
	start = xdna->dev_info->first_col;
	end = ndev->total_col - hwctx->num_col;
	if (start > 0 && end == 0) {
		XDNA_DBG(xdna, "Force start from col 0");
		start = 0;
	}
	first = start + (width - start % width) % width;
	last = end - end % width;
	if (last >= first)
		entries = (last - first) / width + 1;
	XDNA_DBG(xdna, "start %d end %d first %d last %d",
		 start, end, first, last);

	if (unlikely(!entries)) {
		XDNA_ERR(xdna, "Start %d end %d width %d",
			 start, end, width);
		return -EINVAL;
	}

	hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
	if (!hwctx->col_list)
		return -ENOMEM;

	hwctx->col_list_len = entries;
	hwctx->col_list[0] = first;
	for (i = 1; i < entries; i++)
		hwctx->col_list[i] = hwctx->col_list[i - 1] + width;

	print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
			     entries * sizeof(*hwctx->col_list), false);
	return 0;
}

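/*
 * Reserve AIE columns for the context. With the AIE2_TEMPORAL_ONLY feature
 * the context takes every column and the firmware context is created
 * directly; otherwise a request is built and handed to the resource solver.
 */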
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct alloc_requests *xrs_req;
	int ret;

	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
		hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
		hwctx->num_col = xdna->dev_handle->total_col;
		return aie2_create_context(xdna->dev_handle, hwctx);
	}

	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
	if (!xrs_req)
		return -ENOMEM;

	xrs_req->cdo.start_cols = hwctx->col_list;
	xrs_req->cdo.cols_len = hwctx->col_list_len;
	xrs_req->cdo.ncols = hwctx->num_col;
	xrs_req->cdo.qos_cap.opc = hwctx->max_opc;

	xrs_req->rqos.gops = hwctx->qos.gops;
	xrs_req->rqos.fps = hwctx->qos.fps;
	xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
	xrs_req->rqos.latency = hwctx->qos.latency;
	xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
	xrs_req->rqos.priority = hwctx->qos.priority;

	xrs_req->rid = (uintptr_t)hwctx;

	ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
	if (ret)
		XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);

	kfree(xrs_req);
	return ret;
}

static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	int ret;

	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
		ret = aie2_destroy_context(xdna->dev_handle, hwctx);
		if (ret)
			XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
	} else {
		ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
		if (ret)
			XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
	}
}

static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct drm_file *filp = hwctx->client->filp;
	struct drm_syncobj *syncobj;
	u32 hdl;
	int ret;

	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;

	ret = drm_syncobj_create(&syncobj, 0, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
		return ret;
	}
	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
	if (ret) {
		drm_syncobj_put(syncobj);
		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
		return ret;
	}
	hwctx->priv->syncobj = syncobj;
	hwctx->syncobj_hdl = hdl;

	return 0;
}

static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
{
	/*
	 * The syncobj_hdl is owned by user space and will be cleaned up
	 * separately.
	 */
	drm_syncobj_put(hwctx->priv->syncobj);
}

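/*
 * Set up everything a hardware context needs to accept commands: the DRM
 * scheduler and entity, device-memory command buffers, the column list and
 * AIE resources, the firmware-side context with its mapped host buffer, and
 * the syncobj used to export completion fences.
 */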
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	const struct drm_sched_init_args args = {
		.ops = &sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = HWCTX_MAX_CMDS,
		.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
		.name = "amdxdna_js",
		.dev = xdna->ddev.dev,
	};
	struct drm_gpu_scheduler *sched;
	struct amdxdna_hwctx_priv *priv;
	struct amdxdna_gem_obj *heap;
	int i, ret;

	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;
	hwctx->priv = priv;

	mutex_lock(&client->mm_lock);
	heap = client->dev_heap;
	if (!heap) {
		XDNA_ERR(xdna, "The client dev heap object does not exist");
		mutex_unlock(&client->mm_lock);
		ret = -ENOENT;
		goto free_priv;
	}
	drm_gem_object_get(to_gobj(heap));
	mutex_unlock(&client->mm_lock);
	priv->heap = heap;
	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);

	ret = amdxdna_gem_pin(heap);
	if (ret) {
		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
		goto put_heap;
	}

	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		struct amdxdna_gem_obj *abo;
		struct amdxdna_drm_create_bo args = {
			.flags = 0,
			.type = AMDXDNA_BO_DEV,
			.vaddr = 0,
			.size = MAX_CHAIN_CMDBUF_SIZE,
		};

		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp);
		if (IS_ERR(abo)) {
			ret = PTR_ERR(abo);
			goto free_cmd_bufs;
		}

		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
			 i, abo->mem.dev_addr, abo->mem.size);
		priv->cmd_buf[i] = abo;
	}

	sched = &priv->sched;
	mutex_init(&priv->io_lock);

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->io_lock);
	fs_reclaim_release(GFP_KERNEL);

	ret = drm_sched_init(sched, &args);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
		goto free_cmd_bufs;
	}

	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (ret) {
		XDNA_ERR(xdna, "Failed to init sched entity. ret %d", ret);
		goto free_sched;
	}

	ret = aie2_hwctx_col_list(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
		goto free_entity;
	}

	ret = amdxdna_pm_resume_get(xdna);
	if (ret)
		goto free_col_list;

	ret = aie2_alloc_resource(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
		goto suspend_put;
	}

	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
				heap->mem.userptr, heap->mem.size);
	if (ret) {
		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
		goto release_resource;
	}

	ret = aie2_ctx_syncobj_create(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
		goto release_resource;
	}
	amdxdna_pm_suspend_put(xdna);

	init_waitqueue_head(&priv->job_free_wq);

	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);

	return 0;

release_resource:
	aie2_release_resource(hwctx);
suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_col_list:
	kfree(hwctx->col_list);
free_entity:
	drm_sched_entity_destroy(&priv->entity);
free_sched:
	drm_sched_fini(&priv->sched);
free_cmd_bufs:
	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
		if (!priv->cmd_buf[i])
			continue;
		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
	}
	amdxdna_gem_unpin(heap);
put_heap:
	drm_gem_object_put(to_gobj(heap));
free_priv:
	kfree(priv);
	return ret;
}

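/*
 * Tear down a hardware context: destroy the firmware context, wait until
 * every submitted job has been freed, then release the scheduler, syncobj,
 * command buffers and heap references. Called with dev_lock held; the lock
 * is dropped around the wait.
 */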
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_dev *xdna;
	int idx;

	xdna = hwctx->client->xdna;

	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
	aie2_hwctx_wait_for_idle(hwctx);

	/* Request fw to destroy hwctx and cancel the remaining pending requests */
	drm_sched_stop(&hwctx->priv->sched, NULL);
	aie2_release_resource(hwctx);
	drm_sched_start(&hwctx->priv->sched, 0);

	mutex_unlock(&xdna->dev_lock);
	drm_sched_entity_destroy(&hwctx->priv->entity);

	/* Wait for all submitted jobs to be completed or canceled */
	wait_event(hwctx->priv->job_free_wq,
		   atomic64_read(&hwctx->job_submit_cnt) ==
		   atomic64_read(&hwctx->job_free_cnt));
	mutex_lock(&xdna->dev_lock);

	drm_sched_fini(&hwctx->priv->sched);
	aie2_ctx_syncobj_destroy(hwctx);

	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
	amdxdna_gem_unpin(hwctx->priv->heap);
	drm_gem_object_put(to_gobj(hwctx->priv->heap));

	mutex_destroy(&hwctx->priv->io_lock);
	kfree(hwctx->col_list);
	kfree(hwctx->priv);
	kfree(hwctx->cus);
}

static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
{
	struct amdxdna_hwctx *hwctx = handle;

	amdxdna_pm_suspend_put(hwctx->client->xdna);
	return 0;
}

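/*
 * Cache the CU (compute unit) configuration from user space and send it to
 * firmware. Reconfiguring a context that already has CUs configured is
 * rejected.
 */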
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 total_size;
	int ret;

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->cus) {
		XDNA_ERR(xdna, "Re-configuring CU is not supported");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = amdxdna_pm_resume_get(xdna);
	if (ret)
		goto free_cus;

	ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto pm_suspend_put;
	}

	wmb(); /* To avoid locking in command submit when checking status */

	return 0;

pm_suspend_put:
	amdxdna_pm_suspend_put(xdna);
free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}

static void aie2_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *out_fence = aie2_cmd_get_out_fence(hwctx, seq);

	if (!out_fence) {
		XDNA_ERR(hwctx->client->xdna, "Failed to get fence");
		return;
	}

	dma_fence_wait_timeout(out_fence, false, MAX_SCHEDULE_TIMEOUT);
	dma_fence_put(out_fence);
}

static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
				   bool attach)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drv_cmd cmd = { 0 };
	struct amdxdna_gem_obj *abo;
	u64 seq;
	int ret;

	abo = amdxdna_gem_get_obj(client, bo_hdl, AMDXDNA_BO_DEV);
	if (!abo) {
		XDNA_ERR(xdna, "Get bo %d failed", bo_hdl);
		return -EINVAL;
	}

	if (attach) {
		if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
			ret = -EBUSY;
			goto put_obj;
		}
		cmd.opcode = ATTACH_DEBUG_BO;
	} else {
		if (abo->assigned_hwctx != hwctx->id) {
			ret = -EINVAL;
			goto put_obj;
		}
		cmd.opcode = DETACH_DEBUG_BO;
	}

	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(xdna, "Submit command failed");
		goto put_obj;
	}

	aie2_cmd_wait(hwctx, seq);
	if (cmd.result) {
		XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
		goto put_obj;
	}

	if (attach)
		abo->assigned_hwctx = hwctx->id;
	else
		abo->assigned_hwctx = AMDXDNA_INVALID_CTX_HANDLE;

	XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);

put_obj:
	amdxdna_gem_put_obj(abo);
	return ret;
}

int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, true);
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return aie2_hwctx_cfg_debug_bo(hwctx, (u32)value, false);
	default:
		XDNA_DBG(xdna, "Unsupported type %d", type);
		return -EOPNOTSUPP;
	}
}

int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
{
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_drv_cmd cmd = { 0 };
	u64 seq;
	int ret;

	cmd.opcode = SYNC_DEBUG_BO;
	ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
				 &debug_bo_hdl, 1, hwctx->id, &seq);
	if (ret) {
		XDNA_ERR(xdna, "Submit command failed");
		return ret;
	}

	aie2_cmd_wait(hwctx, seq);
	if (cmd.result) {
		XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
		return -EINVAL;
	}

	return 0;
}

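/*
 * Re-fault the userptr mappings of @abo after an MMU-notifier invalidation,
 * retrying until no mapping remains invalid or the HMM fault timeout
 * expires.
 */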
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct amdxdna_umap *mapp;
	unsigned long timeout;
	struct mm_struct *mm;
	bool found;
	int ret;

	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
again:
	found = false;
	down_write(&xdna->notifier_lock);
	list_for_each_entry(mapp, &abo->mem.umap_list, node) {
		if (mapp->invalid) {
			found = true;
			break;
		}
	}

	if (!found) {
		abo->mem.map_invalid = false;
		up_write(&xdna->notifier_lock);
		return 0;
	}
	kref_get(&mapp->refcnt);
	up_write(&xdna->notifier_lock);

	XDNA_DBG(xdna, "populate memory range %lx %lx",
		 mapp->vma->vm_start, mapp->vma->vm_end);
	mm = mapp->notifier.mm;
	if (!mmget_not_zero(mm)) {
		amdxdna_umap_put(mapp);
		return -EFAULT;
	}

	mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&mapp->range);
	mmap_read_unlock(mm);
	if (ret) {
		if (time_after(jiffies, timeout)) {
			ret = -ETIME;
			goto put_mm;
		}

		if (ret == -EBUSY) {
			amdxdna_umap_put(mapp);
			goto again;
		}

		goto put_mm;
	}

	down_write(&xdna->notifier_lock);
	if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) {
		up_write(&xdna->notifier_lock);
		amdxdna_umap_put(mapp);
		goto again;
	}
	mapp->invalid = false;
	up_write(&xdna->notifier_lock);
	amdxdna_umap_put(mapp);
	goto again;

put_mm:
	amdxdna_umap_put(mapp);
	mmput(mm);
	return ret;
}

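/*
 * Submission path for user commands: lock and reserve all BOs, re-populate
 * any userptr BO whose mapping was invalidated, then arm the scheduler job,
 * attach its finished fence to the BOs and to the context syncobj timeline,
 * and push it to the scheduler entity.
 */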
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long timeout = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx,
				 hwctx->client->filp->client_id);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		abo = to_xdna_obj(job->bos[i]);
		if (abo->mem.map_invalid) {
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!timeout) {
				timeout = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, timeout)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(abo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);
	atomic64_inc(&hwctx->job_submit_cnt);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}

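/*
 * Called when a userptr mapping of @abo is invalidated by the MMU notifier:
 * wait for all fences (including bookkeeping fences) on the BO so the
 * device is done with the pages before they are unmapped.
 */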
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
			 unsigned long cur_seq)
{
	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
	struct drm_gem_object *gobj = to_gobj(abo);
	long ret;

	ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
				    true, MAX_SCHEDULE_TIMEOUT);
	if (!ret || ret == -ERESTARTSYS)
		XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
}