GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/qaic/qaic_data.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
3
/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
4
/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
5
6
#include <linux/bitfield.h>
7
#include <linux/bits.h>
8
#include <linux/completion.h>
9
#include <linux/delay.h>
10
#include <linux/dma-buf.h>
11
#include <linux/dma-mapping.h>
12
#include <linux/interrupt.h>
13
#include <linux/kref.h>
14
#include <linux/list.h>
15
#include <linux/math64.h>
16
#include <linux/mm.h>
17
#include <linux/moduleparam.h>
18
#include <linux/scatterlist.h>
19
#include <linux/spinlock.h>
20
#include <linux/srcu.h>
21
#include <linux/string.h>
22
#include <linux/types.h>
23
#include <linux/uaccess.h>
24
#include <linux/wait.h>
25
#include <drm/drm_file.h>
26
#include <drm/drm_gem.h>
27
#include <drm/drm_prime.h>
28
#include <drm/drm_print.h>
29
#include <uapi/drm/qaic_accel.h>
30
31
#include "qaic.h"
32
33
#define SEM_VAL_MASK GENMASK_ULL(11, 0)
34
#define SEM_INDEX_MASK GENMASK_ULL(4, 0)
35
#define BULK_XFER BIT(3)
36
#define GEN_COMPLETION BIT(4)
37
#define INBOUND_XFER 1
38
#define OUTBOUND_XFER 2
39
#define REQHP_OFF 0x0 /* we read this */
40
#define REQTP_OFF 0x4 /* we write this */
41
#define RSPHP_OFF 0x8 /* we write this */
42
#define RSPTP_OFF 0xc /* we read this */
43
44
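/*
 * Build one sem_cmdN word. The field layout matches the sem_cmd description
 * in struct dbc_req below; bit 31 (enable) is set whenever a non-zero
 * semaphore command is given.
 */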
#define ENCODE_SEM(val, index, sync, cmd, flags) \
45
({ \
46
FIELD_PREP(GENMASK(11, 0), (val)) | \
47
FIELD_PREP(GENMASK(20, 16), (index)) | \
48
FIELD_PREP(BIT(22), (sync)) | \
49
FIELD_PREP(GENMASK(26, 24), (cmd)) | \
50
FIELD_PREP(GENMASK(30, 29), (flags)) | \
51
FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \
52
})
53
#define NUM_EVENTS 128
54
#define NUM_DELAYS 10
55
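/* Address of the request queue element at index 'offset' within a DBC request FIFO */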
#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
56
57
static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
58
module_param(wait_exec_default_timeout_ms, uint, 0600);
59
MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO");
60
61
static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */
62
module_param(datapath_poll_interval_us, uint, 0600);
63
MODULE_PARM_DESC(datapath_poll_interval_us,
64
"Amount of time to sleep between activity when datapath polling is enabled");
65
66
struct dbc_req {
67
/*
68
* A request ID is assigned to each memory handle going into the DMA queue.
* Since a single memory handle can enqueue multiple elements in the DMA
* queue, all of them share the same request ID.
71
*/
72
__le16 req_id;
73
/* Future use */
74
__u8 seq_id;
75
/*
76
* Special encoded variable
77
* 7 0 - Do not force to generate MSI after DMA is completed
78
* 1 - Force to generate MSI after DMA is completed
79
* 6:5 Reserved
80
* 4 1 - Generate completion element in the response queue
81
* 0 - No Completion Code
82
* 3 0 - DMA request is a Link list transfer
83
* 1 - DMA request is a Bulk transfer
84
* 2 Reserved
85
* 1:0 00 - No DMA transfer involved
86
* 01 - DMA transfer is part of inbound transfer
87
* 10 - DMA transfer has outbound transfer
88
* 11 - NA
89
*/
90
__u8 cmd;
91
__le32 resv;
92
/* Source address for the transfer */
93
__le64 src_addr;
94
/* Destination address for the transfer */
95
__le64 dest_addr;
96
/* Length of transfer request */
97
__le32 len;
98
__le32 resv2;
99
/* Doorbell address */
100
__le64 db_addr;
101
/*
102
* Special encoded variable
103
* 7 1 - Doorbell(db) write
104
* 0 - No doorbell write
105
* 6:2 Reserved
106
* 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary
107
* 01 - 16 bit access, db address must be aligned to 16bit-boundary
108
* 10 - 8 bit access, db address must be aligned to 8bit-boundary
109
* 11 - Reserved
110
*/
111
__u8 db_len;
112
__u8 resv3;
113
__le16 resv4;
114
/* 32 bit data written to doorbell address */
115
__le32 db_data;
116
/*
117
* Special encoded variable
118
* All the fields of sem_cmdX are passed from user and all are ORed
119
* together to form sem_cmd.
120
* 0:11 Semaphore value
121
* 15:12 Reserved
122
* 20:16 Semaphore index
123
* 21 Reserved
124
* 22 Semaphore Sync
125
* 23 Reserved
126
* 26:24 Semaphore command
127
* 28:27 Reserved
128
* 29 Semaphore DMA out bound sync fence
129
* 30 Semaphore DMA in bound sync fence
130
* 31 Enable semaphore command
131
*/
132
__le32 sem_cmd0;
133
__le32 sem_cmd1;
134
__le32 sem_cmd2;
135
__le32 sem_cmd3;
136
} __packed;
137
138
struct dbc_rsp {
139
/* Request ID of the memory handle whose DMA transaction is completed */
140
__le16 req_id;
141
/* Status of the DMA transaction. 0 : Success otherwise failure */
142
__le16 status;
143
} __packed;
144
145
static inline bool bo_queued(struct qaic_bo *bo)
146
{
147
return !list_empty(&bo->xfer_list);
148
}
149
150
inline int get_dbc_req_elem_size(void)
151
{
152
return sizeof(struct dbc_req);
153
}
154
155
inline int get_dbc_rsp_elem_size(void)
156
{
157
return sizeof(struct dbc_rsp);
158
}
159
160
static void free_slice(struct kref *kref)
161
{
162
struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);
163
164
slice->bo->total_slice_nents -= slice->nents;
165
list_del(&slice->slice);
166
drm_gem_object_put(&slice->bo->base);
167
sg_free_table(slice->sgt);
168
kfree(slice->sgt);
169
kvfree(slice->reqs);
170
kfree(slice);
171
}
172
173
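/*
 * Build a new sg_table covering only the [offset, offset + size) window of
 * sgt_in (a zero size is treated as PAGE_SIZE). The first and last entries
 * are trimmed so the clone starts and ends on the requested byte boundaries.
 */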
static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
174
struct sg_table *sgt_in, u64 size, u64 offset)
175
{
176
struct scatterlist *sg, *sgn, *sgf, *sgl;
177
unsigned int len, nents, offf, offl;
178
struct sg_table *sgt;
179
size_t total_len;
180
int ret, j;
181
182
/* Find out the number of relevant nents needed for this memory */
183
total_len = 0;
184
sgf = NULL;
185
sgl = NULL;
186
nents = 0;
187
offf = 0;
188
offl = 0;
189
190
size = size ? size : PAGE_SIZE;
191
for_each_sgtable_dma_sg(sgt_in, sg, j) {
192
len = sg_dma_len(sg);
193
194
if (!len)
195
continue;
196
if (offset >= total_len && offset < total_len + len) {
197
sgf = sg;
198
offf = offset - total_len;
199
}
200
if (sgf)
201
nents++;
202
if (offset + size >= total_len &&
203
offset + size <= total_len + len) {
204
sgl = sg;
205
offl = offset + size - total_len;
206
break;
207
}
208
total_len += len;
209
}
210
211
if (!sgf || !sgl) {
212
ret = -EINVAL;
213
goto out;
214
}
215
216
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
217
if (!sgt) {
218
ret = -ENOMEM;
219
goto out;
220
}
221
222
ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
223
if (ret)
224
goto free_sgt;
225
226
/* copy relevant sg node and fix page and length */
227
sgn = sgf;
228
for_each_sgtable_dma_sg(sgt, sg, j) {
229
memcpy(sg, sgn, sizeof(*sg));
230
if (sgn == sgf) {
231
sg_dma_address(sg) += offf;
232
sg_dma_len(sg) -= offf;
233
sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf);
234
} else {
235
offf = 0;
236
}
237
if (sgn == sgl) {
238
sg_dma_len(sg) = offl - offf;
239
sg_set_page(sg, sg_page(sgn), offl - offf, offf);
240
sg_mark_end(sg);
241
break;
242
}
243
sgn = sg_next(sgn);
244
}
245
246
*sgt_out = sgt;
247
return ret;
248
249
free_sgt:
250
kfree(sgt);
251
out:
252
*sgt_out = NULL;
253
return ret;
254
}
255
256
static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice,
257
struct qaic_attach_slice_entry *req)
258
{
259
__le64 db_addr = cpu_to_le64(req->db_addr);
260
__le32 db_data = cpu_to_le32(req->db_data);
261
struct scatterlist *sg;
262
__u8 cmd = BULK_XFER;
263
int presync_sem;
264
u64 dev_addr;
265
__u8 db_len;
266
int i;
267
268
if (!slice->no_xfer)
269
cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER);
270
271
if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8))
272
return -EINVAL;
273
274
presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync;
275
if (presync_sem > 1)
276
return -EINVAL;
277
278
presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 |
279
req->sem2.presync << 2 | req->sem3.presync << 3;
280
281
switch (req->db_len) {
282
case 32:
283
db_len = BIT(7);
284
break;
285
case 16:
286
db_len = BIT(7) | 1;
287
break;
288
case 8:
289
db_len = BIT(7) | 2;
290
break;
291
case 0:
292
db_len = 0; /* doorbell is not active for this command */
293
break;
294
default:
295
return -EINVAL; /* should never hit this */
296
}
297
298
/*
299
* When we end up splitting up a single request (i.e., a buf slice) into
300
* multiple DMA requests, we have to manage the sync data carefully.
301
* There can only be one presync sem. That needs to be on every xfer
302
* so that the DMA engine doesn't transfer data before the receiver is
303
* ready. We only do the doorbell and postsync sems after the xfer.
304
* To guarantee previous xfers for the request are complete, we use a
305
* fence.
306
*/
307
dev_addr = req->dev_addr;
308
for_each_sgtable_dma_sg(slice->sgt, sg, i) {
309
slice->reqs[i].cmd = cmd;
310
slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
311
sg_dma_address(sg) : dev_addr);
312
slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
313
dev_addr : sg_dma_address(sg));
314
/*
315
* sg_dma_len(sg) returns the size of a DMA segment. The maximum DMA
* segment size is set to UINT_MAX by qaic, so sg_dma_len(sg) can never
* exceed the u32 range and the cast below does not truncate the value.
319
*/
320
slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg));
321
switch (presync_sem) {
322
case BIT(0):
323
slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val,
324
req->sem0.index,
325
req->sem0.presync,
326
req->sem0.cmd,
327
req->sem0.flags));
328
break;
329
case BIT(1):
330
slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val,
331
req->sem1.index,
332
req->sem1.presync,
333
req->sem1.cmd,
334
req->sem1.flags));
335
break;
336
case BIT(2):
337
slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val,
338
req->sem2.index,
339
req->sem2.presync,
340
req->sem2.cmd,
341
req->sem2.flags));
342
break;
343
case BIT(3):
344
slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val,
345
req->sem3.index,
346
req->sem3.presync,
347
req->sem3.cmd,
348
req->sem3.flags));
349
break;
350
}
351
dev_addr += sg_dma_len(sg);
352
}
353
/* add post transfer stuff to last segment */
354
i--;
355
slice->reqs[i].cmd |= GEN_COMPLETION;
356
slice->reqs[i].db_addr = db_addr;
357
slice->reqs[i].db_len = db_len;
358
slice->reqs[i].db_data = db_data;
359
/*
360
* Add a fence if we have more than one request going to the hardware
361
* representing the entirety of the user request, and the user request
362
* has no presync condition.
363
* Fences are expensive, so we try to avoid them. We rely on the
364
* hardware behavior to avoid needing one when there is a presync
365
* condition. When a presync exists, all requests for that same
366
* presync will be queued into a fifo. Thus, since we queue the
367
* post xfer activity only on the last request we queue, the hardware
368
* will ensure that the last queued request is processed last, thus
369
* making sure the post xfer activity happens at the right time without
370
* a fence.
371
*/
372
if (i && !presync_sem)
373
req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ?
374
QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE);
375
slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index,
376
req->sem0.presync, req->sem0.cmd,
377
req->sem0.flags));
378
slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index,
379
req->sem1.presync, req->sem1.cmd,
380
req->sem1.flags));
381
slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index,
382
req->sem2.presync, req->sem2.cmd,
383
req->sem2.flags));
384
slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index,
385
req->sem3.presync, req->sem3.cmd,
386
req->sem3.flags));
387
388
return 0;
389
}
390
391
static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
392
struct qaic_attach_slice_entry *slice_ent)
393
{
394
struct sg_table *sgt = NULL;
395
struct bo_slice *slice;
396
int ret;
397
398
ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset);
399
if (ret)
400
goto out;
401
402
slice = kmalloc(sizeof(*slice), GFP_KERNEL);
403
if (!slice) {
404
ret = -ENOMEM;
405
goto free_sgt;
406
}
407
408
slice->reqs = kvcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
409
if (!slice->reqs) {
410
ret = -ENOMEM;
411
goto free_slice;
412
}
413
414
slice->no_xfer = !slice_ent->size;
415
slice->sgt = sgt;
416
slice->nents = sgt->nents;
417
slice->dir = bo->dir;
418
slice->bo = bo;
419
slice->size = slice_ent->size;
420
slice->offset = slice_ent->offset;
421
422
ret = encode_reqs(qdev, slice, slice_ent);
423
if (ret)
424
goto free_req;
425
426
bo->total_slice_nents += sgt->nents;
427
kref_init(&slice->ref_count);
428
drm_gem_object_get(&bo->base);
429
list_add_tail(&slice->slice, &bo->slices);
430
431
return 0;
432
433
free_req:
434
kvfree(slice->reqs);
435
free_slice:
436
kfree(slice);
437
free_sgt:
438
sg_free_table(sgt);
439
kfree(sgt);
440
out:
441
return ret;
442
}
443
444
static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size)
445
{
446
struct scatterlist *sg;
447
struct sg_table *sgt;
448
struct page **pages;
449
int *pages_order;
450
int buf_extra;
451
int max_order;
452
int nr_pages;
453
int ret = 0;
454
int i, j, k;
455
int order;
456
457
if (size) {
458
nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
459
/*
460
* Calculate how much extra we are going to allocate, so the excess
* can be removed later
462
*/
463
buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
464
max_order = min(MAX_PAGE_ORDER, get_order(size));
465
} else {
466
/* allocate a single page for bookkeeping */
467
nr_pages = 1;
468
buf_extra = 0;
469
max_order = 0;
470
}
471
472
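/*
 * A single allocation backs both arrays: the page pointers come first and
 * the per-page allocation orders are stored immediately after them.
 */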
pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL);
473
if (!pages) {
474
ret = -ENOMEM;
475
goto out;
476
}
477
pages_order = (void *)pages + sizeof(*pages) * nr_pages;
478
479
/*
480
* Allocate requested memory using alloc_pages. It is possible to allocate
481
* the requested memory in multiple chunks by calling alloc_pages
482
* multiple times. Use SG table to handle multiple allocated pages.
483
*/
484
i = 0;
485
while (nr_pages > 0) {
486
order = min(get_order(nr_pages * PAGE_SIZE), max_order);
487
while (1) {
488
pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER |
489
__GFP_NOWARN | __GFP_ZERO |
490
(order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL),
491
order);
492
if (pages[i])
493
break;
494
if (!order--) {
495
ret = -ENOMEM;
496
goto free_partial_alloc;
497
}
498
}
499
500
max_order = order;
501
pages_order[i] = order;
502
503
nr_pages -= 1 << order;
504
if (nr_pages <= 0)
505
/* account for over allocation */
506
buf_extra += abs(nr_pages) * PAGE_SIZE;
507
i++;
508
}
509
510
sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
511
if (!sgt) {
512
ret = -ENOMEM;
513
goto free_partial_alloc;
514
}
515
516
if (sg_alloc_table(sgt, i, GFP_KERNEL)) {
517
ret = -ENOMEM;
518
goto free_sgt;
519
}
520
521
/* Populate the SG table with the allocated memory pages */
522
sg = sgt->sgl;
523
for (k = 0; k < i; k++, sg = sg_next(sg)) {
524
/* Last entry requires special handling */
525
if (k < i - 1) {
526
sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0);
527
} else {
528
sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0);
529
sg_mark_end(sg);
530
}
531
}
532
533
kvfree(pages);
534
*sgt_out = sgt;
535
return ret;
536
537
free_sgt:
538
kfree(sgt);
539
free_partial_alloc:
540
for (j = 0; j < i; j++)
541
__free_pages(pages[j], pages_order[j]);
542
kvfree(pages);
543
out:
544
*sgt_out = NULL;
545
return ret;
546
}
547
548
static bool invalid_sem(struct qaic_sem *sem)
549
{
550
if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK ||
551
!(sem->presync == 0 || sem->presync == 1) || sem->pad ||
552
sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) ||
553
sem->cmd > QAIC_SEM_WAIT_GT_0)
554
return true;
555
return false;
556
}
557
558
static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
559
u32 count, u64 total_size)
560
{
561
u64 total;
562
int i;
563
564
for (i = 0; i < count; i++) {
565
if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 ||
566
slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) ||
567
invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) ||
568
invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
569
return -EINVAL;
570
571
if (check_add_overflow(slice_ent[i].offset, slice_ent[i].size, &total) ||
572
total > total_size)
573
return -EINVAL;
574
}
575
576
return 0;
577
}
578
579
static void qaic_free_sgt(struct sg_table *sgt)
580
{
581
struct scatterlist *sg;
582
583
if (!sgt)
584
return;
585
586
for (sg = sgt->sgl; sg; sg = sg_next(sg))
587
if (sg_page(sg))
588
__free_pages(sg_page(sg), get_order(sg->length));
589
sg_free_table(sgt);
590
kfree(sgt);
591
}
592
593
static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
594
const struct drm_gem_object *obj)
595
{
596
struct qaic_bo *bo = to_qaic_bo(obj);
597
598
drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir);
599
}
600
601
static const struct vm_operations_struct drm_vm_ops = {
602
.open = drm_gem_vm_open,
603
.close = drm_gem_vm_close,
604
};
605
606
static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
607
{
608
struct qaic_bo *bo = to_qaic_bo(obj);
609
unsigned long offset = 0;
610
struct scatterlist *sg;
611
int ret = 0;
612
613
if (drm_gem_is_imported(obj))
614
return -EINVAL;
615
616
for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
617
if (sg_page(sg)) {
618
ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)),
619
sg->length, vma->vm_page_prot);
620
if (ret)
621
goto out;
622
offset += sg->length;
623
}
624
}
625
626
out:
627
return ret;
628
}
629
630
static void qaic_free_object(struct drm_gem_object *obj)
631
{
632
struct qaic_bo *bo = to_qaic_bo(obj);
633
634
if (drm_gem_is_imported(obj)) {
635
/* DMABUF/PRIME Path */
636
drm_prime_gem_destroy(obj, NULL);
637
} else {
638
/* Private buffer allocation path */
639
qaic_free_sgt(bo->sgt);
640
}
641
642
mutex_destroy(&bo->lock);
643
drm_gem_object_release(obj);
644
kfree(bo);
645
}
646
647
static struct sg_table *qaic_get_sg_table(struct drm_gem_object *obj)
648
{
649
struct qaic_bo *bo = to_qaic_bo(obj);
650
struct scatterlist *sg, *sg_in;
651
struct sg_table *sgt, *sgt_in;
652
int i;
653
654
sgt_in = bo->sgt;
655
656
sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
657
if (!sgt)
658
return ERR_PTR(-ENOMEM);
659
660
if (sg_alloc_table(sgt, sgt_in->orig_nents, GFP_KERNEL)) {
661
kfree(sgt);
662
return ERR_PTR(-ENOMEM);
663
}
664
665
sg = sgt->sgl;
666
for_each_sgtable_sg(sgt_in, sg_in, i) {
667
memcpy(sg, sg_in, sizeof(*sg));
668
sg = sg_next(sg);
669
}
670
671
return sgt;
672
}
673
674
static const struct drm_gem_object_funcs qaic_gem_funcs = {
675
.free = qaic_free_object,
676
.get_sg_table = qaic_get_sg_table,
677
.print_info = qaic_gem_print_info,
678
.mmap = qaic_gem_object_mmap,
679
.vm_ops = &drm_vm_ops,
680
};
681
682
static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
683
{
684
if (reinit) {
685
bo->sliced = false;
686
reinit_completion(&bo->xfer_done);
687
} else {
688
mutex_init(&bo->lock);
689
init_completion(&bo->xfer_done);
690
}
691
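/* Leave xfer_done signalled so a wait on a BO with no transfer in flight returns immediately */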
complete_all(&bo->xfer_done);
692
INIT_LIST_HEAD(&bo->slices);
693
INIT_LIST_HEAD(&bo->xfer_list);
694
}
695
696
static struct qaic_bo *qaic_alloc_init_bo(void)
697
{
698
struct qaic_bo *bo;
699
700
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
701
if (!bo)
702
return ERR_PTR(-ENOMEM);
703
704
qaic_init_bo(bo, false);
705
706
return bo;
707
}
708
709
int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
710
{
711
struct qaic_create_bo *args = data;
712
int usr_rcu_id, qdev_rcu_id;
713
struct drm_gem_object *obj;
714
struct qaic_device *qdev;
715
struct qaic_user *usr;
716
struct qaic_bo *bo;
717
size_t size;
718
int ret;
719
720
if (args->pad)
721
return -EINVAL;
722
723
size = PAGE_ALIGN(args->size);
724
if (size == 0)
725
return -EINVAL;
726
727
usr = file_priv->driver_priv;
728
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
729
if (!usr->qddev) {
730
ret = -ENODEV;
731
goto unlock_usr_srcu;
732
}
733
734
qdev = usr->qddev->qdev;
735
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
736
if (qdev->dev_state != QAIC_ONLINE) {
737
ret = -ENODEV;
738
goto unlock_dev_srcu;
739
}
740
741
bo = qaic_alloc_init_bo();
742
if (IS_ERR(bo)) {
743
ret = PTR_ERR(bo);
744
goto unlock_dev_srcu;
745
}
746
obj = &bo->base;
747
748
drm_gem_private_object_init(dev, obj, size);
749
750
obj->funcs = &qaic_gem_funcs;
751
ret = create_sgt(qdev, &bo->sgt, size);
752
if (ret)
753
goto free_bo;
754
755
ret = drm_gem_create_mmap_offset(obj);
756
if (ret)
757
goto free_bo;
758
759
ret = drm_gem_handle_create(file_priv, obj, &args->handle);
760
if (ret)
761
goto free_bo;
762
763
drm_gem_object_put(obj);
764
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
765
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
766
767
return 0;
768
769
free_bo:
770
drm_gem_object_put(obj);
771
unlock_dev_srcu:
772
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
773
unlock_usr_srcu:
774
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
775
return ret;
776
}
777
778
int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
779
{
780
struct qaic_mmap_bo *args = data;
781
int usr_rcu_id, qdev_rcu_id;
782
struct drm_gem_object *obj;
783
struct qaic_device *qdev;
784
struct qaic_user *usr;
785
int ret = 0;
786
787
usr = file_priv->driver_priv;
788
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
789
if (!usr->qddev) {
790
ret = -ENODEV;
791
goto unlock_usr_srcu;
792
}
793
794
qdev = usr->qddev->qdev;
795
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
796
if (qdev->dev_state != QAIC_ONLINE) {
797
ret = -ENODEV;
798
goto unlock_dev_srcu;
799
}
800
801
obj = drm_gem_object_lookup(file_priv, args->handle);
802
if (!obj) {
803
ret = -ENOENT;
804
goto unlock_dev_srcu;
805
}
806
807
args->offset = drm_vma_node_offset_addr(&obj->vma_node);
808
809
drm_gem_object_put(obj);
810
811
unlock_dev_srcu:
812
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
813
unlock_usr_srcu:
814
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
815
return ret;
816
}
817
818
struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
819
{
820
struct dma_buf_attachment *attach;
821
struct drm_gem_object *obj;
822
struct qaic_bo *bo;
823
int ret;
824
825
bo = qaic_alloc_init_bo();
826
if (IS_ERR(bo)) {
827
ret = PTR_ERR(bo);
828
goto out;
829
}
830
831
obj = &bo->base;
832
get_dma_buf(dma_buf);
833
834
attach = dma_buf_attach(dma_buf, dev->dev);
835
if (IS_ERR(attach)) {
836
ret = PTR_ERR(attach);
837
goto attach_fail;
838
}
839
840
if (!attach->dmabuf->size) {
841
ret = -EINVAL;
842
goto size_align_fail;
843
}
844
845
drm_gem_private_object_init(dev, obj, attach->dmabuf->size);
846
/*
847
* Skip dma_buf_map_attachment() here since we do not know the direction
* just yet. Once the direction is known, in the subsequent IOCTL to
* attach slicing, the mapping can be done then.
850
*/
851
852
obj->funcs = &qaic_gem_funcs;
853
obj->import_attach = attach;
854
obj->resv = dma_buf->resv;
855
856
return obj;
857
858
size_align_fail:
859
dma_buf_detach(dma_buf, attach);
860
attach_fail:
861
dma_buf_put(dma_buf);
862
kfree(bo);
863
out:
864
return ERR_PTR(ret);
865
}
866
867
static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr)
868
{
869
struct drm_gem_object *obj = &bo->base;
870
struct sg_table *sgt;
871
int ret;
872
873
sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
874
if (IS_ERR(sgt)) {
875
ret = PTR_ERR(sgt);
876
return ret;
877
}
878
879
bo->sgt = sgt;
880
881
return 0;
882
}
883
884
static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
885
struct qaic_attach_slice_hdr *hdr)
886
{
887
int ret;
888
889
ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
890
if (ret)
891
return -EFAULT;
892
893
return 0;
894
}
895
896
static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
897
struct qaic_attach_slice_hdr *hdr)
898
{
899
int ret;
900
901
if (drm_gem_is_imported(&bo->base))
902
ret = qaic_prepare_import_bo(bo, hdr);
903
else
904
ret = qaic_prepare_export_bo(qdev, bo, hdr);
905
bo->dir = hdr->dir;
906
bo->dbc = &qdev->dbc[hdr->dbc_id];
907
bo->nr_slice = hdr->count;
908
909
return ret;
910
}
911
912
static void qaic_unprepare_import_bo(struct qaic_bo *bo)
913
{
914
dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
915
bo->sgt = NULL;
916
}
917
918
static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
919
{
920
dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0);
921
}
922
923
static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
924
{
925
if (drm_gem_is_imported(&bo->base))
926
qaic_unprepare_import_bo(bo);
927
else
928
qaic_unprepare_export_bo(qdev, bo);
929
930
bo->dir = 0;
931
bo->dbc = NULL;
932
bo->nr_slice = 0;
933
}
934
935
static void qaic_free_slices_bo(struct qaic_bo *bo)
936
{
937
struct bo_slice *slice, *temp;
938
939
list_for_each_entry_safe(slice, temp, &bo->slices, slice)
940
kref_put(&slice->ref_count, free_slice);
941
if (WARN_ON_ONCE(bo->total_slice_nents != 0))
942
bo->total_slice_nents = 0;
943
bo->nr_slice = 0;
944
}
945
946
static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
947
struct qaic_attach_slice_hdr *hdr,
948
struct qaic_attach_slice_entry *slice_ent)
949
{
950
int ret, i;
951
952
for (i = 0; i < hdr->count; i++) {
953
ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]);
954
if (ret) {
955
qaic_free_slices_bo(bo);
956
return ret;
957
}
958
}
959
960
if (bo->total_slice_nents > bo->dbc->nelem) {
961
qaic_free_slices_bo(bo);
962
return -ENOSPC;
963
}
964
965
return 0;
966
}
967
968
int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
969
{
970
struct qaic_attach_slice_entry *slice_ent;
971
struct qaic_attach_slice *args = data;
972
int rcu_id, usr_rcu_id, qdev_rcu_id;
973
struct dma_bridge_chan *dbc;
974
struct drm_gem_object *obj;
975
struct qaic_device *qdev;
976
unsigned long arg_size;
977
struct qaic_user *usr;
978
u8 __user *user_data;
979
struct qaic_bo *bo;
980
int ret;
981
982
if (args->hdr.count == 0)
983
return -EINVAL;
984
985
if (check_mul_overflow((unsigned long)args->hdr.count,
986
(unsigned long)sizeof(*slice_ent),
987
&arg_size))
988
return -EINVAL;
989
990
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
991
return -EINVAL;
992
993
if (args->data == 0)
994
return -EINVAL;
995
996
usr = file_priv->driver_priv;
997
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
998
if (!usr->qddev) {
999
ret = -ENODEV;
1000
goto unlock_usr_srcu;
1001
}
1002
1003
qdev = usr->qddev->qdev;
1004
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
1005
if (qdev->dev_state != QAIC_ONLINE) {
1006
ret = -ENODEV;
1007
goto unlock_dev_srcu;
1008
}
1009
1010
if (args->hdr.dbc_id >= qdev->num_dbc) {
1011
ret = -EINVAL;
1012
goto unlock_dev_srcu;
1013
}
1014
1015
user_data = u64_to_user_ptr(args->data);
1016
1017
slice_ent = memdup_user(user_data, arg_size);
1018
if (IS_ERR(slice_ent)) {
1019
ret = PTR_ERR(slice_ent);
1020
goto unlock_dev_srcu;
1021
}
1022
1023
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
1024
if (!obj) {
1025
ret = -ENOENT;
1026
goto free_slice_ent;
1027
}
1028
1029
ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size);
1030
if (ret)
1031
goto put_bo;
1032
1033
bo = to_qaic_bo(obj);
1034
ret = mutex_lock_interruptible(&bo->lock);
1035
if (ret)
1036
goto put_bo;
1037
1038
if (bo->sliced) {
1039
ret = -EINVAL;
1040
goto unlock_bo;
1041
}
1042
1043
dbc = &qdev->dbc[args->hdr.dbc_id];
1044
rcu_id = srcu_read_lock(&dbc->ch_lock);
1045
if (dbc->usr != usr) {
1046
ret = -EINVAL;
1047
goto unlock_ch_srcu;
1048
}
1049
1050
if (dbc->id == qdev->ssr_dbc) {
1051
ret = -EPIPE;
1052
goto unlock_ch_srcu;
1053
}
1054
1055
ret = qaic_prepare_bo(qdev, bo, &args->hdr);
1056
if (ret)
1057
goto unlock_ch_srcu;
1058
1059
ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent);
1060
if (ret)
1061
goto unprepare_bo;
1062
1063
if (args->hdr.dir == DMA_TO_DEVICE)
1064
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
1065
1066
bo->sliced = true;
1067
list_add_tail(&bo->bo_list, &bo->dbc->bo_lists);
1068
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1069
mutex_unlock(&bo->lock);
1070
kfree(slice_ent);
1071
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1072
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1073
1074
return 0;
1075
1076
unprepare_bo:
1077
qaic_unprepare_bo(qdev, bo);
1078
unlock_ch_srcu:
1079
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1080
unlock_bo:
1081
mutex_unlock(&bo->lock);
1082
put_bo:
1083
drm_gem_object_put(obj);
1084
free_slice_ent:
1085
kfree(slice_ent);
1086
unlock_dev_srcu:
1087
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1088
unlock_usr_srcu:
1089
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1090
return ret;
1091
}
1092
1093
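/*
 * Space left in the request ring. One slot is always kept empty so that a
 * full ring (tail just behind head) can be distinguished from an empty one
 * (head == tail).
 */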
static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
1094
{
1095
u32 avail = head - tail - 1;
1096
1097
if (head <= tail)
1098
avail += q_size;
1099
1100
return avail;
1101
}
1102
1103
static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
1104
u32 head, u32 *ptail)
1105
{
1106
struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
1107
struct dbc_req *reqs = slice->reqs;
1108
u32 tail = *ptail;
1109
u32 avail;
1110
1111
avail = fifo_space_avail(head, tail, dbc->nelem);
1112
if (avail < slice->nents)
1113
return -EAGAIN;
1114
1115
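/* The copy may wrap around the end of the ring, so split it into two memcpy()s */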
if (tail + slice->nents > dbc->nelem) {
1116
avail = dbc->nelem - tail;
1117
avail = min_t(u32, avail, slice->nents);
1118
memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
1119
reqs += avail;
1120
avail = slice->nents - avail;
1121
if (avail)
1122
memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
1123
} else {
1124
memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
1125
}
1126
1127
*ptail = (tail + slice->nents) % dbc->nelem;
1128
1129
return 0;
1130
}
1131
1132
static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
1133
u64 resize, struct dma_bridge_chan *dbc, u32 head,
1134
u32 *ptail)
1135
{
1136
struct dbc_req *reqs = slice->reqs;
1137
struct dbc_req *last_req;
1138
u32 tail = *ptail;
1139
u64 last_bytes;
1140
u32 first_n;
1141
u32 avail;
1142
1143
avail = fifo_space_avail(head, tail, dbc->nelem);
1144
1145
/*
1146
* After this for loop is complete, first_n represents the index
1147
* of the last DMA request of this slice that needs to be
1148
* transferred after resizing and last_bytes represents DMA size
1149
* of that request.
1150
*/
1151
last_bytes = resize;
1152
for (first_n = 0; first_n < slice->nents; first_n++)
1153
if (last_bytes > le32_to_cpu(reqs[first_n].len))
1154
last_bytes -= le32_to_cpu(reqs[first_n].len);
1155
else
1156
break;
1157
1158
if (avail < (first_n + 1))
1159
return -EAGAIN;
1160
1161
if (first_n) {
1162
if (tail + first_n > dbc->nelem) {
1163
avail = dbc->nelem - tail;
1164
avail = min_t(u32, avail, first_n);
1165
memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
1166
last_req = reqs + avail;
1167
avail = first_n - avail;
1168
if (avail)
1169
memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
1170
} else {
1171
memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
1172
}
1173
}
1174
1175
/*
1176
* Copy over the last entry. Here we need to adjust len to the left over
1177
* size, and set src and dst to the entry it is copied to.
1178
*/
1179
last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
1180
memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
1181
1182
/*
1183
* last_bytes holds the size of a DMA segment. The maximum DMA segment
* size is set to UINT_MAX by qaic, so last_bytes can never exceed the
* u32 range and the cast below does not truncate the value.
1186
*/
1187
last_req->len = cpu_to_le32((u32)last_bytes);
1188
last_req->src_addr = reqs[first_n].src_addr;
1189
last_req->dest_addr = reqs[first_n].dest_addr;
1190
if (!last_bytes)
1191
/* Disable DMA transfer */
1192
last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
1193
1194
*ptail = (tail + first_n + 1) % dbc->nelem;
1195
1196
return 0;
1197
}
1198
1199
static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv,
1200
struct qaic_execute_entry *exec, unsigned int count,
1201
bool is_partial, struct dma_bridge_chan *dbc, u32 head,
1202
u32 *tail)
1203
{
1204
struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
1205
struct drm_gem_object *obj;
1206
struct bo_slice *slice;
1207
unsigned long flags;
1208
struct qaic_bo *bo;
1209
int i, j;
1210
int ret;
1211
1212
for (i = 0; i < count; i++) {
1213
/*
1214
* The reference taken here is dropped in dbc_irq_threaded_fn() once
* the transfer of this buffer is complete.
1216
*/
1217
obj = drm_gem_object_lookup(file_priv,
1218
is_partial ? pexec[i].handle : exec[i].handle);
1219
if (!obj) {
1220
ret = -ENOENT;
1221
goto failed_to_send_bo;
1222
}
1223
1224
bo = to_qaic_bo(obj);
1225
ret = mutex_lock_interruptible(&bo->lock);
1226
if (ret)
1227
goto failed_to_send_bo;
1228
1229
if (!bo->sliced) {
1230
ret = -EINVAL;
1231
goto unlock_bo;
1232
}
1233
1234
if (is_partial && pexec[i].resize > bo->base.size) {
1235
ret = -EINVAL;
1236
goto unlock_bo;
1237
}
1238
1239
spin_lock_irqsave(&dbc->xfer_lock, flags);
1240
if (bo_queued(bo)) {
1241
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1242
ret = -EINVAL;
1243
goto unlock_bo;
1244
}
1245
1246
bo->req_id = dbc->next_req_id++;
1247
1248
list_for_each_entry(slice, &bo->slices, slice) {
1249
for (j = 0; j < slice->nents; j++)
1250
slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
1251
1252
if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
1253
/* Configure the slice for no DMA transfer */
1254
ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
1255
else if (is_partial && pexec[i].resize < slice->offset + slice->size)
1256
/* Configure the slice to be partially DMA transferred */
1257
ret = copy_partial_exec_reqs(qdev, slice,
1258
pexec[i].resize - slice->offset, dbc,
1259
head, tail);
1260
else
1261
ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
1262
if (ret) {
1263
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1264
goto unlock_bo;
1265
}
1266
}
1267
reinit_completion(&bo->xfer_done);
1268
list_add_tail(&bo->xfer_list, &dbc->xfer_list);
1269
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1270
dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
1271
mutex_unlock(&bo->lock);
1272
}
1273
1274
return 0;
1275
1276
unlock_bo:
1277
mutex_unlock(&bo->lock);
1278
failed_to_send_bo:
1279
if (likely(obj))
1280
drm_gem_object_put(obj);
1281
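/*
 * Unwind the BOs queued so far: take the most recently added entries back
 * off the transfer list, sync them for the CPU and drop their references.
 */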
for (j = 0; j < i; j++) {
1282
spin_lock_irqsave(&dbc->xfer_lock, flags);
1283
bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
1284
obj = &bo->base;
1285
list_del_init(&bo->xfer_list);
1286
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1287
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
1288
drm_gem_object_put(obj);
1289
}
1290
return ret;
1291
}
1292
1293
static void update_profiling_data(struct drm_file *file_priv,
1294
struct qaic_execute_entry *exec, unsigned int count,
1295
bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level)
1296
{
1297
struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
1298
struct drm_gem_object *obj;
1299
struct qaic_bo *bo;
1300
int i;
1301
1302
for (i = 0; i < count; i++) {
1303
/*
1304
* Since we already committed the BO to hardware, the only way
1305
* this should fail is a pending signal. We can't cancel the
1306
* submit to hardware, so we have to just skip the profiling
1307
* data. In case the signal is not fatal to the process, we
1308
* return success so that the user doesn't try to resubmit.
1309
*/
1310
obj = drm_gem_object_lookup(file_priv,
1311
is_partial ? pexec[i].handle : exec[i].handle);
1312
if (!obj)
1313
break;
1314
bo = to_qaic_bo(obj);
1315
bo->perf_stats.req_received_ts = received_ts;
1316
bo->perf_stats.req_submit_ts = submit_ts;
1317
bo->perf_stats.queue_level_before = queue_level;
1318
queue_level += bo->total_slice_nents;
1319
drm_gem_object_put(obj);
1320
}
1321
}
1322
1323
static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv,
1324
bool is_partial)
1325
{
1326
struct qaic_execute *args = data;
1327
struct qaic_execute_entry *exec;
1328
struct dma_bridge_chan *dbc;
1329
int usr_rcu_id, qdev_rcu_id;
1330
struct qaic_device *qdev;
1331
struct qaic_user *usr;
1332
u64 received_ts;
1333
u32 queue_level;
1334
u64 submit_ts;
1335
int rcu_id;
1336
u32 head;
1337
u32 tail;
1338
u64 size;
1339
int ret;
1340
1341
received_ts = ktime_get_ns();
1342
1343
size = is_partial ? sizeof(struct qaic_partial_execute_entry) : sizeof(*exec);
1344
if (args->hdr.count == 0)
1345
return -EINVAL;
1346
1347
exec = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, size);
1348
if (IS_ERR(exec))
1349
return PTR_ERR(exec);
1350
1351
usr = file_priv->driver_priv;
1352
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
1353
if (!usr->qddev) {
1354
ret = -ENODEV;
1355
goto unlock_usr_srcu;
1356
}
1357
1358
qdev = usr->qddev->qdev;
1359
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
1360
if (qdev->dev_state != QAIC_ONLINE) {
1361
ret = -ENODEV;
1362
goto unlock_dev_srcu;
1363
}
1364
1365
if (args->hdr.dbc_id >= qdev->num_dbc) {
1366
ret = -EINVAL;
1367
goto unlock_dev_srcu;
1368
}
1369
1370
dbc = &qdev->dbc[args->hdr.dbc_id];
1371
1372
rcu_id = srcu_read_lock(&dbc->ch_lock);
1373
if (!dbc->usr || dbc->usr->handle != usr->handle) {
1374
ret = -EPERM;
1375
goto release_ch_rcu;
1376
}
1377
1378
if (dbc->id == qdev->ssr_dbc) {
1379
ret = -EPIPE;
1380
goto release_ch_rcu;
1381
}
1382
1383
ret = mutex_lock_interruptible(&dbc->req_lock);
1384
if (ret)
1385
goto release_ch_rcu;
1386
1387
head = readl(dbc->dbc_base + REQHP_OFF);
1388
tail = readl(dbc->dbc_base + REQTP_OFF);
1389
1390
if (head == U32_MAX || tail == U32_MAX) {
1391
/* PCI link error */
1392
ret = -ENODEV;
1393
goto unlock_req_lock;
1394
}
1395
1396
queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
1397
1398
ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
1399
head, &tail);
1400
if (ret)
1401
goto unlock_req_lock;
1402
1403
/* Finalize commit to hardware */
1404
submit_ts = ktime_get_ns();
1405
writel(tail, dbc->dbc_base + REQTP_OFF);
1406
mutex_unlock(&dbc->req_lock);
1407
1408
update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
1409
submit_ts, queue_level);
1410
1411
if (datapath_polling)
1412
schedule_work(&dbc->poll_work);
1413
1414
unlock_req_lock:
1415
if (ret)
1416
mutex_unlock(&dbc->req_lock);
1417
release_ch_rcu:
1418
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1419
unlock_dev_srcu:
1420
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1421
unlock_usr_srcu:
1422
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1423
kfree(exec);
1424
return ret;
1425
}
1426
1427
int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
1428
{
1429
return __qaic_execute_bo_ioctl(dev, data, file_priv, false);
1430
}
1431
1432
int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
1433
{
1434
return __qaic_execute_bo_ioctl(dev, data, file_priv, true);
1435
}
1436
1437
/*
1438
* Our interrupt handling is a bit more complicated than a simple ideal, but
1439
* sadly necessary.
1440
*
1441
* Each dbc has a completion queue. Entries in the queue correspond to DMA
1442
* requests which the device has processed. The hardware already has a built
1443
* in irq mitigation. When the device puts an entry into the queue, it will
1444
* only trigger an interrupt if the queue was empty. Therefore, when adding
1445
* the Nth event to a non-empty queue, the hardware doesn't trigger an
1446
* interrupt. This means the host doesn't get additional interrupts signaling
1447
* the same thing - the queue has something to process.
1448
* This behavior can be overridden in the DMA request.
1449
* This means that when the host receives an interrupt, it is required to
1450
* drain the queue.
1451
*
1452
* This behavior is what NAPI attempts to accomplish, although we can't use
1453
* NAPI as we don't have a netdev. We use threaded irqs instead.
1454
*
1455
* However, there is a situation where the host drains the queue fast enough
1456
* that every event causes an interrupt. Typically this is not a problem as
1457
* the rate of events would be low. However, that is not the case with
1458
* lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of
1459
* lprnet, the host receives roughly 80k interrupts per second from the device
1460
* (per /proc/interrupts). While NAPI documentation indicates the host should
1461
* just chug along, sadly that behavior causes instability in some hosts.
1462
*
1463
* Therefore, we implement an interrupt disable scheme similar to NAPI. The
1464
* key difference is that we will delay after draining the queue for a small
1465
* time to allow additional events to come in via polling. Using the above
1466
* lprnet workload, this reduces the number of interrupts processed from
1467
* ~80k/sec to about 64 in 5 minutes and appears to solve the system
1468
* instability.
1469
*/
1470
irqreturn_t dbc_irq_handler(int irq, void *data)
1471
{
1472
struct dma_bridge_chan *dbc = data;
1473
int rcu_id;
1474
u32 head;
1475
u32 tail;
1476
1477
rcu_id = srcu_read_lock(&dbc->ch_lock);
1478
1479
if (datapath_polling) {
1480
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1481
/*
1482
* Normally datapath_polling will not have irqs enabled, but
1483
* when running with only one MSI the interrupt is shared with
1484
* MHI so it cannot be disabled. Return ASAP instead.
1485
*/
1486
return IRQ_HANDLED;
1487
}
1488
1489
if (!dbc->usr) {
1490
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1491
return IRQ_HANDLED;
1492
}
1493
1494
head = readl(dbc->dbc_base + RSPHP_OFF);
1495
if (head == U32_MAX) { /* PCI link error */
1496
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1497
return IRQ_NONE;
1498
}
1499
1500
tail = readl(dbc->dbc_base + RSPTP_OFF);
1501
if (tail == U32_MAX) { /* PCI link error */
1502
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1503
return IRQ_NONE;
1504
}
1505
1506
if (head == tail) { /* queue empty */
1507
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1508
return IRQ_NONE;
1509
}
1510
1511
if (!dbc->qdev->single_msi)
1512
disable_irq_nosync(irq);
1513
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1514
return IRQ_WAKE_THREAD;
1515
}
1516
1517
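/*
 * Polling substitute for the DBC interrupt. While transfers are outstanding,
 * periodically check the response queue and wake the threaded handler when
 * the device has produced new completions.
 */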
void qaic_irq_polling_work(struct work_struct *work)
1518
{
1519
struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
1520
unsigned long flags;
1521
int rcu_id;
1522
u32 head;
1523
u32 tail;
1524
1525
rcu_id = srcu_read_lock(&dbc->ch_lock);
1526
1527
while (1) {
1528
if (dbc->qdev->dev_state != QAIC_ONLINE) {
1529
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1530
return;
1531
}
1532
if (!dbc->usr) {
1533
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1534
return;
1535
}
1536
spin_lock_irqsave(&dbc->xfer_lock, flags);
1537
if (list_empty(&dbc->xfer_list)) {
1538
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1539
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1540
return;
1541
}
1542
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1543
1544
head = readl(dbc->dbc_base + RSPHP_OFF);
1545
if (head == U32_MAX) { /* PCI link error */
1546
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1547
return;
1548
}
1549
1550
tail = readl(dbc->dbc_base + RSPTP_OFF);
1551
if (tail == U32_MAX) { /* PCI link error */
1552
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1553
return;
1554
}
1555
1556
if (head != tail) {
1557
irq_wake_thread(dbc->irq, dbc);
1558
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1559
return;
1560
}
1561
1562
cond_resched();
1563
usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us);
1564
}
1565
}
1566
1567
irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
1568
{
1569
struct dma_bridge_chan *dbc = data;
1570
int event_count = NUM_EVENTS;
1571
int delay_count = NUM_DELAYS;
1572
struct qaic_device *qdev;
1573
struct qaic_bo *bo, *i;
1574
struct dbc_rsp *rsp;
1575
unsigned long flags;
1576
int rcu_id;
1577
u16 status;
1578
u16 req_id;
1579
u32 head;
1580
u32 tail;
1581
1582
rcu_id = srcu_read_lock(&dbc->ch_lock);
1583
qdev = dbc->qdev;
1584
1585
head = readl(dbc->dbc_base + RSPHP_OFF);
1586
if (head == U32_MAX) /* PCI link error */
1587
goto error_out;
1588
1589
read_fifo:
1590
1591
if (!event_count) {
1592
event_count = NUM_EVENTS;
1593
cond_resched();
1594
}
1595
1596
/*
1597
* if this channel isn't assigned or gets unassigned during processing
1598
* we have nothing further to do
1599
*/
1600
if (!dbc->usr)
1601
goto error_out;
1602
1603
tail = readl(dbc->dbc_base + RSPTP_OFF);
1604
if (tail == U32_MAX) /* PCI link error */
1605
goto error_out;
1606
1607
if (head == tail) { /* queue empty */
1608
if (delay_count) {
1609
--delay_count;
1610
usleep_range(100, 200);
1611
goto read_fifo; /* check for a new event */
1612
}
1613
goto normal_out;
1614
}
1615
1616
delay_count = NUM_DELAYS;
1617
while (head != tail) {
1618
if (!event_count)
1619
break;
1620
--event_count;
1621
rsp = dbc->rsp_q_base + head * sizeof(*rsp);
1622
req_id = le16_to_cpu(rsp->req_id);
1623
status = le16_to_cpu(rsp->status);
1624
if (status)
1625
pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status);
1626
spin_lock_irqsave(&dbc->xfer_lock, flags);
1627
/*
1628
* A BO can receive multiple interrupts, since a BO can be divided
* into multiple slices and a buffer receives as many interrupts as
* it has slices. Until it has received interrupts for all of its
* slices, we cannot mark that buffer complete.
1632
*/
1633
list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) {
1634
if (bo->req_id == req_id)
1635
bo->nr_slice_xfer_done++;
1636
else
1637
continue;
1638
1639
if (bo->nr_slice_xfer_done < bo->nr_slice)
1640
break;
1641
1642
/*
1643
* At this point we have received all the interrupts for
1644
* BO, which means BO execution is complete.
1645
*/
1646
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
1647
bo->nr_slice_xfer_done = 0;
1648
list_del_init(&bo->xfer_list);
1649
bo->perf_stats.req_processed_ts = ktime_get_ns();
1650
complete_all(&bo->xfer_done);
1651
drm_gem_object_put(&bo->base);
1652
break;
1653
}
1654
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1655
head = (head + 1) % dbc->nelem;
1656
}
1657
1658
/*
1659
* Update the head pointer of response queue and let the device know
1660
* that we have consumed elements from the queue.
1661
*/
1662
writel(head, dbc->dbc_base + RSPHP_OFF);
1663
1664
/* elements might have been put in the queue while we were processing */
1665
goto read_fifo;
1666
1667
normal_out:
1668
if (!qdev->single_msi && likely(!datapath_polling))
1669
enable_irq(irq);
1670
else if (unlikely(datapath_polling))
1671
schedule_work(&dbc->poll_work);
1672
/* Checking the fifo and enabling irqs is a race; check for a missed event */
1673
tail = readl(dbc->dbc_base + RSPTP_OFF);
1674
if (tail != U32_MAX && head != tail) {
1675
if (!qdev->single_msi && likely(!datapath_polling))
1676
disable_irq_nosync(irq);
1677
goto read_fifo;
1678
}
1679
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1680
return IRQ_HANDLED;
1681
1682
error_out:
1683
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1684
if (!qdev->single_msi && likely(!datapath_polling))
1685
enable_irq(irq);
1686
else if (unlikely(datapath_polling))
1687
schedule_work(&dbc->poll_work);
1688
1689
return IRQ_HANDLED;
1690
}
1691
1692
int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
1693
{
1694
struct qaic_wait *args = data;
1695
int usr_rcu_id, qdev_rcu_id;
1696
struct dma_bridge_chan *dbc;
1697
struct drm_gem_object *obj;
1698
struct qaic_device *qdev;
1699
unsigned long timeout;
1700
struct qaic_user *usr;
1701
struct qaic_bo *bo;
1702
int rcu_id;
1703
int ret;
1704
1705
if (args->pad != 0)
1706
return -EINVAL;
1707
1708
usr = file_priv->driver_priv;
1709
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
1710
if (!usr->qddev) {
1711
ret = -ENODEV;
1712
goto unlock_usr_srcu;
1713
}
1714
1715
qdev = usr->qddev->qdev;
1716
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
1717
if (qdev->dev_state != QAIC_ONLINE) {
1718
ret = -ENODEV;
1719
goto unlock_dev_srcu;
1720
}
1721
1722
if (args->dbc_id >= qdev->num_dbc) {
1723
ret = -EINVAL;
1724
goto unlock_dev_srcu;
1725
}
1726
1727
dbc = &qdev->dbc[args->dbc_id];
1728
1729
rcu_id = srcu_read_lock(&dbc->ch_lock);
1730
if (dbc->usr != usr) {
1731
ret = -EPERM;
1732
goto unlock_ch_srcu;
1733
}
1734
1735
if (dbc->id == qdev->ssr_dbc) {
1736
ret = -EPIPE;
1737
goto unlock_ch_srcu;
1738
}
1739
1740
obj = drm_gem_object_lookup(file_priv, args->handle);
1741
if (!obj) {
1742
ret = -ENOENT;
1743
goto unlock_ch_srcu;
1744
}
1745
1746
bo = to_qaic_bo(obj);
1747
timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms;
1748
timeout = msecs_to_jiffies(timeout);
1749
ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout);
1750
if (!ret) {
1751
ret = -ETIMEDOUT;
1752
goto put_obj;
1753
}
1754
if (ret > 0)
1755
ret = 0;
1756
1757
if (!dbc->usr)
1758
ret = -EPERM;
1759
1760
if (dbc->id == qdev->ssr_dbc)
1761
ret = -EPIPE;
1762
1763
put_obj:
1764
drm_gem_object_put(obj);
1765
unlock_ch_srcu:
1766
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1767
unlock_dev_srcu:
1768
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1769
unlock_usr_srcu:
1770
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1771
return ret;
1772
}
1773
1774
int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
1775
{
1776
struct qaic_perf_stats_entry *ent = NULL;
1777
struct qaic_perf_stats *args = data;
1778
int usr_rcu_id, qdev_rcu_id;
1779
struct drm_gem_object *obj;
1780
struct qaic_device *qdev;
1781
struct qaic_user *usr;
1782
struct qaic_bo *bo;
1783
int ret = 0;
1784
int i;
1785
1786
usr = file_priv->driver_priv;
1787
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
1788
if (!usr->qddev) {
1789
ret = -ENODEV;
1790
goto unlock_usr_srcu;
1791
}
1792
1793
qdev = usr->qddev->qdev;
1794
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
1795
if (qdev->dev_state != QAIC_ONLINE) {
1796
ret = -ENODEV;
1797
goto unlock_dev_srcu;
1798
}
1799
1800
if (args->hdr.dbc_id >= qdev->num_dbc) {
1801
ret = -EINVAL;
1802
goto unlock_dev_srcu;
1803
}
1804
1805
ent = memdup_array_user(u64_to_user_ptr(args->data), args->hdr.count, sizeof(*ent));
1806
if (IS_ERR(ent)) {
1807
ret = PTR_ERR(ent);
1808
goto unlock_dev_srcu;
1809
}
1810
1811
for (i = 0; i < args->hdr.count; i++) {
1812
obj = drm_gem_object_lookup(file_priv, ent[i].handle);
1813
if (!obj) {
1814
ret = -ENOENT;
1815
goto free_ent;
1816
}
1817
bo = to_qaic_bo(obj);
1818
if (!bo->sliced) {
1819
drm_gem_object_put(obj);
1820
ret = -EINVAL;
1821
goto free_ent;
1822
}
1823
if (bo->dbc->id != args->hdr.dbc_id) {
1824
drm_gem_object_put(obj);
1825
ret = -EINVAL;
1826
goto free_ent;
1827
}
1828
/*
1829
* If the perf stats ioctl is called before the wait ioctl has
* completed, the latency information is invalid.
1831
*/
1832
if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) {
1833
ent[i].device_latency_us = 0;
1834
} else {
1835
ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts -
1836
bo->perf_stats.req_submit_ts), 1000);
1837
}
1838
ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts -
1839
bo->perf_stats.req_received_ts), 1000);
1840
ent[i].queue_level_before = bo->perf_stats.queue_level_before;
1841
ent[i].num_queue_element = bo->total_slice_nents;
1842
drm_gem_object_put(obj);
1843
}
1844
1845
if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent)))
1846
ret = -EFAULT;
1847
1848
free_ent:
1849
kfree(ent);
1850
unlock_dev_srcu:
1851
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1852
unlock_usr_srcu:
1853
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1854
return ret;
1855
}
1856
1857
static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo)
1858
{
1859
qaic_free_slices_bo(bo);
1860
qaic_unprepare_bo(qdev, bo);
1861
qaic_init_bo(bo, true);
1862
list_del(&bo->bo_list);
1863
drm_gem_object_put(&bo->base);
1864
}
1865
1866
int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
1867
{
1868
struct qaic_detach_slice *args = data;
1869
int rcu_id, usr_rcu_id, qdev_rcu_id;
1870
struct dma_bridge_chan *dbc;
1871
struct drm_gem_object *obj;
1872
struct qaic_device *qdev;
1873
struct qaic_user *usr;
1874
unsigned long flags;
1875
struct qaic_bo *bo;
1876
int ret;
1877
1878
if (args->pad != 0)
1879
return -EINVAL;
1880
1881
usr = file_priv->driver_priv;
1882
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
1883
if (!usr->qddev) {
1884
ret = -ENODEV;
1885
goto unlock_usr_srcu;
1886
}
1887
1888
qdev = usr->qddev->qdev;
1889
qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
1890
if (qdev->dev_state != QAIC_ONLINE) {
1891
ret = -ENODEV;
1892
goto unlock_dev_srcu;
1893
}
1894
1895
obj = drm_gem_object_lookup(file_priv, args->handle);
1896
if (!obj) {
1897
ret = -ENOENT;
1898
goto unlock_dev_srcu;
1899
}
1900
1901
bo = to_qaic_bo(obj);
1902
ret = mutex_lock_interruptible(&bo->lock);
1903
if (ret)
1904
goto put_bo;
1905
1906
if (!bo->sliced) {
1907
ret = -EINVAL;
1908
goto unlock_bo;
1909
}
1910
1911
dbc = bo->dbc;
1912
rcu_id = srcu_read_lock(&dbc->ch_lock);
1913
if (dbc->usr != usr) {
1914
ret = -EINVAL;
1915
goto unlock_ch_srcu;
1916
}
1917
1918
/* Check if BO is committed to H/W for DMA */
1919
spin_lock_irqsave(&dbc->xfer_lock, flags);
1920
if (bo_queued(bo)) {
1921
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1922
ret = -EBUSY;
1923
goto unlock_ch_srcu;
1924
}
1925
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1926
1927
detach_slice_bo(qdev, bo);
1928
1929
unlock_ch_srcu:
1930
srcu_read_unlock(&dbc->ch_lock, rcu_id);
1931
unlock_bo:
1932
mutex_unlock(&bo->lock);
1933
put_bo:
1934
drm_gem_object_put(obj);
1935
unlock_dev_srcu:
1936
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
1937
unlock_usr_srcu:
1938
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
1939
return ret;
1940
}
1941
1942
static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
1943
{
1944
unsigned long flags;
1945
struct qaic_bo *bo;
1946
1947
spin_lock_irqsave(&dbc->xfer_lock, flags);
1948
while (!list_empty(&dbc->xfer_list)) {
1949
bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
1950
list_del_init(&bo->xfer_list);
1951
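/* Drop the lock while completing this BO; it is re-acquired before the list is checked again */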
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1952
bo->nr_slice_xfer_done = 0;
1953
bo->req_id = 0;
1954
bo->perf_stats.req_received_ts = 0;
1955
bo->perf_stats.req_submit_ts = 0;
1956
bo->perf_stats.req_processed_ts = 0;
1957
bo->perf_stats.queue_level_before = 0;
1958
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
1959
complete_all(&bo->xfer_done);
1960
drm_gem_object_put(&bo->base);
1961
spin_lock_irqsave(&dbc->xfer_lock, flags);
1962
}
1963
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
1964
}
1965
1966
static void sync_empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
1967
{
1968
empty_xfer_list(qdev, dbc);
1969
synchronize_srcu(&dbc->ch_lock);
1970
/*
1971
* Threads holding the channel lock may have added more elements to the
* xfer_list. Flush those elements out of the xfer_list as well.
1973
*/
1974
empty_xfer_list(qdev, dbc);
1975
}
1976
1977
int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
1978
{
1979
if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
1980
return -EPERM;
1981
1982
qdev->dbc[dbc_id].usr = NULL;
1983
synchronize_srcu(&qdev->dbc[dbc_id].ch_lock);
1984
return 0;
1985
}
1986
1987
/**
1988
* enable_dbc - Enable the DBC. DBCs are disabled by removing the user's
* context. Add the user context back to the DBC to enable it. This function
* trusts the DBC ID passed and expects the DBC to be disabled.
1991
* @qdev: qaic device handle
1992
* @dbc_id: ID of the DBC
1993
* @usr: User context
1994
*/
1995
void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
1996
{
1997
qdev->dbc[dbc_id].usr = usr;
1998
}
1999
2000
void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
2001
{
2002
struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
2003
2004
dbc->usr = NULL;
2005
sync_empty_xfer_list(qdev, dbc);
2006
}
2007
2008
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
2009
{
2010
struct qaic_bo *bo, *bo_temp;
2011
struct dma_bridge_chan *dbc;
2012
2013
dbc = &qdev->dbc[dbc_id];
2014
if (!dbc->in_use)
2015
return;
2016
2017
wakeup_dbc(qdev, dbc_id);
2018
2019
dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr);
2020
dbc->total_size = 0;
2021
dbc->req_q_base = NULL;
2022
dbc->dma_addr = 0;
2023
dbc->nelem = 0;
2024
dbc->usr = NULL;
2025
2026
list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
2027
drm_gem_object_get(&bo->base);
2028
mutex_lock(&bo->lock);
2029
detach_slice_bo(qdev, bo);
2030
mutex_unlock(&bo->lock);
2031
drm_gem_object_put(&bo->base);
2032
}
2033
2034
dbc->in_use = false;
2035
wake_up(&dbc->dbc_release);
2036
}
2037
2038
void qaic_data_get_fifo_info(struct dma_bridge_chan *dbc, u32 *head, u32 *tail)
2039
{
2040
if (!dbc || !head || !tail)
2041
return;
2042
2043
*head = readl(dbc->dbc_base + REQHP_OFF);
2044
*tail = readl(dbc->dbc_base + REQTP_OFF);
2045
}
2046
2047
/*
2048
* qaic_dbc_enter_ssr - Prepare to enter subsystem reset (SSR) for the given DBC ID.
2049
* @qdev: qaic device handle
2050
* @dbc_id: ID of the DBC which will enter SSR
2051
*
2052
* The device will automatically deactivate the workload as not
2053
* all errors can be silently recovered. The user will be
2054
* notified and will need to decide the required recovery
2055
* action to take.
2056
*/
2057
void qaic_dbc_enter_ssr(struct qaic_device *qdev, u32 dbc_id)
2058
{
2059
qdev->ssr_dbc = dbc_id;
2060
release_dbc(qdev, dbc_id);
2061
}
2062
2063
/*
2064
* qaic_dbc_exit_ssr - Prepare to exit from subsystem reset (SSR).
2065
* @qdev: qaic device handle
2066
*
2067
* The DBC returns to an operational state and begins accepting work after exiting SSR.
2068
*/
2069
void qaic_dbc_exit_ssr(struct qaic_device *qdev)
2070
{
2071
qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
2072
}
2073
2074