Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/host1x/job.c
26444 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Tegra host1x Job
4
*
5
* Copyright (c) 2010-2015, NVIDIA Corporation.
6
*/
7
8
#include <linux/dma-mapping.h>
9
#include <linux/err.h>
10
#include <linux/host1x.h>
11
#include <linux/iommu.h>
12
#include <linux/kref.h>
13
#include <linux/module.h>
14
#include <linux/scatterlist.h>
15
#include <linux/slab.h>
16
#include <linux/vmalloc.h>
17
#include <trace/events/host1x.h>
18
19
#include "channel.h"
20
#include "dev.h"
21
#include "job.h"
22
#include "syncpt.h"
23
24
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
25
26
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
27
u32 num_cmdbufs, u32 num_relocs,
28
bool skip_firewall)
29
{
30
struct host1x_job *job = NULL;
31
unsigned int num_unpins = num_relocs;
32
bool enable_firewall;
33
u64 total;
34
void *mem;
35
36
enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
37
38
if (!enable_firewall)
39
num_unpins += num_cmdbufs;
40
41
/* Check that we're not going to overflow */
42
total = sizeof(struct host1x_job) +
43
(u64)num_relocs * sizeof(struct host1x_reloc) +
44
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
45
(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
46
(u64)num_unpins * sizeof(dma_addr_t) +
47
(u64)num_unpins * sizeof(u32 *);
48
if (total > ULONG_MAX)
49
return NULL;
50
51
mem = job = kzalloc(total, GFP_KERNEL);
52
if (!job)
53
return NULL;
54
55
job->enable_firewall = enable_firewall;
56
57
kref_init(&job->ref);
58
job->channel = ch;
59
60
/* Redistribute memory to the structs */
61
mem += sizeof(struct host1x_job);
62
job->relocs = num_relocs ? mem : NULL;
63
mem += num_relocs * sizeof(struct host1x_reloc);
64
job->unpins = num_unpins ? mem : NULL;
65
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
66
job->cmds = num_cmdbufs ? mem : NULL;
67
mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
68
job->addr_phys = num_unpins ? mem : NULL;
69
70
job->reloc_addr_phys = job->addr_phys;
71
job->gather_addr_phys = &job->addr_phys[num_relocs];
72
73
return job;
74
}
75
EXPORT_SYMBOL(host1x_job_alloc);
76
77
struct host1x_job *host1x_job_get(struct host1x_job *job)
78
{
79
kref_get(&job->ref);
80
return job;
81
}
82
EXPORT_SYMBOL(host1x_job_get);
83
84
static void job_free(struct kref *ref)
85
{
86
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
87
88
if (job->release)
89
job->release(job);
90
91
if (job->fence) {
92
/*
93
* remove_callback is atomic w.r.t. fence signaling, so
94
* after the call returns, we know that the callback is not
95
* in execution, and the fence can be safely freed.
96
*/
97
dma_fence_remove_callback(job->fence, &job->fence_cb);
98
dma_fence_put(job->fence);
99
}
100
101
if (job->syncpt)
102
host1x_syncpt_put(job->syncpt);
103
104
kfree(job);
105
}
106
107
void host1x_job_put(struct host1x_job *job)
108
{
109
kref_put(&job->ref, job_free);
110
}
111
EXPORT_SYMBOL(host1x_job_put);
112
113
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
114
unsigned int words, unsigned int offset)
115
{
116
struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
117
118
gather->words = words;
119
gather->bo = bo;
120
gather->offset = offset;
121
122
job->num_cmds++;
123
}
124
EXPORT_SYMBOL(host1x_job_add_gather);
125
126
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
127
bool relative, u32 next_class)
128
{
129
struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
130
131
cmd->is_wait = true;
132
cmd->wait.id = id;
133
cmd->wait.threshold = thresh;
134
cmd->wait.next_class = next_class;
135
cmd->wait.relative = relative;
136
137
job->num_cmds++;
138
}
139
EXPORT_SYMBOL(host1x_job_add_wait);
140
141
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
142
{
143
unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
144
struct host1x_client *client = job->client;
145
struct device *dev = client->dev;
146
struct host1x_job_gather *g;
147
unsigned int i;
148
int err;
149
150
job->num_unpins = 0;
151
152
for (i = 0; i < job->num_relocs; i++) {
153
struct host1x_reloc *reloc = &job->relocs[i];
154
enum dma_data_direction direction;
155
struct host1x_bo_mapping *map;
156
struct host1x_bo *bo;
157
158
reloc->target.bo = host1x_bo_get(reloc->target.bo);
159
if (!reloc->target.bo) {
160
err = -EINVAL;
161
goto unpin;
162
}
163
164
bo = reloc->target.bo;
165
166
switch (reloc->flags & mask) {
167
case HOST1X_RELOC_READ:
168
direction = DMA_TO_DEVICE;
169
break;
170
171
case HOST1X_RELOC_WRITE:
172
direction = DMA_FROM_DEVICE;
173
break;
174
175
case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
176
direction = DMA_BIDIRECTIONAL;
177
break;
178
179
default:
180
err = -EINVAL;
181
goto unpin;
182
}
183
184
map = host1x_bo_pin(dev, bo, direction, NULL);
185
if (IS_ERR(map)) {
186
err = PTR_ERR(map);
187
goto unpin;
188
}
189
190
/*
191
* host1x clients are generally not able to do scatter-gather themselves, so fail
192
* if the buffer is discontiguous and we fail to map its SG table to a single
193
* contiguous chunk of I/O virtual memory.
194
*/
195
if (map->chunks > 1) {
196
err = -EINVAL;
197
goto unpin;
198
}
199
200
job->addr_phys[job->num_unpins] = map->phys;
201
job->unpins[job->num_unpins].map = map;
202
job->num_unpins++;
203
}
204
205
/*
206
* We will copy gathers BO content later, so there is no need to
207
* hold and pin them.
208
*/
209
if (job->enable_firewall)
210
return 0;
211
212
for (i = 0; i < job->num_cmds; i++) {
213
struct host1x_bo_mapping *map;
214
size_t gather_size = 0;
215
struct scatterlist *sg;
216
unsigned long shift;
217
struct iova *alloc;
218
unsigned int j;
219
220
if (job->cmds[i].is_wait)
221
continue;
222
223
g = &job->cmds[i].gather;
224
225
g->bo = host1x_bo_get(g->bo);
226
if (!g->bo) {
227
err = -EINVAL;
228
goto unpin;
229
}
230
231
map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
232
if (IS_ERR(map)) {
233
err = PTR_ERR(map);
234
goto unpin;
235
}
236
237
if (host->domain) {
238
for_each_sgtable_sg(map->sgt, sg, j)
239
gather_size += sg->length;
240
241
gather_size = iova_align(&host->iova, gather_size);
242
243
shift = iova_shift(&host->iova);
244
alloc = alloc_iova(&host->iova, gather_size >> shift,
245
host->iova_end >> shift, true);
246
if (!alloc) {
247
err = -ENOMEM;
248
goto put;
249
}
250
251
err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
252
map->sgt, IOMMU_READ);
253
if (err == 0) {
254
__free_iova(&host->iova, alloc);
255
err = -EINVAL;
256
goto put;
257
}
258
259
map->phys = iova_dma_addr(&host->iova, alloc);
260
map->size = gather_size;
261
}
262
263
job->addr_phys[job->num_unpins] = map->phys;
264
job->unpins[job->num_unpins].map = map;
265
job->num_unpins++;
266
267
job->gather_addr_phys[i] = map->phys;
268
}
269
270
return 0;
271
272
put:
273
host1x_bo_put(g->bo);
274
unpin:
275
host1x_job_unpin(job);
276
return err;
277
}
278
279
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
280
{
281
void *cmdbuf_addr = NULL;
282
struct host1x_bo *cmdbuf = g->bo;
283
unsigned int i;
284
285
/* pin & patch the relocs for one gather */
286
for (i = 0; i < job->num_relocs; i++) {
287
struct host1x_reloc *reloc = &job->relocs[i];
288
u32 reloc_addr = (job->reloc_addr_phys[i] +
289
reloc->target.offset) >> reloc->shift;
290
u32 *target;
291
292
/* skip all other gathers */
293
if (cmdbuf != reloc->cmdbuf.bo)
294
continue;
295
296
if (job->enable_firewall) {
297
target = (u32 *)job->gather_copy_mapped +
298
reloc->cmdbuf.offset / sizeof(u32) +
299
g->offset / sizeof(u32);
300
goto patch_reloc;
301
}
302
303
if (!cmdbuf_addr) {
304
cmdbuf_addr = host1x_bo_mmap(cmdbuf);
305
306
if (unlikely(!cmdbuf_addr)) {
307
pr_err("Could not map cmdbuf for relocation\n");
308
return -ENOMEM;
309
}
310
}
311
312
target = cmdbuf_addr + reloc->cmdbuf.offset;
313
patch_reloc:
314
*target = reloc_addr;
315
}
316
317
if (cmdbuf_addr)
318
host1x_bo_munmap(cmdbuf, cmdbuf_addr);
319
320
return 0;
321
}
322
323
static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
324
unsigned int offset)
325
{
326
offset *= sizeof(u32);
327
328
if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
329
return false;
330
331
/* relocation shift value validation isn't implemented yet */
332
if (reloc->shift)
333
return false;
334
335
return true;
336
}
337
338
struct host1x_firewall {
339
struct host1x_job *job;
340
struct device *dev;
341
342
unsigned int num_relocs;
343
struct host1x_reloc *reloc;
344
345
struct host1x_bo *cmdbuf;
346
unsigned int offset;
347
348
u32 words;
349
u32 class;
350
u32 reg;
351
u32 mask;
352
u32 count;
353
};
354
355
static int check_register(struct host1x_firewall *fw, unsigned long offset)
356
{
357
if (!fw->job->is_addr_reg)
358
return 0;
359
360
if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
361
if (!fw->num_relocs)
362
return -EINVAL;
363
364
if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
365
return -EINVAL;
366
367
fw->num_relocs--;
368
fw->reloc++;
369
}
370
371
return 0;
372
}
373
374
/*
 * Validate the class currently selected in @fw.
 *
 * When the job provides an is_valid_class() hook it decides; otherwise the
 * class must equal @class.
 *
 * Returns 0 if the class is acceptable, -EINVAL otherwise.
 */
static int check_class(struct host1x_firewall *fw, u32 class)
{
	bool acceptable;

	if (fw->job->is_valid_class)
		acceptable = fw->job->is_valid_class(fw->class);
	else
		acceptable = (fw->class == class);

	return acceptable ? 0 : -EINVAL;
}
386
387
static int check_mask(struct host1x_firewall *fw)
388
{
389
u32 mask = fw->mask;
390
u32 reg = fw->reg;
391
int ret;
392
393
while (mask) {
394
if (fw->words == 0)
395
return -EINVAL;
396
397
if (mask & 1) {
398
ret = check_register(fw, reg);
399
if (ret < 0)
400
return ret;
401
402
fw->words--;
403
fw->offset++;
404
}
405
mask >>= 1;
406
reg++;
407
}
408
409
return 0;
410
}
411
412
static int check_incr(struct host1x_firewall *fw)
413
{
414
u32 count = fw->count;
415
u32 reg = fw->reg;
416
int ret;
417
418
while (count) {
419
if (fw->words == 0)
420
return -EINVAL;
421
422
ret = check_register(fw, reg);
423
if (ret < 0)
424
return ret;
425
426
reg++;
427
fw->words--;
428
fw->offset++;
429
count--;
430
}
431
432
return 0;
433
}
434
435
static int check_nonincr(struct host1x_firewall *fw)
436
{
437
u32 count = fw->count;
438
int ret;
439
440
while (count) {
441
if (fw->words == 0)
442
return -EINVAL;
443
444
ret = check_register(fw, fw->reg);
445
if (ret < 0)
446
return ret;
447
448
fw->words--;
449
fw->offset++;
450
count--;
451
}
452
453
return 0;
454
}
455
456
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
457
{
458
u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
459
(g->offset / sizeof(u32));
460
u32 job_class = fw->class;
461
int err = 0;
462
463
fw->words = g->words;
464
fw->cmdbuf = g->bo;
465
fw->offset = 0;
466
467
while (fw->words && !err) {
468
u32 word = cmdbuf_base[fw->offset];
469
u32 opcode = (word & 0xf0000000) >> 28;
470
471
fw->mask = 0;
472
fw->reg = 0;
473
fw->count = 0;
474
fw->words--;
475
fw->offset++;
476
477
switch (opcode) {
478
case 0:
479
fw->class = word >> 6 & 0x3ff;
480
fw->mask = word & 0x3f;
481
fw->reg = word >> 16 & 0xfff;
482
err = check_class(fw, job_class);
483
if (!err)
484
err = check_mask(fw);
485
if (err)
486
goto out;
487
break;
488
case 1:
489
fw->reg = word >> 16 & 0xfff;
490
fw->count = word & 0xffff;
491
err = check_incr(fw);
492
if (err)
493
goto out;
494
break;
495
496
case 2:
497
fw->reg = word >> 16 & 0xfff;
498
fw->count = word & 0xffff;
499
err = check_nonincr(fw);
500
if (err)
501
goto out;
502
break;
503
504
case 3:
505
fw->mask = word & 0xffff;
506
fw->reg = word >> 16 & 0xfff;
507
err = check_mask(fw);
508
if (err)
509
goto out;
510
break;
511
case 4:
512
case 14:
513
break;
514
default:
515
err = -EINVAL;
516
break;
517
}
518
}
519
520
out:
521
return err;
522
}
523
524
static inline int copy_gathers(struct device *host, struct host1x_job *job,
525
struct device *dev)
526
{
527
struct host1x_firewall fw;
528
size_t size = 0;
529
size_t offset = 0;
530
unsigned int i;
531
532
fw.job = job;
533
fw.dev = dev;
534
fw.reloc = job->relocs;
535
fw.num_relocs = job->num_relocs;
536
fw.class = job->class;
537
538
for (i = 0; i < job->num_cmds; i++) {
539
struct host1x_job_gather *g;
540
541
if (job->cmds[i].is_wait)
542
continue;
543
544
g = &job->cmds[i].gather;
545
546
size += g->words * sizeof(u32);
547
}
548
549
/*
550
* Try a non-blocking allocation from a higher priority pools first,
551
* as awaiting for the allocation here is a major performance hit.
552
*/
553
job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
554
GFP_NOWAIT);
555
556
/* the higher priority allocation failed, try the generic-blocking */
557
if (!job->gather_copy_mapped)
558
job->gather_copy_mapped = dma_alloc_wc(host, size,
559
&job->gather_copy,
560
GFP_KERNEL);
561
if (!job->gather_copy_mapped)
562
return -ENOMEM;
563
564
job->gather_copy_size = size;
565
566
for (i = 0; i < job->num_cmds; i++) {
567
struct host1x_job_gather *g;
568
void *gather;
569
570
if (job->cmds[i].is_wait)
571
continue;
572
g = &job->cmds[i].gather;
573
574
/* Copy the gather */
575
gather = host1x_bo_mmap(g->bo);
576
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
577
g->words * sizeof(u32));
578
host1x_bo_munmap(g->bo, gather);
579
580
/* Store the location in the buffer */
581
g->base = job->gather_copy;
582
g->offset = offset;
583
584
/* Validate the job */
585
if (validate(&fw, g))
586
return -EINVAL;
587
588
offset += g->words * sizeof(u32);
589
}
590
591
/* No relocs should remain at this point */
592
if (fw.num_relocs)
593
return -EINVAL;
594
595
return 0;
596
}
597
598
int host1x_job_pin(struct host1x_job *job, struct device *dev)
599
{
600
int err;
601
unsigned int i, j;
602
struct host1x *host = dev_get_drvdata(dev->parent);
603
604
/* pin memory */
605
err = pin_job(host, job);
606
if (err)
607
goto out;
608
609
if (job->enable_firewall) {
610
err = copy_gathers(host->dev, job, dev);
611
if (err)
612
goto out;
613
}
614
615
/* patch gathers */
616
for (i = 0; i < job->num_cmds; i++) {
617
struct host1x_job_gather *g;
618
619
if (job->cmds[i].is_wait)
620
continue;
621
g = &job->cmds[i].gather;
622
623
/* process each gather mem only once */
624
if (g->handled)
625
continue;
626
627
/* copy_gathers() sets gathers base if firewall is enabled */
628
if (!job->enable_firewall)
629
g->base = job->gather_addr_phys[i];
630
631
for (j = i + 1; j < job->num_cmds; j++) {
632
if (!job->cmds[j].is_wait &&
633
job->cmds[j].gather.bo == g->bo) {
634
job->cmds[j].gather.handled = true;
635
job->cmds[j].gather.base = g->base;
636
}
637
}
638
639
err = do_relocs(job, g);
640
if (err)
641
break;
642
}
643
644
out:
645
if (err)
646
host1x_job_unpin(job);
647
wmb();
648
649
return err;
650
}
651
EXPORT_SYMBOL(host1x_job_pin);
652
653
void host1x_job_unpin(struct host1x_job *job)
654
{
655
struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
656
unsigned int i;
657
658
for (i = 0; i < job->num_unpins; i++) {
659
struct host1x_bo_mapping *map = job->unpins[i].map;
660
struct host1x_bo *bo = map->bo;
661
662
if (!job->enable_firewall && map->size && host->domain) {
663
iommu_unmap(host->domain, job->addr_phys[i], map->size);
664
free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
665
}
666
667
host1x_bo_unpin(map);
668
host1x_bo_put(bo);
669
}
670
671
job->num_unpins = 0;
672
673
if (job->gather_copy_size)
674
dma_free_wc(host->dev, job->gather_copy_size,
675
job->gather_copy_mapped, job->gather_copy);
676
}
677
EXPORT_SYMBOL(host1x_job_unpin);
678
679
/*
680
* Debug routine used to dump job entries
681
*/
682
void host1x_job_dump(struct device *dev, struct host1x_job *job)
683
{
684
dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id);
685
dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end);
686
dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get);
687
dev_dbg(dev, " TIMEOUT %d\n", job->timeout);
688
dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots);
689
dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins);
690
}
691
692