GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found. This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch. If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction which violates the criteria for a
	 * threaded shader. These are:
	 *  - only write the lower half of the register space
	 *  - last thread switch signaled at the end
	 * So track the usage of the thread switches and the register usage.
	 */
	bool all_registers_used;
};

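/* The helpers below map a written register (waddr in regfile A or B) or the
 * register feeding the ADD op's first argument onto one flat "live register"
 * index: 0-31 for regfile A, 32-63 for regfile B, and 64-67 for the
 * accumulators r0-r3 (matching LIVE_REG_COUNT above). ~0 means the location
 * isn't tracked.
 */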
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

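/* Writing a TMU's S coordinate register is what submits the texture lookup;
 * writes to the other TMU registers only stage parameters for it.
 */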
static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

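/* Validates a write to one of the TMU setup registers. A "direct" access is
 * one where the S (submit) register is written with no prior setup writes;
 * its address must be the ADD of a uniform (the UBO base address) and a
 * register holding a live clamped offset, which is what bounds the read.
 * For every setup write we also note the current uniform stream offset in
 * p_offset[] so that reloc_tex() in vc4_validate.c can patch the texture
 * parameters later.
 */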
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

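/* Records the current position in the uniform stream (counted in u32
 * uniforms) as a place where a uniforms-address value must be supplied.
 */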
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

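/* A shader may reset the uniforms read address mid-stream (e.g. at the top
 * of a loop). To keep that validatable, the reset must be an unconditional,
 * unpacked ADD of a known immediate (the byte offset of the uniform that
 * follows the address uniform) and a value read from the uniform stream
 * itself; require_uniform_address_uniform() then records where that address
 * uniform lives.
 */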
static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side. However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */

	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
		DRM_ERROR("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_ERROR("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_ERROR("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_ERROR("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_ERROR("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_ERROR("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

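/* Validates one write (the ADD or the MUL pipeline's waddr) of the current
 * instruction: tracks live immediates and upper-register-file usage, then
 * checks the destination. Tile buffer and VPM setup writes are safe, TMU
 * and uniforms-address writes get their own checks, and anything that could
 * start a general DMA (QPU_W_VPM_ADDR) or that we haven't reasoned about is
 * rejected.
 */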
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_ERROR("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

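/* Tracks the clamp pattern that makes direct TMU reads provably bounded.
 * Schematically (not real QPU syntax), the sequence being looked for is:
 *
 *	r = max(x, 0)              -> live_max_clamp_regs[r]
 *	r = min(r, uniform)        -> live_min_clamp_offsets[r]
 *	TMU0_S = add(r, uniform)   -> accepted by check_tmu_write()
 *
 * where the MIN's uniform bounds the offset (its stream offset is what ends
 * up in p_offset[1]) and the final ADD's uniform is the UBO base address.
 */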
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

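/* Branch targets and ranges were already checked in vc4_validate_branches();
 * here we only note backwards branches (which will require a uniforms
 * address to be present in the stream) and reject branch instructions that
 * also write a register.
 */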
static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_ERROR("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

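/* Counts uniform reads (each consumes 4 bytes of the uniform stream) and
 * flags reads of the upper half of the register files for the threaded
 * shader check.
 */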
static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

/* Make sure that all branches are absolute and point within the shader, and
 * note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished. validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_ERROR("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read. We require that after any branch point, the next
	 * uniform to be loaded is a uniform address offset. That uniform's
	 * offset will be marked by the uniform address register write
	 * validation, or by a one-off check at the end of the program.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}

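/* Top-level entry point. First pass: vc4_validate_branches() bounds-checks
 * every branch, records branch targets, and finds the real end of the
 * program. Second pass: every instruction's reads and writes are validated
 * as above. On success, returns a vc4_validated_shader_info describing the
 * uniform stream and texture relocations needed at submit time; on failure,
 * returns NULL.
 */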
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;
			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_ERROR("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_ERROR("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_ERROR("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}