CoCalc -- vc4_validate

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
⁴⁵⁷⁴ views
1
/*
2
 * Copyright © 2014 Broadcom
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23

24
/**
25
 * DOC: Shader validator for VC4.
26
 *
27
 * The VC4 has no IOMMU between it and system memory, so a user with
28
 * access to execute shaders could escalate privilege by overwriting
29
 * system memory (using the VPM write address register in the
30
 * general-purpose DMA mode) or reading system memory it shouldn't
31
 * (reading it as a texture, or uniform data, or vertex data).
32
 *
33
 * This walks over a shader BO, ensuring that its accesses are
34
 * appropriately bounded, and recording how many texture accesses are
35
 * made and where so that we can do relocations for them in the
36
 * uniform stream.
37
 */
38

39
#include "vc4_drv.h"
40
#include "vc4_qpu.h"
41
#include "vc4_qpu_defines.h"
42

43
#define LIVE_REG_COUNT (32 + 32 + 4)
44

45
struct vc4_shader_validation_state {
46
	/* Current IP being validated. */
47
	uint32_t ip;
48

49
	/* IP at the end of the BO, do not read shader[max_ip] */
50
	uint32_t max_ip;
51

52
	uint64_t *shader;
53

54
	struct vc4_texture_sample_info tmu_setup[2];
55
	int tmu_write_count[2];
56

57
	/* For registers that were last written to by a MIN instruction with
58
	 * one argument being a uniform, the address of the uniform.
59
	 * Otherwise, ~0.
60
	 *
61
	 * This is used for the validation of direct address memory reads.
62
	 */
63
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
64
	bool live_max_clamp_regs[LIVE_REG_COUNT];
65
	uint32_t live_immediates[LIVE_REG_COUNT];
66

67
	/* Bitfield of which IPs are used as branch targets.
68
	 *
69
	 * Used for validation that the uniform stream is updated at the right
70
	 * points and clearing the texturing/clamping state.
71
	 */
72
	unsigned long *branch_targets;
73

74
	/* Set when entering a basic block, and cleared when the uniform
75
	 * address update is found.  This is used to make sure that we don't
76
	 * read uniforms when the address is undefined.
77
	 */
78
	bool needs_uniform_address_update;
79

80
	/* Set when we find a backwards branch.  If the branch is backwards,
81
	 * the taraget is probably doing an address reset to read uniforms,
82
	 * and so we need to be sure that a uniforms address is present in the
83
	 * stream, even if the shader didn't need to read uniforms in later
84
	 * basic blocks.
85
	 */
86
	bool needs_uniform_address_for_loop;
87

88
	/* Set when we find an instruction which violates the criterion for a
89
	 * threaded shader. These are:
90
	 * 	- only write the lower half of the register space
91
	 * 	- last thread switch signaled at the end
92
	 * So track the usage of the thread switches and the register usage.
93
	 */
94
	bool all_registers_used;
95
};
96

97
static uint32_t
98
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
99
{
100
	if (waddr < 32) {
101
		if (is_b)
102
			return 32 + waddr;
103
		else
104
			return waddr;
105
	} else if (waddr <= QPU_W_ACC3) {
106
		return 64 + waddr - QPU_W_ACC0;
107
	} else {
108
		return ~0;
109
	}
110
}
111

112
static uint32_t
113
raddr_add_a_to_live_reg_index(uint64_t inst)
114
{
115
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
116
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
117
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
118
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
119

120
	if (add_a == QPU_MUX_A)
121
		return raddr_a;
122
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
123
		return 32 + raddr_b;
124
	else if (add_a <= QPU_MUX_R3)
125
		return 64 + add_a;
126
	else
127
		return ~0;
128
}
129

130
/* Reports whether a live register index names one of registers 16-31 of
 * either physical register file (slots 16-31 and 48-63).
 */
static bool live_reg_is_upper_half(uint32_t lri)
{
	/* Slots from 64 upwards are not part of either register file. */
	if (lri >= 32 + 32)
		return false;

	return (lri % 32) >= 16;
}
135

136
static bool
137
is_tmu_submit(uint32_t waddr)
138
{
139
	return (waddr == QPU_W_TMU0_S ||
140
		waddr == QPU_W_TMU1_S);
141
}
142

143
static bool
144
is_tmu_write(uint32_t waddr)
145
{
146
	return (waddr >= QPU_W_TMU0_S &&
147
		waddr <= QPU_W_TMU1_B);
148
}
149

150
static bool
151
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
152
		      struct vc4_shader_validation_state *validation_state,
153
		      int tmu)
154
{
155
	uint32_t s = validated_shader->num_texture_samples;
156
	int i;
157
	struct vc4_texture_sample_info *temp_samples;
158

159
	temp_samples = krealloc(validated_shader->texture_samples,
160
				(s + 1) * sizeof(*temp_samples),
161
				GFP_KERNEL);
162
	if (!temp_samples)
163
		return false;
164

165
	memcpy(&temp_samples[s],
166
	       &validation_state->tmu_setup[tmu],
167
	       sizeof(*temp_samples));
168

169
	validated_shader->num_texture_samples = s + 1;
170
	validated_shader->texture_samples = temp_samples;
171

172
	for (i = 0; i < 4; i++)
173
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;
174

175
	return true;
176
}
177

178
static bool
179
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
180
		struct vc4_shader_validation_state *validation_state,
181
		bool is_mul)
182
{
183
	uint64_t inst = validation_state->shader[validation_state->ip];
184
	uint32_t waddr = (is_mul ?
185
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
186
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
187
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
188
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
189
	int tmu = waddr > QPU_W_TMU0_B;
190
	bool submit = is_tmu_submit(waddr);
191
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
192
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
193

194
	if (is_direct) {
195
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
196
		uint32_t clamp_reg, clamp_offset;
197

198
		if (sig == QPU_SIG_SMALL_IMM) {
199
			DRM_ERROR("direct TMU read used small immediate\n");
200
			return false;
201
		}
202

203
		/* Make sure that this texture load is an add of the base
204
		 * address of the UBO to a clamped offset within the UBO.
205
		 */
206
		if (is_mul ||
207
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
208
			DRM_ERROR("direct TMU load wasn't an add\n");
209
			return false;
210
		}
211

212
		/* We assert that the clamped address is the first
213
		 * argument, and the UBO base address is the second argument.
214
		 * This is arbitrary, but simpler than supporting flipping the
215
		 * two either way.
216
		 */
217
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
218
		if (clamp_reg == ~0) {
219
			DRM_ERROR("direct TMU load wasn't clamped\n");
220
			return false;
221
		}
222

223
		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
224
		if (clamp_offset == ~0) {
225
			DRM_ERROR("direct TMU load wasn't clamped\n");
226
			return false;
227
		}
228

229
		/* Store the clamp value's offset in p1 (see reloc_tex() in
230
		 * vc4_validate.c).
231
		 */
232
		validation_state->tmu_setup[tmu].p_offset[1] =
233
			clamp_offset;
234

235
		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
236
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
237
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
238
			return false;
239
		}
240

241
		validation_state->tmu_setup[tmu].is_direct = true;
242
	} else {
243
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
244
					      raddr_b == QPU_R_UNIF)) {
245
			DRM_ERROR("uniform read in the same instruction as "
246
				  "texture setup.\n");
247
			return false;
248
		}
249
	}
250

251
	if (validation_state->tmu_write_count[tmu] >= 4) {
252
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
253
			  tmu);
254
		return false;
255
	}
256
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
257
		validated_shader->uniforms_size;
258
	validation_state->tmu_write_count[tmu]++;
259
	/* Since direct uses a RADDR uniform reference, it will get counted in
260
	 * check_instruction_reads()
261
	 */
262
	if (!is_direct) {
263
		if (validation_state->needs_uniform_address_update) {
264
			DRM_ERROR("Texturing with undefined uniform address\n");
265
			return false;
266
		}
267

268
		validated_shader->uniforms_size += 4;
269
	}
270

271
	if (submit) {
272
		if (!record_texture_sample(validated_shader,
273
					   validation_state, tmu)) {
274
			return false;
275
		}
276

277
		validation_state->tmu_write_count[tmu] = 0;
278
	}
279

280
	return true;
281
}
282

283
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
284
{
285
	uint32_t o = validated_shader->num_uniform_addr_offsets;
286
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
287

288
	validated_shader->uniform_addr_offsets =
289
		krealloc(validated_shader->uniform_addr_offsets,
290
			 (o + 1) *
291
			 sizeof(*validated_shader->uniform_addr_offsets),
292
			 GFP_KERNEL);
293
	if (!validated_shader->uniform_addr_offsets)
294
		return false;
295

296
	validated_shader->uniform_addr_offsets[o] = num_uniforms;
297
	validated_shader->num_uniform_addr_offsets++;
298

299
	return true;
300
}
301

302
static bool
303
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
304
			       struct vc4_shader_validation_state *validation_state,
305
			       bool is_mul)
306
{
307
	uint64_t inst = validation_state->shader[validation_state->ip];
308
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
309
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
310
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
311
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
312
	/* We want our reset to be pointing at whatever uniform follows the
313
	 * uniforms base address.
314
	 */
315
	u32 expected_offset = validated_shader->uniforms_size + 4;
316

317
	/* We only support absolute uniform address changes, and we
318
	 * require that they be in the current basic block before any
319
	 * of its uniform reads.
320
	 *
321
	 * One could potentially emit more efficient QPU code, by
322
	 * noticing that (say) an if statement does uniform control
323
	 * flow for all threads and that the if reads the same number
324
	 * of uniforms on each side.  However, this scheme is easy to
325
	 * validate so it's all we allow for now.
326
	 */
327

328
	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
329
		DRM_ERROR("uniforms address change must be "
330
			  "normal math\n");
331
		return false;
332
	}
333

334
	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
335
		DRM_ERROR("Uniform address reset must be an ADD.\n");
336
		return false;
337
	}
338

339
	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
340
		DRM_ERROR("Uniform address reset must be unconditional.\n");
341
		return false;
342
	}
343

344
	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
345
	    !(inst & QPU_PM)) {
346
		DRM_ERROR("No packing allowed on uniforms reset\n");
347
		return false;
348
	}
349

350
	if (add_lri == -1) {
351
		DRM_ERROR("First argument of uniform address write must be "
352
			  "an immediate value.\n");
353
		return false;
354
	}
355

356
	if (validation_state->live_immediates[add_lri] != expected_offset) {
357
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
358
			  validation_state->live_immediates[add_lri],
359
			  expected_offset);
360
		return false;
361
	}
362

363
	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
364
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
365
		DRM_ERROR("Second argument of uniform address write must be "
366
			  "a uniform.\n");
367
		return false;
368
	}
369

370
	validation_state->needs_uniform_address_update = false;
371
	validation_state->needs_uniform_address_for_loop = false;
372
	return require_uniform_address_uniform(validated_shader);
373
}
374

375
static bool
376
check_reg_write(struct vc4_validated_shader_info *validated_shader,
377
		struct vc4_shader_validation_state *validation_state,
378
		bool is_mul)
379
{
380
	uint64_t inst = validation_state->shader[validation_state->ip];
381
	uint32_t waddr = (is_mul ?
382
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
383
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
384
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
385
	bool ws = inst & QPU_WS;
386
	bool is_b = is_mul ^ ws;
387
	u32 lri = waddr_to_live_reg_index(waddr, is_b);
388

389
	if (lri != -1) {
390
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
391
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
392

393
		if (sig == QPU_SIG_LOAD_IMM &&
394
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
395
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
396
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
397
			validation_state->live_immediates[lri] =
398
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
399
		} else {
400
			validation_state->live_immediates[lri] = ~0;
401
		}
402

403
		if (live_reg_is_upper_half(lri))
404
			validation_state->all_registers_used = true;
405
	}
406

407
	switch (waddr) {
408
	case QPU_W_UNIFORMS_ADDRESS:
409
		if (is_b) {
410
			DRM_ERROR("relative uniforms address change "
411
				  "unsupported\n");
412
			return false;
413
		}
414

415
		return validate_uniform_address_write(validated_shader,
416
						      validation_state,
417
						      is_mul);
418

419
	case QPU_W_TLB_COLOR_MS:
420
	case QPU_W_TLB_COLOR_ALL:
421
	case QPU_W_TLB_Z:
422
		/* These only interact with the tile buffer, not main memory,
423
		 * so they're safe.
424
		 */
425
		return true;
426

427
	case QPU_W_TMU0_S:
428
	case QPU_W_TMU0_T:
429
	case QPU_W_TMU0_R:
430
	case QPU_W_TMU0_B:
431
	case QPU_W_TMU1_S:
432
	case QPU_W_TMU1_T:
433
	case QPU_W_TMU1_R:
434
	case QPU_W_TMU1_B:
435
		return check_tmu_write(validated_shader, validation_state,
436
				       is_mul);
437

438
	case QPU_W_HOST_INT:
439
	case QPU_W_TMU_NOSWAP:
440
	case QPU_W_TLB_ALPHA_MASK:
441
	case QPU_W_MUTEX_RELEASE:
442
		/* XXX: I haven't thought about these, so don't support them
443
		 * for now.
444
		 */
445
		DRM_ERROR("Unsupported waddr %d\n", waddr);
446
		return false;
447

448
	case QPU_W_VPM_ADDR:
449
		DRM_ERROR("General VPM DMA unsupported\n");
450
		return false;
451

452
	case QPU_W_VPM:
453
	case QPU_W_VPMVCD_SETUP:
454
		/* We allow VPM setup in general, even including VPM DMA
455
		 * configuration setup, because the (unsafe) DMA can only be
456
		 * triggered by QPU_W_VPM_ADDR writes.
457
		 */
458
		return true;
459

460
	case QPU_W_TLB_STENCIL_SETUP:
461
		return true;
462
	}
463

464
	return true;
465
}
466

467
static void
468
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
469
		  struct vc4_shader_validation_state *validation_state)
470
{
471
	uint64_t inst = validation_state->shader[validation_state->ip];
472
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
473
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
474
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
475
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
476
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
477
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
478
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
479
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
480
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
481
	bool ws = inst & QPU_WS;
482
	uint32_t lri_add_a, lri_add, lri_mul;
483
	bool add_a_is_min_0;
484

485
	/* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
486
	 * before we clear previous live state.
487
	 */
488
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
489
	add_a_is_min_0 = (lri_add_a != ~0 &&
490
			  validation_state->live_max_clamp_regs[lri_add_a]);
491

492
	/* Clear live state for registers written by our instruction. */
493
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
494
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
495
	if (lri_mul != ~0) {
496
		validation_state->live_max_clamp_regs[lri_mul] = false;
497
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
498
	}
499
	if (lri_add != ~0) {
500
		validation_state->live_max_clamp_regs[lri_add] = false;
501
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
502
	} else {
503
		/* Nothing further to do for live tracking, since only ADDs
504
		 * generate new live clamp registers.
505
		 */
506
		return;
507
	}
508

509
	/* Now, handle remaining live clamp tracking for the ADD operation. */
510

511
	if (cond_add != QPU_COND_ALWAYS)
512
		return;
513

514
	if (op_add == QPU_A_MAX) {
515
		/* Track live clamps of a value to a minimum of 0 (in either
516
		 * arg).
517
		 */
518
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
519
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
520
			return;
521
		}
522

523
		validation_state->live_max_clamp_regs[lri_add] = true;
524
	} else if (op_add == QPU_A_MIN) {
525
		/* Track live clamps of a value clamped to a minimum of 0 and
526
		 * a maximum of some uniform's offset.
527
		 */
528
		if (!add_a_is_min_0)
529
			return;
530

531
		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
532
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
533
		      sig != QPU_SIG_SMALL_IMM)) {
534
			return;
535
		}
536

537
		validation_state->live_min_clamp_offsets[lri_add] =
538
			validated_shader->uniforms_size;
539
	}
540
}
541

542
static bool
543
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
544
			 struct vc4_shader_validation_state *validation_state)
545
{
546
	uint64_t inst = validation_state->shader[validation_state->ip];
547
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
548
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
549
	bool ok;
550

551
	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
552
		DRM_ERROR("ADD and MUL both set up textures\n");
553
		return false;
554
	}
555

556
	ok = (check_reg_write(validated_shader, validation_state, false) &&
557
	      check_reg_write(validated_shader, validation_state, true));
558

559
	track_live_clamps(validated_shader, validation_state);
560

561
	return ok;
562
}
563

564
static bool
565
check_branch(uint64_t inst,
566
	     struct vc4_validated_shader_info *validated_shader,
567
	     struct vc4_shader_validation_state *validation_state,
568
	     int ip)
569
{
570
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
571
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
572
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
573

574
	if ((int)branch_imm < 0)
575
		validation_state->needs_uniform_address_for_loop = true;
576

577
	/* We don't want to have to worry about validation of this, and
578
	 * there's no need for it.
579
	 */
580
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
581
		DRM_ERROR("branch instruction at %d wrote a register.\n",
582
			  validation_state->ip);
583
		return false;
584
	}
585

586
	return true;
587
}
588

589
static bool
590
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
591
			struct vc4_shader_validation_state *validation_state)
592
{
593
	uint64_t inst = validation_state->shader[validation_state->ip];
594
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
595
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
596
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
597

598
	if (raddr_a == QPU_R_UNIF ||
599
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
600
		/* This can't overflow the uint32_t, because we're reading 8
601
		 * bytes of instruction to increment by 4 here, so we'd
602
		 * already be OOM.
603
		 */
604
		validated_shader->uniforms_size += 4;
605

606
		if (validation_state->needs_uniform_address_update) {
607
			DRM_ERROR("Uniform read with undefined uniform "
608
				  "address\n");
609
			return false;
610
		}
611
	}
612

613
	if ((raddr_a >= 16 && raddr_a < 32) ||
614
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
615
		validation_state->all_registers_used = true;
616
	}
617

618
	return true;
619
}
620

621
/* Make sure that all branches are absolute and point within the shader, and
622
 * note their targets for later.
623
 */
624
static bool
625
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
626
{
627
	uint32_t max_branch_target = 0;
628
	int ip;
629
	int last_branch = -2;
630

631
	for (ip = 0; ip < validation_state->max_ip; ip++) {
632
		uint64_t inst = validation_state->shader[ip];
633
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
634
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
635
		uint32_t after_delay_ip = ip + 4;
636
		uint32_t branch_target_ip;
637

638
		if (sig == QPU_SIG_PROG_END) {
639
			/* There are two delay slots after program end is
640
			 * signaled that are still executed, then we're
641
			 * finished.  validation_state->max_ip is the
642
			 * instruction after the last valid instruction in the
643
			 * program.
644
			 */
645
			validation_state->max_ip = ip + 3;
646
			continue;
647
		}
648

649
		if (sig != QPU_SIG_BRANCH)
650
			continue;
651

652
		if (ip - last_branch < 4) {
653
			DRM_ERROR("Branch at %d during delay slots\n", ip);
654
			return false;
655
		}
656
		last_branch = ip;
657

658
		if (inst & QPU_BRANCH_REG) {
659
			DRM_ERROR("branching from register relative "
660
				  "not supported\n");
661
			return false;
662
		}
663

664
		if (!(inst & QPU_BRANCH_REL)) {
665
			DRM_ERROR("relative branching required\n");
666
			return false;
667
		}
668

669
		/* The actual branch target is the instruction after the delay
670
		 * slots, plus whatever byte offset is in the low 32 bits of
671
		 * the instruction.  Make sure we're not branching beyond the
672
		 * end of the shader object.
673
		 */
674
		if (branch_imm % sizeof(inst) != 0) {
675
			DRM_ERROR("branch target not aligned\n");
676
			return false;
677
		};
678

679
		branch_target_ip = after_delay_ip + (branch_imm >> 3);
680
		if (branch_target_ip >= validation_state->max_ip) {
681
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
682
				  ip, branch_target_ip,
683
				  validation_state->max_ip);
684
			return false;
685
		}
686
		set_bit(branch_target_ip, validation_state->branch_targets);
687

688
		/* Make sure that the non-branching path is also not outside
689
		 * the shader.
690
		 */
691
		if (after_delay_ip >= validation_state->max_ip) {
692
			DRM_ERROR("Branch at %d continues past shader end "
693
				  "(%d/%d)\n",
694
				  ip, after_delay_ip, validation_state->max_ip);
695
			return false;
696
		}
697
		set_bit(after_delay_ip, validation_state->branch_targets);
698
		max_branch_target = max(max_branch_target, after_delay_ip);
699
	}
700

701
	if (max_branch_target > validation_state->max_ip - 3) {
702
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
703
		return false;
704
	}
705

706
	return true;
707
}
708

709
/* Resets any known state for the shader, used when we may be branched to from
710
 * multiple locations in the program (or at shader start).
711
 */
712
static void
713
reset_validation_state(struct vc4_shader_validation_state *validation_state)
714
{
715
	int i;
716

717
	for (i = 0; i < 8; i++)
718
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
719

720
	for (i = 0; i < LIVE_REG_COUNT; i++) {
721
		validation_state->live_min_clamp_offsets[i] = ~0;
722
		validation_state->live_max_clamp_regs[i] = false;
723
		validation_state->live_immediates[i] = ~0;
724
	}
725
}
726

727
static bool
728
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
729
{
730
	return (validation_state->tmu_write_count[0] != 0 ||
731
		validation_state->tmu_write_count[1] != 0);
732
}
733

734
static bool
735
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
736
{
737
	uint32_t ip = validation_state->ip;
738

739
	if (!test_bit(ip, validation_state->branch_targets))
740
		return true;
741

742
	if (texturing_in_progress(validation_state)) {
743
		DRM_ERROR("Branch target landed during TMU setup\n");
744
		return false;
745
	}
746

747
	/* Reset our live values tracking, since this instruction may have
748
	 * multiple predecessors.
749
	 *
750
	 * One could potentially do analysis to determine that, for
751
	 * example, all predecessors have a live max clamp in the same
752
	 * register, but we don't bother with that.
753
	 */
754
	reset_validation_state(validation_state);
755

756
	/* Since we've entered a basic block from potentially multiple
757
	 * predecessors, we need the uniforms address to be updated before any
758
	 * unforms are read.  We require that after any branch point, the next
759
	 * uniform to be loaded is a uniform address offset.  That uniform's
760
	 * offset will be marked by the uniform address register write
761
	 * validation, or a one-off the end-of-program check.
762
	 */
763
	validation_state->needs_uniform_address_update = true;
764

765
	return true;
766
}
767

768
struct vc4_validated_shader_info *
769
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
770
{
771
	bool found_shader_end = false;
772
	int shader_end_ip = 0;
773
	uint32_t last_thread_switch_ip = -3;
774
	uint32_t ip;
775
	struct vc4_validated_shader_info *validated_shader = NULL;
776
	struct vc4_shader_validation_state validation_state;
777

778
	memset(&validation_state, 0, sizeof(validation_state));
779
	validation_state.shader = shader_obj->vaddr;
780
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
781

782
	reset_validation_state(&validation_state);
783

784
	validation_state.branch_targets =
785
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
786
			sizeof(unsigned long), GFP_KERNEL);
787
	if (!validation_state.branch_targets)
788
		goto fail;
789

790
	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
791
	if (!validated_shader)
792
		goto fail;
793

794
	if (!vc4_validate_branches(&validation_state))
795
		goto fail;
796

797
	for (ip = 0; ip < validation_state.max_ip; ip++) {
798
		uint64_t inst = validation_state.shader[ip];
799
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
800

801
		validation_state.ip = ip;
802

803
		if (!vc4_handle_branch_target(&validation_state))
804
			goto fail;
805

806
		if (ip == last_thread_switch_ip + 3) {
807
			/* Reset r0-r3 live clamp data */
808
			int i;
809
			for (i = 64; i < LIVE_REG_COUNT; i++) {
810
				validation_state.live_min_clamp_offsets[i] = ~0;
811
				validation_state.live_max_clamp_regs[i] = false;
812
				validation_state.live_immediates[i] = ~0;
813
			}
814
		}
815

816
		switch (sig) {
817
		case QPU_SIG_NONE:
818
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
819
		case QPU_SIG_SCOREBOARD_UNLOCK:
820
		case QPU_SIG_COLOR_LOAD:
821
		case QPU_SIG_LOAD_TMU0:
822
		case QPU_SIG_LOAD_TMU1:
823
		case QPU_SIG_PROG_END:
824
		case QPU_SIG_SMALL_IMM:
825
		case QPU_SIG_THREAD_SWITCH:
826
		case QPU_SIG_LAST_THREAD_SWITCH:
827
			if (!check_instruction_writes(validated_shader,
828
						      &validation_state)) {
829
				DRM_ERROR("Bad write at ip %d\n", ip);
830
				goto fail;
831
			}
832

833
			if (!check_instruction_reads(validated_shader,
834
						     &validation_state))
835
				goto fail;
836

837
			if (sig == QPU_SIG_PROG_END) {
838
				found_shader_end = true;
839
				shader_end_ip = ip;
840
			}
841

842
			if (sig == QPU_SIG_THREAD_SWITCH ||
843
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
844
				validated_shader->is_threaded = true;
845

846
				if (ip < last_thread_switch_ip + 3) {
847
					DRM_ERROR("Thread switch too soon after "
848
						  "last switch at ip %d\n", ip);
849
					goto fail;
850
				}
851
				last_thread_switch_ip = ip;
852
			}
853

854
			break;
855

856
		case QPU_SIG_LOAD_IMM:
857
			if (!check_instruction_writes(validated_shader,
858
						      &validation_state)) {
859
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
860
				goto fail;
861
			}
862
			break;
863

864
		case QPU_SIG_BRANCH:
865
			if (!check_branch(inst, validated_shader,
866
					  &validation_state, ip))
867
				goto fail;
868

869
			if (ip < last_thread_switch_ip + 3) {
870
				DRM_ERROR("Branch in thread switch at ip %d",
871
					  ip);
872
				goto fail;
873
			}
874

875
			break;
876
		default:
877
			DRM_ERROR("Unsupported QPU signal %d at "
878
				  "instruction %d\n", sig, ip);
879
			goto fail;
880
		}
881

882
		/* There are two delay slots after program end is signaled
883
		 * that are still executed, then we're finished.
884
		 */
885
		if (found_shader_end && ip == shader_end_ip + 2)
886
			break;
887
	}
888

889
	if (ip == validation_state.max_ip) {
890
		DRM_ERROR("shader failed to terminate before "
891
			  "shader BO end at %zd\n",
892
			  shader_obj->base.size);
893
		goto fail;
894
	}
895

896
	/* Might corrupt other thread */
897
	if (validated_shader->is_threaded &&
898
	    validation_state.all_registers_used) {
899
		DRM_ERROR("Shader uses threading, but uses the upper "
900
			  "half of the registers, too\n");
901
		goto fail;
902
	}
903

904
	/* If we did a backwards branch and we haven't emitted a uniforms
905
	 * reset since then, we still need the uniforms stream to have the
906
	 * uniforms address available so that the backwards branch can do its
907
	 * uniforms reset.
908
	 *
909
	 * We could potentially prove that the backwards branch doesn't
910
	 * contain any uses of uniforms until program exit, but that doesn't
911
	 * seem to be worth the trouble.
912
	 */
913
	if (validation_state.needs_uniform_address_for_loop) {
914
		if (!require_uniform_address_uniform(validated_shader))
915
			goto fail;
916
		validated_shader->uniforms_size += 4;
917
	}
918

919
	/* Again, no chance of integer overflow here because the worst case
920
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
921
	 * instruction.
922
	 */
923
	validated_shader->uniforms_src_size =
924
		(validated_shader->uniforms_size +
925
		 4 * validated_shader->num_texture_samples);
926

927
	kfree(validation_state.branch_targets);
928

929
	return validated_shader;
930

931
fail:
932
	kfree(validation_state.branch_targets);
933
	if (validated_shader) {
934
		kfree(validated_shader->texture_samples);
935
		kfree(validated_shader);
936
	}
937
	return NULL;
938
}
939

940
Product

Resources

Company