Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/v3d/v3d_program.c
4570 views
1
/*
2
* Copyright © 2014-2017 Broadcom
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <inttypes.h>
25
#include "util/format/u_format.h"
26
#include "util/u_math.h"
27
#include "util/u_memory.h"
28
#include "util/ralloc.h"
29
#include "util/hash_table.h"
30
#include "util/u_upload_mgr.h"
31
#include "tgsi/tgsi_dump.h"
32
#include "tgsi/tgsi_parse.h"
33
#include "compiler/nir/nir.h"
34
#include "compiler/nir/nir_builder.h"
35
#include "nir/tgsi_to_nir.h"
36
#include "compiler/v3d_compiler.h"
37
#include "v3d_context.h"
38
#include "broadcom/cle/v3d_packet_v33_pack.h"
39
40
/* Prototypes for helpers that are used before their definition below. */
static struct v3d_compiled_shader *
v3d_get_compiled_shader(struct v3d_context *v3d,
                        struct v3d_key *key,
                        size_t key_size);

static void
v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
                                struct v3d_key *key);
46
47
static gl_varying_slot
48
v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
49
{
50
nir_foreach_shader_out_variable(var, s) {
51
if (var->data.driver_location == driver_location) {
52
return var->data.location;
53
}
54
}
55
56
return -1;
57
}
58
59
/**
60
* Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
61
*
62
* A shader can have 16 of these specs, and each one of them can write up to
63
* 16 dwords. Since we allow a total of 64 transform feedback output
64
* components (not 16 vectors), we have to group the writes of multiple
65
* varyings together in a single data spec.
66
*/
67
static void
68
v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
69
const struct pipe_stream_output_info *stream_output)
70
{
71
if (!stream_output->num_outputs)
72
return;
73
74
struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4];
75
int slot_count = 0;
76
77
for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) {
78
uint32_t buffer_offset = 0;
79
uint32_t vpm_start = slot_count;
80
81
for (int i = 0; i < stream_output->num_outputs; i++) {
82
const struct pipe_stream_output *output =
83
&stream_output->output[i];
84
85
if (output->output_buffer != buffer)
86
continue;
87
88
/* We assume that the SO outputs appear in increasing
89
* order in the buffer.
90
*/
91
assert(output->dst_offset >= buffer_offset);
92
93
/* Pad any undefined slots in the output */
94
for (int j = buffer_offset; j < output->dst_offset; j++) {
95
slots[slot_count] =
96
v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0);
97
slot_count++;
98
buffer_offset++;
99
}
100
101
/* Set the coordinate shader up to output the
102
* components of this varying.
103
*/
104
for (int j = 0; j < output->num_components; j++) {
105
gl_varying_slot slot =
106
v3d_get_slot_for_driver_location(so->base.ir.nir, output->register_index);
107
108
slots[slot_count] =
109
v3d_slot_from_slot_and_component(slot,
110
output->start_component + j);
111
slot_count++;
112
buffer_offset++;
113
}
114
}
115
116
uint32_t vpm_size = slot_count - vpm_start;
117
if (!vpm_size)
118
continue;
119
120
uint32_t vpm_start_offset = vpm_start + 6;
121
122
while (vpm_size) {
123
uint32_t write_size = MIN2(vpm_size, 1 << 4);
124
125
struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
126
/* We need the offset from the coordinate shader's VPM
127
* output block, which has the [X, Y, Z, W, Xs, Ys]
128
* values at the start.
129
*/
130
.first_shaded_vertex_value_to_output = vpm_start_offset,
131
.number_of_consecutive_vertex_values_to_output_as_32_bit_values = write_size,
132
.output_buffer_to_write_to = buffer,
133
};
134
135
/* GFXH-1559 */
136
assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
137
so->num_tf_specs != 0);
138
139
assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
140
V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
141
(void *)&so->tf_specs[so->num_tf_specs],
142
&unpacked);
143
144
/* If point size is being written by the shader, then
145
* all the VPM start offsets are shifted up by one.
146
* We won't know that until the variant is compiled,
147
* though.
148
*/
149
unpacked.first_shaded_vertex_value_to_output++;
150
151
/* GFXH-1559 */
152
assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
153
so->num_tf_specs != 0);
154
155
V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
156
(void *)&so->tf_specs_psiz[so->num_tf_specs],
157
&unpacked);
158
so->num_tf_specs++;
159
vpm_start_offset += write_size;
160
vpm_size -= write_size;
161
}
162
so->base.stream_output.stride[buffer] =
163
stream_output->stride[buffer];
164
}
165
166
so->num_tf_outputs = slot_count;
167
so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot,
168
slot_count);
169
memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count);
170
}
171
172
/**
 * Type-size callback handed to nir_lower_io: reports the number of attribute
 * slots glsl_count_attribute_slots() gives for the type.  The bindless
 * argument is not used.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
        (void)bindless;

        const int slots = glsl_count_attribute_slots(type, false);
        return slots;
}
177
178
static void
179
precompile_all_outputs(nir_shader *s,
180
struct v3d_varying_slot *outputs,
181
uint8_t *num_outputs)
182
{
183
nir_foreach_shader_out_variable(var, s) {
184
const int array_len = MAX2(glsl_get_length(var->type), 1);
185
for (int j = 0; j < array_len; j++) {
186
const int slot = var->data.location + j;
187
const int num_components =
188
glsl_get_components(var->type);
189
for (int i = 0; i < num_components; i++) {
190
const int swiz = var->data.location_frac + i;
191
outputs[(*num_outputs)++] =
192
v3d_slot_from_slot_and_component(slot,
193
swiz);
194
}
195
}
196
}
197
}
198
199
/**
200
* Precompiles a shader variant at shader state creation time if
201
* V3D_DEBUG=precompile is set. Used for shader-db
202
* (https://gitlab.freedesktop.org/mesa/shader-db)
203
*/
204
static void
205
v3d_shader_precompile(struct v3d_context *v3d,
206
struct v3d_uncompiled_shader *so)
207
{
208
nir_shader *s = so->base.ir.nir;
209
210
if (s->info.stage == MESA_SHADER_FRAGMENT) {
211
struct v3d_fs_key key = {
212
.base.shader_state = so,
213
};
214
215
nir_foreach_shader_out_variable(var, s) {
216
if (var->data.location == FRAG_RESULT_COLOR) {
217
key.cbufs |= 1 << 0;
218
} else if (var->data.location >= FRAG_RESULT_DATA0) {
219
key.cbufs |= 1 << (var->data.location -
220
FRAG_RESULT_DATA0);
221
}
222
}
223
224
key.logicop_func = PIPE_LOGICOP_COPY;
225
226
v3d_setup_shared_precompile_key(so, &key.base);
227
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
228
} else if (s->info.stage == MESA_SHADER_GEOMETRY) {
229
struct v3d_gs_key key = {
230
.base.shader_state = so,
231
.base.is_last_geometry_stage = true,
232
};
233
234
v3d_setup_shared_precompile_key(so, &key.base);
235
236
precompile_all_outputs(s,
237
key.used_outputs,
238
&key.num_used_outputs);
239
240
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
241
242
/* Compile GS bin shader: only position (XXX: include TF) */
243
key.is_coord = true;
244
key.num_used_outputs = 0;
245
for (int i = 0; i < 4; i++) {
246
key.used_outputs[key.num_used_outputs++] =
247
v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
248
i);
249
}
250
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
251
} else {
252
assert(s->info.stage == MESA_SHADER_VERTEX);
253
struct v3d_vs_key key = {
254
.base.shader_state = so,
255
/* Emit fixed function outputs */
256
.base.is_last_geometry_stage = true,
257
};
258
259
v3d_setup_shared_precompile_key(so, &key.base);
260
261
precompile_all_outputs(s,
262
key.used_outputs,
263
&key.num_used_outputs);
264
265
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
266
267
/* Compile VS bin shader: only position (XXX: include TF) */
268
key.is_coord = true;
269
key.num_used_outputs = 0;
270
for (int i = 0; i < 4; i++) {
271
key.used_outputs[key.num_used_outputs++] =
272
v3d_slot_from_slot_and_component(VARYING_SLOT_POS,
273
i);
274
}
275
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
276
}
277
}
278
279
static void *
280
v3d_uncompiled_shader_create(struct pipe_context *pctx,
281
enum pipe_shader_ir type, void *ir)
282
{
283
struct v3d_context *v3d = v3d_context(pctx);
284
struct v3d_uncompiled_shader *so = CALLOC_STRUCT(v3d_uncompiled_shader);
285
if (!so)
286
return NULL;
287
288
so->program_id = v3d->next_uncompiled_program_id++;
289
290
nir_shader *s;
291
292
if (type == PIPE_SHADER_IR_NIR) {
293
/* The backend takes ownership of the NIR shader on state
294
* creation.
295
*/
296
s = ir;
297
} else {
298
assert(type == PIPE_SHADER_IR_TGSI);
299
300
if (V3D_DEBUG & V3D_DEBUG_TGSI) {
301
fprintf(stderr, "prog %d TGSI:\n",
302
so->program_id);
303
tgsi_dump(ir, 0);
304
fprintf(stderr, "\n");
305
}
306
s = tgsi_to_nir(ir, pctx->screen, false);
307
}
308
309
if (s->info.stage != MESA_SHADER_VERTEX &&
310
s->info.stage != MESA_SHADER_GEOMETRY) {
311
NIR_PASS_V(s, nir_lower_io,
312
nir_var_shader_in | nir_var_shader_out,
313
type_size, (nir_lower_io_options)0);
314
}
315
316
NIR_PASS_V(s, nir_lower_regs_to_ssa);
317
NIR_PASS_V(s, nir_normalize_cubemap_coords);
318
319
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
320
321
v3d_optimize_nir(NULL, s);
322
323
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
324
325
/* Garbage collect dead instructions */
326
nir_sweep(s);
327
328
so->base.type = PIPE_SHADER_IR_NIR;
329
so->base.ir.nir = s;
330
331
if (V3D_DEBUG & (V3D_DEBUG_NIR |
332
v3d_debug_flag_for_shader_stage(s->info.stage))) {
333
fprintf(stderr, "%s prog %d NIR:\n",
334
gl_shader_stage_name(s->info.stage),
335
so->program_id);
336
nir_print_shader(s, stderr);
337
fprintf(stderr, "\n");
338
}
339
340
if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE)
341
v3d_shader_precompile(v3d, so);
342
343
return so;
344
}
345
346
static void
347
v3d_shader_debug_output(const char *message, void *data)
348
{
349
struct v3d_context *v3d = data;
350
351
pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message);
352
}
353
354
static void *
355
v3d_shader_state_create(struct pipe_context *pctx,
356
const struct pipe_shader_state *cso)
357
{
358
struct v3d_uncompiled_shader *so =
359
v3d_uncompiled_shader_create(pctx,
360
cso->type,
361
(cso->type == PIPE_SHADER_IR_TGSI ?
362
(void *)cso->tokens :
363
cso->ir.nir));
364
365
v3d_set_transform_feedback_outputs(so, &cso->stream_output);
366
367
return so;
368
}
369
370
struct v3d_compiled_shader *
371
v3d_get_compiled_shader(struct v3d_context *v3d,
372
struct v3d_key *key,
373
size_t key_size)
374
{
375
struct v3d_uncompiled_shader *shader_state = key->shader_state;
376
nir_shader *s = shader_state->base.ir.nir;
377
378
struct hash_table *ht = v3d->prog.cache[s->info.stage];
379
struct hash_entry *entry = _mesa_hash_table_search(ht, key);
380
if (entry)
381
return entry->data;
382
383
struct v3d_compiled_shader *shader =
384
rzalloc(NULL, struct v3d_compiled_shader);
385
386
int program_id = shader_state->program_id;
387
int variant_id =
388
p_atomic_inc_return(&shader_state->compiled_variant_count);
389
uint64_t *qpu_insts;
390
uint32_t shader_size;
391
392
qpu_insts = v3d_compile(v3d->screen->compiler, key,
393
&shader->prog_data.base, s,
394
v3d_shader_debug_output,
395
v3d,
396
program_id, variant_id, &shader_size);
397
ralloc_steal(shader, shader->prog_data.base);
398
399
v3d_set_shader_uniform_dirty_flags(shader);
400
401
if (shader_size) {
402
u_upload_data(v3d->state_uploader, 0, shader_size, 8,
403
qpu_insts, &shader->offset, &shader->resource);
404
}
405
406
free(qpu_insts);
407
408
if (ht) {
409
struct v3d_key *dup_key;
410
dup_key = ralloc_size(shader, key_size);
411
memcpy(dup_key, key, key_size);
412
_mesa_hash_table_insert(ht, dup_key, shader);
413
}
414
415
if (shader->prog_data.base->spill_size >
416
v3d->prog.spill_size_per_thread) {
417
/* The TIDX register we use for choosing the area to access
418
* for scratch space is: (core << 6) | (qpu << 2) | thread.
419
* Even at minimum threadcount in a particular shader, that
420
* means we still multiply by qpus by 4.
421
*/
422
int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 *
423
shader->prog_data.base->spill_size);
424
425
v3d_bo_unreference(&v3d->prog.spill_bo);
426
v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen,
427
total_spill_size, "spill");
428
v3d->prog.spill_size_per_thread =
429
shader->prog_data.base->spill_size;
430
}
431
432
return shader;
433
}
434
435
static void
436
v3d_free_compiled_shader(struct v3d_compiled_shader *shader)
437
{
438
pipe_resource_reference(&shader->resource, NULL);
439
ralloc_free(shader);
440
}
441
442
static void
443
v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
444
struct v3d_texture_stateobj *texstate)
445
{
446
const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
447
448
key->num_tex_used = texstate->num_textures;
449
key->num_samplers_used = texstate->num_textures;
450
assert(key->num_tex_used == key->num_samplers_used);
451
for (int i = 0; i < texstate->num_textures; i++) {
452
struct pipe_sampler_view *sampler = texstate->textures[i];
453
struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler);
454
struct pipe_sampler_state *sampler_state =
455
texstate->samplers[i];
456
457
if (!sampler)
458
continue;
459
460
key->sampler[i].return_size =
461
v3d_get_tex_return_size(devinfo,
462
sampler->format,
463
sampler_state->compare_mode);
464
465
/* For 16-bit, we set up the sampler to always return 2
466
* channels (meaning no recompiles for most statechanges),
467
* while for 32 we actually scale the returns with channels.
468
*/
469
if (key->sampler[i].return_size == 16) {
470
key->sampler[i].return_channels = 2;
471
} else if (devinfo->ver > 40) {
472
key->sampler[i].return_channels = 4;
473
} else {
474
key->sampler[i].return_channels =
475
v3d_get_tex_return_channels(devinfo,
476
sampler->format);
477
}
478
479
if (key->sampler[i].return_size == 32 && devinfo->ver < 40) {
480
memcpy(key->tex[i].swizzle,
481
v3d_sampler->swizzle,
482
sizeof(v3d_sampler->swizzle));
483
} else {
484
/* For 16-bit returns, we let the sampler state handle
485
* the swizzle.
486
*/
487
key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
488
key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
489
key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
490
key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
491
}
492
}
493
}
494
495
static void
496
v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
497
struct v3d_key *key)
498
{
499
nir_shader *s = uncompiled->base.ir.nir;
500
501
/* Note that below we access they key's texture and sampler fields
502
* using the same index. On OpenGL they are the same (they are
503
* combined)
504
*/
505
key->num_tex_used = s->info.num_textures;
506
key->num_samplers_used = s->info.num_textures;
507
for (int i = 0; i < s->info.num_textures; i++) {
508
key->sampler[i].return_size = 16;
509
key->sampler[i].return_channels = 2;
510
511
key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
512
key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
513
key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
514
key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
515
}
516
}
517
518
static void
519
v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
520
{
521
struct v3d_job *job = v3d->job;
522
struct v3d_fs_key local_key;
523
struct v3d_fs_key *key = &local_key;
524
nir_shader *s = v3d->prog.bind_fs->base.ir.nir;
525
526
if (!(v3d->dirty & (V3D_DIRTY_PRIM_MODE |
527
V3D_DIRTY_BLEND |
528
V3D_DIRTY_FRAMEBUFFER |
529
V3D_DIRTY_ZSA |
530
V3D_DIRTY_RASTERIZER |
531
V3D_DIRTY_SAMPLE_STATE |
532
V3D_DIRTY_FRAGTEX |
533
V3D_DIRTY_UNCOMPILED_FS))) {
534
return;
535
}
536
537
memset(key, 0, sizeof(*key));
538
v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]);
539
key->base.shader_state = v3d->prog.bind_fs;
540
key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
541
key->is_points = (prim_mode == PIPE_PRIM_POINTS);
542
key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
543
prim_mode <= PIPE_PRIM_LINE_STRIP);
544
key->line_smoothing = (key->is_lines &&
545
v3d_line_smoothing_enabled(v3d));
546
key->has_gs = v3d->prog.bind_gs != NULL;
547
if (v3d->blend->base.logicop_enable) {
548
key->logicop_func = v3d->blend->base.logicop_func;
549
} else {
550
key->logicop_func = PIPE_LOGICOP_COPY;
551
}
552
if (job->msaa) {
553
key->msaa = v3d->rasterizer->base.multisample;
554
key->sample_coverage = (v3d->rasterizer->base.multisample &&
555
v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1);
556
key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage;
557
key->sample_alpha_to_one = v3d->blend->base.alpha_to_one;
558
}
559
560
key->swap_color_rb = v3d->swap_color_rb;
561
562
for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) {
563
struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
564
if (!cbuf)
565
continue;
566
567
/* gl_FragColor's propagation to however many bound color
568
* buffers there are means that the shader compile needs to
569
* know what buffers are present.
570
*/
571
key->cbufs |= 1 << i;
572
573
/* If logic operations are enabled then we might emit color
574
* reads and we need to know the color buffer format and
575
* swizzle for that.
576
*/
577
if (key->logicop_func != PIPE_LOGICOP_COPY) {
578
key->color_fmt[i].format = cbuf->format;
579
key->color_fmt[i].swizzle =
580
v3d_get_format_swizzle(&v3d->screen->devinfo,
581
cbuf->format);
582
}
583
584
const struct util_format_description *desc =
585
util_format_description(cbuf->format);
586
587
if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
588
desc->channel[0].size == 32) {
589
key->f32_color_rb |= 1 << i;
590
}
591
592
if (s->info.fs.untyped_color_outputs) {
593
if (util_format_is_pure_uint(cbuf->format))
594
key->uint_color_rb |= 1 << i;
595
else if (util_format_is_pure_sint(cbuf->format))
596
key->int_color_rb |= 1 << i;
597
}
598
}
599
600
if (key->is_points) {
601
key->point_sprite_mask =
602
v3d->rasterizer->base.sprite_coord_enable;
603
key->point_coord_upper_left =
604
(v3d->rasterizer->base.sprite_coord_mode ==
605
PIPE_SPRITE_COORD_UPPER_LEFT);
606
}
607
608
struct v3d_compiled_shader *old_fs = v3d->prog.fs;
609
v3d->prog.fs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
610
if (v3d->prog.fs == old_fs)
611
return;
612
613
v3d->dirty |= V3D_DIRTY_COMPILED_FS;
614
615
if (old_fs) {
616
if (v3d->prog.fs->prog_data.fs->flat_shade_flags !=
617
old_fs->prog_data.fs->flat_shade_flags) {
618
v3d->dirty |= V3D_DIRTY_FLAT_SHADE_FLAGS;
619
}
620
621
if (v3d->prog.fs->prog_data.fs->noperspective_flags !=
622
old_fs->prog_data.fs->noperspective_flags) {
623
v3d->dirty |= V3D_DIRTY_NOPERSPECTIVE_FLAGS;
624
}
625
626
if (v3d->prog.fs->prog_data.fs->centroid_flags !=
627
old_fs->prog_data.fs->centroid_flags) {
628
v3d->dirty |= V3D_DIRTY_CENTROID_FLAGS;
629
}
630
}
631
632
if (old_fs && memcmp(v3d->prog.fs->prog_data.fs->input_slots,
633
old_fs->prog_data.fs->input_slots,
634
sizeof(v3d->prog.fs->prog_data.fs->input_slots))) {
635
v3d->dirty |= V3D_DIRTY_FS_INPUTS;
636
}
637
}
638
639
static void
640
v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode)
641
{
642
struct v3d_gs_key local_key;
643
struct v3d_gs_key *key = &local_key;
644
645
if (!(v3d->dirty & (V3D_DIRTY_GEOMTEX |
646
V3D_DIRTY_RASTERIZER |
647
V3D_DIRTY_UNCOMPILED_GS |
648
V3D_DIRTY_PRIM_MODE |
649
V3D_DIRTY_FS_INPUTS))) {
650
return;
651
}
652
653
if (!v3d->prog.bind_gs) {
654
v3d->prog.gs = NULL;
655
v3d->prog.gs_bin = NULL;
656
return;
657
}
658
659
memset(key, 0, sizeof(*key));
660
v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]);
661
key->base.shader_state = v3d->prog.bind_gs;
662
key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
663
key->base.is_last_geometry_stage = true;
664
key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
665
STATIC_ASSERT(sizeof(key->used_outputs) ==
666
sizeof(v3d->prog.fs->prog_data.fs->input_slots));
667
memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
668
sizeof(key->used_outputs));
669
670
key->per_vertex_point_size =
671
(prim_mode == PIPE_PRIM_POINTS &&
672
v3d->rasterizer->base.point_size_per_vertex);
673
674
struct v3d_compiled_shader *gs =
675
v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
676
if (gs != v3d->prog.gs) {
677
v3d->prog.gs = gs;
678
v3d->dirty |= V3D_DIRTY_COMPILED_GS;
679
}
680
681
key->is_coord = true;
682
683
/* The last bin-mode shader in the geometry pipeline only outputs
684
* varyings used by transform feedback.
685
*/
686
struct v3d_uncompiled_shader *shader_state = key->base.shader_state;
687
memcpy(key->used_outputs, shader_state->tf_outputs,
688
sizeof(*key->used_outputs) * shader_state->num_tf_outputs);
689
if (shader_state->num_tf_outputs < key->num_used_outputs) {
690
uint32_t size = sizeof(*key->used_outputs) *
691
(key->num_used_outputs -
692
shader_state->num_tf_outputs);
693
memset(&key->used_outputs[shader_state->num_tf_outputs],
694
0, size);
695
}
696
key->num_used_outputs = shader_state->num_tf_outputs;
697
698
struct v3d_compiled_shader *old_gs = v3d->prog.gs;
699
struct v3d_compiled_shader *gs_bin =
700
v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
701
if (gs_bin != old_gs) {
702
v3d->prog.gs_bin = gs_bin;
703
v3d->dirty |= V3D_DIRTY_COMPILED_GS_BIN;
704
}
705
706
if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots,
707
old_gs->prog_data.gs->input_slots,
708
sizeof(v3d->prog.gs->prog_data.gs->input_slots))) {
709
v3d->dirty |= V3D_DIRTY_GS_INPUTS;
710
}
711
}
712
713
static void
714
v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
715
{
716
struct v3d_vs_key local_key;
717
struct v3d_vs_key *key = &local_key;
718
719
if (!(v3d->dirty & (V3D_DIRTY_VERTTEX |
720
V3D_DIRTY_VTXSTATE |
721
V3D_DIRTY_UNCOMPILED_VS |
722
(v3d->prog.bind_gs ? 0 : V3D_DIRTY_RASTERIZER) |
723
(v3d->prog.bind_gs ? 0 : V3D_DIRTY_PRIM_MODE) |
724
(v3d->prog.bind_gs ? V3D_DIRTY_GS_INPUTS :
725
V3D_DIRTY_FS_INPUTS)))) {
726
return;
727
}
728
729
memset(key, 0, sizeof(*key));
730
v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
731
key->base.shader_state = v3d->prog.bind_vs;
732
key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
733
key->base.is_last_geometry_stage = !v3d->prog.bind_gs;
734
735
if (!v3d->prog.bind_gs) {
736
key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
737
STATIC_ASSERT(sizeof(key->used_outputs) ==
738
sizeof(v3d->prog.fs->prog_data.fs->input_slots));
739
memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
740
sizeof(key->used_outputs));
741
} else {
742
key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs;
743
STATIC_ASSERT(sizeof(key->used_outputs) ==
744
sizeof(v3d->prog.gs->prog_data.gs->input_slots));
745
memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots,
746
sizeof(key->used_outputs));
747
}
748
749
key->per_vertex_point_size =
750
(prim_mode == PIPE_PRIM_POINTS &&
751
v3d->rasterizer->base.point_size_per_vertex);
752
753
nir_shader *s = v3d->prog.bind_vs->base.ir.nir;
754
uint64_t inputs_read = s->info.inputs_read;
755
assert(util_bitcount(inputs_read) <= v3d->vtx->num_elements);
756
757
while (inputs_read) {
758
int location = u_bit_scan64(&inputs_read);
759
nir_variable *var =
760
nir_find_variable_with_location(s, nir_var_shader_in, location);
761
assert (var != NULL);
762
int driver_location = var->data.driver_location;
763
switch (v3d->vtx->pipe[driver_location].src_format) {
764
case PIPE_FORMAT_B8G8R8A8_UNORM:
765
case PIPE_FORMAT_B10G10R10A2_UNORM:
766
case PIPE_FORMAT_B10G10R10A2_SNORM:
767
case PIPE_FORMAT_B10G10R10A2_USCALED:
768
case PIPE_FORMAT_B10G10R10A2_SSCALED:
769
key->va_swap_rb_mask |= 1 << location;
770
break;
771
default:
772
break;
773
}
774
}
775
776
struct v3d_compiled_shader *vs =
777
v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
778
if (vs != v3d->prog.vs) {
779
v3d->prog.vs = vs;
780
v3d->dirty |= V3D_DIRTY_COMPILED_VS;
781
}
782
783
key->is_coord = true;
784
785
/* Coord shaders only output varyings used by transform feedback,
786
* unless they are linked to other shaders in the geometry side
787
* of the pipeline, since in that case any of the output varyings
788
* could be required in later geometry stages to compute
789
* gl_Position or TF outputs.
790
*/
791
if (!v3d->prog.bind_gs) {
792
struct v3d_uncompiled_shader *shader_state =
793
key->base.shader_state;
794
memcpy(key->used_outputs, shader_state->tf_outputs,
795
sizeof(*key->used_outputs) *
796
shader_state->num_tf_outputs);
797
if (shader_state->num_tf_outputs < key->num_used_outputs) {
798
uint32_t tail_bytes =
799
sizeof(*key->used_outputs) *
800
(key->num_used_outputs -
801
shader_state->num_tf_outputs);
802
memset(&key->used_outputs[shader_state->num_tf_outputs],
803
0, tail_bytes);
804
}
805
key->num_used_outputs = shader_state->num_tf_outputs;
806
} else {
807
key->num_used_outputs = v3d->prog.gs_bin->prog_data.gs->num_inputs;
808
STATIC_ASSERT(sizeof(key->used_outputs) ==
809
sizeof(v3d->prog.gs_bin->prog_data.gs->input_slots));
810
memcpy(key->used_outputs, v3d->prog.gs_bin->prog_data.gs->input_slots,
811
sizeof(key->used_outputs));
812
}
813
814
struct v3d_compiled_shader *cs =
815
v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
816
if (cs != v3d->prog.cs) {
817
v3d->prog.cs = cs;
818
v3d->dirty |= V3D_DIRTY_COMPILED_CS;
819
}
820
}
821
822
/**
 * Refreshes the compiled variants of the graphics pipeline for a draw with
 * the given primitive mode.
 *
 * Stages are processed from last to first (FS, then GS, then VS): the GS and
 * VS keys are built from the inputs of the stage that follows them.
 */
void
v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
{
        /* Consumer stages first; see the ordering note above. */
        v3d_update_compiled_fs(v3d, prim_mode);
        v3d_update_compiled_gs(v3d, prim_mode);
        v3d_update_compiled_vs(v3d, prim_mode);
}
829
830
void
831
v3d_update_compiled_cs(struct v3d_context *v3d)
832
{
833
struct v3d_key local_key;
834
struct v3d_key *key = &local_key;
835
836
if (!(v3d->dirty & (V3D_DIRTY_UNCOMPILED_CS |
837
V3D_DIRTY_COMPTEX))) {
838
return;
839
}
840
841
memset(key, 0, sizeof(*key));
842
v3d_setup_shared_key(v3d, key, &v3d->tex[PIPE_SHADER_COMPUTE]);
843
key->shader_state = v3d->prog.bind_compute;
844
845
struct v3d_compiled_shader *cs =
846
v3d_get_compiled_shader(v3d, key, sizeof(*key));
847
if (cs != v3d->prog.compute) {
848
v3d->prog.compute = cs;
849
v3d->dirty |= V3D_DIRTY_COMPILED_CS; /* XXX */
850
}
851
}
852
853
static uint32_t
854
fs_cache_hash(const void *key)
855
{
856
return _mesa_hash_data(key, sizeof(struct v3d_fs_key));
857
}
858
859
static uint32_t
860
gs_cache_hash(const void *key)
861
{
862
return _mesa_hash_data(key, sizeof(struct v3d_gs_key));
863
}
864
865
static uint32_t
866
vs_cache_hash(const void *key)
867
{
868
return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
869
}
870
871
static uint32_t
872
cs_cache_hash(const void *key)
873
{
874
return _mesa_hash_data(key, sizeof(struct v3d_key));
875
}
876
877
static bool
878
fs_cache_compare(const void *key1, const void *key2)
879
{
880
return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0;
881
}
882
883
static bool
884
gs_cache_compare(const void *key1, const void *key2)
885
{
886
return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0;
887
}
888
889
static bool
890
vs_cache_compare(const void *key1, const void *key2)
891
{
892
return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
893
}
894
895
static bool
896
cs_cache_compare(const void *key1, const void *key2)
897
{
898
return memcmp(key1, key2, sizeof(struct v3d_key)) == 0;
899
}
900
901
static void
902
v3d_shader_state_delete(struct pipe_context *pctx, void *hwcso)
903
{
904
struct v3d_context *v3d = v3d_context(pctx);
905
struct v3d_uncompiled_shader *so = hwcso;
906
nir_shader *s = so->base.ir.nir;
907
908
hash_table_foreach(v3d->prog.cache[s->info.stage], entry) {
909
const struct v3d_key *key = entry->key;
910
struct v3d_compiled_shader *shader = entry->data;
911
912
if (key->shader_state != so)
913
continue;
914
915
if (v3d->prog.fs == shader)
916
v3d->prog.fs = NULL;
917
if (v3d->prog.vs == shader)
918
v3d->prog.vs = NULL;
919
if (v3d->prog.cs == shader)
920
v3d->prog.cs = NULL;
921
if (v3d->prog.compute == shader)
922
v3d->prog.compute = NULL;
923
924
_mesa_hash_table_remove(v3d->prog.cache[s->info.stage], entry);
925
v3d_free_compiled_shader(shader);
926
}
927
928
ralloc_free(so->base.ir.nir);
929
free(so);
930
}
931
932
static void
933
v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso)
934
{
935
struct v3d_context *v3d = v3d_context(pctx);
936
v3d->prog.bind_fs = hwcso;
937
v3d->dirty |= V3D_DIRTY_UNCOMPILED_FS;
938
}
939
940
static void
941
v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso)
942
{
943
struct v3d_context *v3d = v3d_context(pctx);
944
v3d->prog.bind_gs = hwcso;
945
v3d->dirty |= V3D_DIRTY_UNCOMPILED_GS;
946
}
947
948
static void
949
v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso)
950
{
951
struct v3d_context *v3d = v3d_context(pctx);
952
v3d->prog.bind_vs = hwcso;
953
v3d->dirty |= V3D_DIRTY_UNCOMPILED_VS;
954
}
955
956
static void
957
v3d_compute_state_bind(struct pipe_context *pctx, void *state)
958
{
959
struct v3d_context *v3d = v3d_context(pctx);
960
961
v3d->prog.bind_compute = state;
962
v3d->dirty |= V3D_DIRTY_UNCOMPILED_CS;
963
}
964
965
static void *
966
v3d_create_compute_state(struct pipe_context *pctx,
967
const struct pipe_compute_state *cso)
968
{
969
return v3d_uncompiled_shader_create(pctx, cso->ir_type,
970
(void *)cso->prog);
971
}
972
973
void
974
v3d_program_init(struct pipe_context *pctx)
975
{
976
struct v3d_context *v3d = v3d_context(pctx);
977
978
pctx->create_vs_state = v3d_shader_state_create;
979
pctx->delete_vs_state = v3d_shader_state_delete;
980
981
pctx->create_gs_state = v3d_shader_state_create;
982
pctx->delete_gs_state = v3d_shader_state_delete;
983
984
pctx->create_fs_state = v3d_shader_state_create;
985
pctx->delete_fs_state = v3d_shader_state_delete;
986
987
pctx->bind_fs_state = v3d_fp_state_bind;
988
pctx->bind_gs_state = v3d_gp_state_bind;
989
pctx->bind_vs_state = v3d_vp_state_bind;
990
991
if (v3d->screen->has_csd) {
992
pctx->create_compute_state = v3d_create_compute_state;
993
pctx->delete_compute_state = v3d_shader_state_delete;
994
pctx->bind_compute_state = v3d_compute_state_bind;
995
}
996
997
v3d->prog.cache[MESA_SHADER_VERTEX] =
998
_mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare);
999
v3d->prog.cache[MESA_SHADER_GEOMETRY] =
1000
_mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare);
1001
v3d->prog.cache[MESA_SHADER_FRAGMENT] =
1002
_mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare);
1003
v3d->prog.cache[MESA_SHADER_COMPUTE] =
1004
_mesa_hash_table_create(pctx, cs_cache_hash, cs_cache_compare);
1005
}
1006
1007
void
1008
v3d_program_fini(struct pipe_context *pctx)
1009
{
1010
struct v3d_context *v3d = v3d_context(pctx);
1011
1012
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
1013
struct hash_table *cache = v3d->prog.cache[i];
1014
if (!cache)
1015
continue;
1016
1017
hash_table_foreach(cache, entry) {
1018
struct v3d_compiled_shader *shader = entry->data;
1019
v3d_free_compiled_shader(shader);
1020
_mesa_hash_table_remove(cache, entry);
1021
}
1022
}
1023
1024
v3d_bo_unreference(&v3d->prog.spill_bo);
1025
}
1026
1027