GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/lib/pan_blitter.c
/*
 * Copyright (C) 2020-2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <[email protected]>
 *   Boris Brezillon <[email protected]>
 */

#include <math.h>
#include <stdio.h>
#include "pan_blend.h"
#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_pool.h"
#include "pan_shader.h"
#include "pan_scoreboard.h"
#include "pan_texture.h"
#include "panfrost-quirks.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"

/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
 * missing in many cases. We instead use software paths as fallbacks to
 * implement blits, which are done as TILER jobs. No vertex shader is
 * necessary since we can supply screen-space coordinates directly.
 *
 * This is primarily designed as a fallback for preloads but could be extended
 * for other clears/blits if needed in the future. */

static enum mali_bifrost_register_file_format
blit_type_to_reg_fmt(nir_alu_type in)
{
        switch (in) {
        case nir_type_float32:
                return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
        case nir_type_int32:
                return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
        case nir_type_uint32:
                return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
        default:
                unreachable("Invalid blit type");
        }
}

struct pan_blit_surface {
        gl_frag_result loc : 4;
        nir_alu_type type : 8;
        enum mali_texture_dimension dim : 2;
        bool array : 1;
        unsigned src_samples : 5;
        unsigned dst_samples : 5;
};

struct pan_blit_shader_key {
        struct pan_blit_surface surfaces[8];
};

struct pan_blit_shader_data {
        struct pan_blit_shader_key key;
        mali_ptr address;
        unsigned blend_ret_offsets[8];
        nir_alu_type blend_types[8];
};

struct pan_blit_blend_shader_key {
        enum pipe_format format;
        nir_alu_type type;
        unsigned rt : 3;
        unsigned nr_samples : 5;
};

struct pan_blit_blend_shader_data {
        struct pan_blit_blend_shader_key key;
        mali_ptr address;
};

struct pan_blit_rsd_key {
        struct {
                enum pipe_format format;
                nir_alu_type type : 8;
                unsigned src_samples : 5;
                unsigned dst_samples : 5;
                enum mali_texture_dimension dim : 2;
                bool array : 1;
        } rts[8], z, s;
};

struct pan_blit_rsd_data {
        struct pan_blit_rsd_key key;
        mali_ptr address;
};

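/* The key/data structs above back the blitter's three hash-table caches
 * (set up in pan_blitter_init): blit fragment shaders are keyed on the set
 * of surfaces they write, blend shaders on (format, type, RT index, sample
 * count), and renderer state descriptors on the per-target
 * format/sample/dimension tuples. Cached GPU addresses are reused whenever
 * a later blit hashes to the same key. */
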
static void
pan_blitter_prepare_midgard_rsd(const struct panfrost_device *dev,
                                const struct pan_image_view **rts,
                                mali_ptr *blend_shaders, bool zs,
                                struct MALI_RENDERER_STATE *rsd)
{
        mali_ptr blend_shader = blend_shaders ? blend_shaders[0] : 0;

        rsd->properties.midgard.work_register_count = 4;
        rsd->properties.midgard.force_early_z = !zs;
        rsd->stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
        if (!(dev->quirks & MIDGARD_SFBD)) {
                rsd->sfbd_blend_shader = blend_shader;
                return;
        }

        rsd->stencil_mask_misc.sfbd_write_enable = true;
        rsd->stencil_mask_misc.sfbd_dither_disable = true;
        rsd->multisample_misc.sfbd_blend_shader = !!blend_shader;
        rsd->sfbd_blend_shader = blend_shader;
        if (rsd->multisample_misc.sfbd_blend_shader)
                return;

        rsd->sfbd_blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
        rsd->sfbd_blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
        rsd->sfbd_blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
        rsd->sfbd_blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
        rsd->sfbd_blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
        rsd->sfbd_blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
        rsd->sfbd_blend_constant = 0;

        if (rts && rts[0]) {
                rsd->stencil_mask_misc.sfbd_srgb =
                        util_format_is_srgb(rts[0]->format);
                rsd->sfbd_blend_equation.color_mask = 0xf;
        }
}

static void
pan_blitter_prepare_bifrost_rsd(const struct panfrost_device *dev,
                                bool zs, bool ms,
                                struct MALI_RENDERER_STATE *rsd)
{
        if (zs) {
                rsd->properties.bifrost.zs_update_operation =
                        MALI_PIXEL_KILL_FORCE_LATE;
                rsd->properties.bifrost.pixel_kill_operation =
                        MALI_PIXEL_KILL_FORCE_LATE;
        } else {
                rsd->properties.bifrost.zs_update_operation =
                        MALI_PIXEL_KILL_STRONG_EARLY;
                rsd->properties.bifrost.pixel_kill_operation =
                        MALI_PIXEL_KILL_FORCE_EARLY;
        }

        /* We can only allow blit shader fragments to kill if they write all
         * colour outputs. This is true for our colour (non-Z/S) blit shaders,
         * but obviously not true for Z/S shaders. Conversely, blit shaders
         * otherwise lack side effects, so other fragments may kill them; the
         * exception is v6, where letting frame shaders that write Z/S be
         * killed can cause GPU timeouts, so there only colour blit shaders
         * may be killed. */

        rsd->properties.bifrost.allow_forward_pixel_to_kill = !zs;
        rsd->properties.bifrost.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs;

        rsd->preload.fragment.coverage = true;
        rsd->preload.fragment.sample_mask_id = ms;
}

static void
pan_blitter_emit_midgard_blend(const struct panfrost_device *dev,
                               unsigned rt,
                               const struct pan_image_view *iview,
                               mali_ptr blend_shader,
                               void *out)
{
        assert(!(dev->quirks & MIDGARD_SFBD));

        pan_pack(out, BLEND, cfg) {
                if (!iview) {
                        cfg.midgard.equation.color_mask = 0xf;
                        cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                        continue;
                }

                cfg.round_to_fb_precision = true;
                cfg.srgb = util_format_is_srgb(iview->format);

                if (!blend_shader) {
                        cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.midgard.equation.color_mask = 0xf;
                } else {
                        cfg.midgard.blend_shader = true;
                        cfg.midgard.shader_pc = blend_shader;
                }
        }
}

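/* Note on the fixed-function equation used above (and in the SFBD path):
 * a = SRC, b = SRC, c = ZERO is panfrost's canonical opaque/replace
 * configuration, i.e. the framebuffer simply receives the source colour,
 * equivalent to glBlendFunc(GL_ONE, GL_ZERO). */
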
static void
pan_blitter_emit_bifrost_blend(const struct panfrost_device *dev,
                               unsigned rt,
                               const struct pan_image_view *iview,
                               const struct pan_blit_shader_data *blit_shader,
                               mali_ptr blend_shader,
                               void *out)
{
        pan_pack(out, BLEND, cfg) {
                if (!iview) {
                        cfg.enable = false;
                        cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
                        continue;
                }

                nir_alu_type type = blit_shader->key.surfaces[rt].type;

                cfg.round_to_fb_precision = true;
                cfg.srgb = util_format_is_srgb(iview->format);
                cfg.bifrost.internal.mode = blend_shader ?
                                            MALI_BIFROST_BLEND_MODE_SHADER :
                                            MALI_BIFROST_BLEND_MODE_OPAQUE;
                if (blend_shader) {
                        cfg.bifrost.internal.shader.pc = blend_shader;
                        if (blit_shader->blend_ret_offsets[rt]) {
                                cfg.bifrost.internal.shader.return_value =
                                        blit_shader->address +
                                        blit_shader->blend_ret_offsets[rt];
                        }
                } else {
                        cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                        cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                        cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                        cfg.bifrost.equation.color_mask = 0xf;
                        cfg.bifrost.internal.fixed_function.num_comps = 4;
                        cfg.bifrost.internal.fixed_function.conversion.memory_format =
                                panfrost_format_to_bifrost_blend(dev, iview->format);
                        cfg.bifrost.internal.fixed_function.conversion.register_format =
                                blit_type_to_reg_fmt(type);

                        cfg.bifrost.internal.fixed_function.rt = rt;
                }
        }
}

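/* On Bifrost the blend unit has two interesting modes here: OPAQUE writes
 * the shader output directly using the conversion descriptor above, while
 * SHADER jumps to a blend shader and then resumes the fragment shader at
 * the return address recorded in blend_ret_offsets[] when the blit shader
 * was compiled. */
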
static void
pan_blitter_emit_rsd(const struct panfrost_device *dev,
                     const struct pan_blit_shader_data *blit_shader,
                     unsigned rt_count,
                     const struct pan_image_view **rts,
                     mali_ptr *blend_shaders,
                     const struct pan_image_view *z,
                     const struct pan_image_view *s,
                     void *out)
{
        unsigned tex_count = 0;
        bool zs = (z || s);
        bool ms = false;

        for (unsigned i = 0; i < rt_count; i++) {
                if (rts[i]) {
                        tex_count++;
                        if (rts[i]->image->layout.nr_samples > 1)
                                ms = true;
                }
        }

        if (z) {
                if (z->image->layout.nr_samples > 1)
                        ms = true;
                tex_count++;
        }

        if (s) {
                if (s->image->layout.nr_samples > 1)
                        ms = true;
                tex_count++;
        }

        pan_pack(out, RENDERER_STATE, cfg) {
                assert(blit_shader->address);
                cfg.shader.shader = blit_shader->address;
                cfg.shader.varying_count = 1;
                cfg.shader.texture_count = tex_count;
                cfg.shader.sampler_count = 1;

                cfg.properties.stencil_from_shader = s != NULL;
                cfg.properties.depth_source =
                        z ?
                        MALI_DEPTH_SOURCE_SHADER :
                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;

                cfg.multisample_misc.sample_mask = 0xFFFF;
                cfg.multisample_misc.multisample_enable = ms;
                cfg.multisample_misc.evaluate_per_sample = ms;
                cfg.multisample_misc.depth_write_mask = z != NULL;
                cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;

                cfg.stencil_mask_misc.stencil_enable = s != NULL;
                cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
                cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
                cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
                cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
                cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
                cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
                cfg.stencil_front.mask = 0xFF;
                cfg.stencil_back = cfg.stencil_front;

                if (pan_is_bifrost(dev)) {
                        pan_blitter_prepare_bifrost_rsd(dev, zs, ms, &cfg);
                } else {
                        pan_blitter_prepare_midgard_rsd(dev, rts,
                                                        blend_shaders, zs,
                                                        &cfg);
                }
        }

        if (dev->quirks & MIDGARD_SFBD)
                return;

        for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
                void *dest = out + MALI_RENDERER_STATE_LENGTH + MALI_BLEND_LENGTH * i;
                const struct pan_image_view *rt_view = rts ? rts[i] : NULL;
                mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;

                if (pan_is_bifrost(dev)) {
                        pan_blitter_emit_bifrost_blend(dev, i, rt_view, blit_shader,
                                                       blend_shader, dest);
                } else {
                        pan_blitter_emit_midgard_blend(dev, i, rt_view,
                                                       blend_shader, dest);
                }
        }
}

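/* The descriptor emitted above is a RENDERER_STATE block immediately
 * followed by one BLEND descriptor per render target (except on SFBD
 * hardware, where blend state lives inside the renderer state itself),
 * which is why the BLEND destinations are computed as
 * out + MALI_RENDERER_STATE_LENGTH + i * MALI_BLEND_LENGTH. */
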
static void
pan_blitter_get_blend_shaders(struct panfrost_device *dev,
                              unsigned rt_count,
                              const struct pan_image_view **rts,
                              const struct pan_blit_shader_data *blit_shader,
                              mali_ptr *blend_shaders)
{
        if (!rt_count)
                return;

        struct pan_blend_state blend_state = {
                .rt_count = rt_count,
        };

        for (unsigned i = 0; i < rt_count; i++) {
                if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
                        continue;

                struct pan_blit_blend_shader_key key = {
                        .format = rts[i]->format,
                        .rt = i,
                        .nr_samples = rts[i]->image->layout.nr_samples,
                        .type = blit_shader->blend_types[i],
                };

                pthread_mutex_lock(&dev->blitter.shaders.lock);
                struct hash_entry *he =
                        _mesa_hash_table_search(dev->blitter.shaders.blend, &key);
                struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
                if (blend_shader) {
                        blend_shaders[i] = blend_shader->address;
                        pthread_mutex_unlock(&dev->blitter.shaders.lock);
                        continue;
                }

                blend_shader = rzalloc(dev->blitter.shaders.blend,
                                       struct pan_blit_blend_shader_data);
                blend_shader->key = key;

                blend_state.rts[i] = (struct pan_blend_rt_state) {
                        .format = rts[i]->format,
                        .nr_samples = rts[i]->image->layout.nr_samples,
                        .equation = {
                                .blend_enable = true,
                                .rgb_src_factor = BLEND_FACTOR_ZERO,
                                .rgb_invert_src_factor = true,
                                .rgb_dst_factor = BLEND_FACTOR_ZERO,
                                .rgb_func = BLEND_FUNC_ADD,
                                .alpha_src_factor = BLEND_FACTOR_ZERO,
                                .alpha_invert_src_factor = true,
                                .alpha_dst_factor = BLEND_FACTOR_ZERO,
                                .alpha_func = BLEND_FUNC_ADD,
                                .color_mask = 0xf,
                        },
                };

                pthread_mutex_lock(&dev->blend_shaders.lock);
                struct pan_blend_shader_variant *b =
                        pan_blend_get_shader_locked(dev, &blend_state,
                                                    blit_shader->blend_types[i],
                                                    nir_type_float32, /* unused */
                                                    i);

                ASSERTED unsigned full_threads =
                        (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);
                assert(b->work_reg_count <= full_threads);
                struct panfrost_ptr bin =
                        pan_pool_alloc_aligned(dev->blitter.shaders.pool,
                                               b->binary.size,
                                               pan_is_bifrost(dev) ? 128 : 64);
                memcpy(bin.cpu, b->binary.data, b->binary.size);

                blend_shader->address = bin.gpu | b->first_tag;
                pthread_mutex_unlock(&dev->blend_shaders.lock);
                _mesa_hash_table_insert(dev->blitter.shaders.blend,
                                        &blend_shader->key, blend_shader);
                pthread_mutex_unlock(&dev->blitter.shaders.lock);
                blend_shaders[i] = blend_shader->address;
        }
}

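/* The blend equation requested above is a disguised replace:
 * BLEND_FACTOR_ZERO with invert_src_factor set is factor ONE, and the
 * destination factor is ZERO, so dst = src * 1 + dst * 0. A blend shader
 * is only generated for formats the fixed-function path cannot convert
 * to; blendable formats are skipped at the top of the loop. */
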
static const struct pan_blit_shader_data *
pan_blitter_get_blit_shader(struct panfrost_device *dev,
                            const struct pan_blit_shader_key *key)
{
        pthread_mutex_lock(&dev->blitter.shaders.lock);
        struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);
        struct pan_blit_shader_data *shader = he ? he->data : NULL;

        if (shader)
                goto out;

        unsigned coord_comps = 0;
        unsigned sig_offset = 0;
        char sig[256];
        bool first = true;
        for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
                const char *type_str, *dim_str;
                if (key->surfaces[i].type == nir_type_invalid)
                        continue;

                switch (key->surfaces[i].type) {
                case nir_type_float32: type_str = "float"; break;
                case nir_type_uint32: type_str = "uint"; break;
                case nir_type_int32: type_str = "int"; break;
                default: unreachable("Invalid type\n");
                }

                switch (key->surfaces[i].dim) {
                case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;
                case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;
                case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;
                case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;
                default: unreachable("Invalid dim\n");
                }

                coord_comps = MAX2(coord_comps,
                                   (key->surfaces[i].dim ? : 3) +
                                   (key->surfaces[i].array ? 1 : 0));

                if (sig_offset >= sizeof(sig)) {
                        first = false;
                        continue;
                }

                sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
                                       "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
                                       first ? "" : ",",
                                       gl_frag_result_name(key->surfaces[i].loc),
                                       type_str, dim_str,
                                       key->surfaces[i].array ? "[]" : "",
                                       key->surfaces[i].src_samples,
                                       key->surfaces[i].dst_samples);
                first = false;
        }

        nir_builder b =
                nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                               pan_shader_get_compiler_options(dev),
                                               "pan_blit(%s)", sig);
        b.shader->info.internal = true;

        nir_variable *coord_var =
                nir_variable_create(b.shader, nir_var_shader_in,
                                    glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),
                                    "coord");
        coord_var->data.location = VARYING_SLOT_TEX0;

        nir_ssa_def *coord = nir_load_var(&b, coord_var);

        unsigned active_count = 0;
        for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
                if (key->surfaces[i].type == nir_type_invalid)
                        continue;

                /* Resolve operations only work for N -> 1 samples. */
                assert(key->surfaces[i].dst_samples == 1 ||
                       key->surfaces[i].src_samples == key->surfaces[i].dst_samples);

                static const char *out_names[] = {
                        "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
                };

                unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
                nir_variable *out =
                        nir_variable_create(b.shader, nir_var_shader_out,
                                            glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),
                                            out_names[active_count]);
                out->data.location = key->surfaces[i].loc;
                out->data.driver_location = active_count;

                bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
                bool ms = key->surfaces[i].src_samples > 1;
                enum glsl_sampler_dim sampler_dim;

                switch (key->surfaces[i].dim) {
                case MALI_TEXTURE_DIMENSION_1D:
                        sampler_dim = GLSL_SAMPLER_DIM_1D;
                        break;
                case MALI_TEXTURE_DIMENSION_2D:
                        sampler_dim = ms ?
                                      GLSL_SAMPLER_DIM_MS :
                                      GLSL_SAMPLER_DIM_2D;
                        break;
                case MALI_TEXTURE_DIMENSION_3D:
                        sampler_dim = GLSL_SAMPLER_DIM_3D;
                        break;
                case MALI_TEXTURE_DIMENSION_CUBE:
                        sampler_dim = GLSL_SAMPLER_DIM_CUBE;
                        break;
                }

                nir_ssa_def *res = NULL;

                if (resolve) {
                        /* When resolving a float type, we need to calculate
                         * the average of all samples. For integer resolve, GL
                         * and Vulkan say that one sample should be chosen
                         * without telling which. Let's just pick the first one
                         * in that case.
                         */
                        nir_alu_type base_type =
                                nir_alu_type_get_base_type(key->surfaces[i].type);
                        unsigned nsamples = base_type == nir_type_float ?
                                            key->surfaces[i].src_samples : 1;

                        for (unsigned s = 0; s < nsamples; s++) {
                                nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);

                                tex->op = nir_texop_txf_ms;
                                tex->dest_type = key->surfaces[i].type;
                                tex->texture_index = active_count;
                                tex->is_array = key->surfaces[i].array;
                                tex->sampler_dim = sampler_dim;

                                tex->src[0].src_type = nir_tex_src_coord;
                                tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
                                tex->coord_components = coord_comps;

                                tex->src[1].src_type = nir_tex_src_ms_index;
                                tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));

                                tex->src[2].src_type = nir_tex_src_lod;
                                tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
                                nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
                                nir_builder_instr_insert(&b, &tex->instr);

                                res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
                        }

                        if (base_type == nir_type_float) {
                                unsigned type_sz =
                                        nir_alu_type_get_type_size(key->surfaces[i].type);
                                res = nir_fmul(&b, res,
                                               nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
                        }
                } else {
                        nir_tex_instr *tex =
                                nir_tex_instr_create(b.shader, ms ? 3 : 1);

                        tex->dest_type = key->surfaces[i].type;
                        tex->texture_index = active_count;
                        tex->is_array = key->surfaces[i].array;
                        tex->sampler_dim = sampler_dim;

                        if (ms) {
                                tex->op = nir_texop_txf_ms;

                                tex->src[0].src_type = nir_tex_src_coord;
                                tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
                                tex->coord_components = coord_comps;

                                tex->src[1].src_type = nir_tex_src_ms_index;
                                tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));

                                tex->src[2].src_type = nir_tex_src_lod;
                                tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
                        } else {
                                tex->op = nir_texop_tex;

                                tex->src[0].src_type = nir_tex_src_coord;
                                tex->src[0].src = nir_src_for_ssa(coord);
                                tex->coord_components = coord_comps;
                        }

                        nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
                        nir_builder_instr_insert(&b, &tex->instr);
                        res = &tex->dest.ssa;
                }

                assert(res);

                if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
                        nir_store_var(&b, out, res, 0xFF);
                } else {
                        unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
                        nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
                }
                active_count++;
        }

        struct panfrost_compile_inputs inputs = {
                .gpu_id = dev->gpu_id,
                .is_blit = true,
        };
        struct util_dynarray binary;
        struct pan_shader_info info;

        util_dynarray_init(&binary, NULL);

        pan_shader_compile(dev, b.shader, &inputs, &binary, &info);

        shader = rzalloc(dev->blitter.shaders.blit,
                         struct pan_blit_shader_data);
        shader->key = *key;
        shader->address =
                pan_pool_upload_aligned(dev->blitter.shaders.pool,
                                        binary.data, binary.size,
                                        pan_is_bifrost(dev) ? 128 : 64);

        util_dynarray_fini(&binary);
        ralloc_free(b.shader);

        if (!pan_is_bifrost(dev))
                shader->address |= info.midgard.first_tag;

        if (pan_is_bifrost(dev)) {
                for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
                        shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
                        shader->blend_types[i] = info.bifrost.blend[i].type;
                }
        }

        _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);

out:
        pthread_mutex_unlock(&dev->blitter.shaders.lock);
        return shader;
}

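/* In summary, the blit shader generated above is a fragment shader that,
 * for each surface in the key, fetches from texture unit N using the
 * interpolated screen-space coordinate (txf_ms per sample for resolves,
 * averaging float samples; plain tex/txf otherwise) and stores the result
 * to the matching colour, depth, or stencil output. */
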
static mali_ptr
pan_blitter_get_rsd(struct panfrost_device *dev,
                    unsigned rt_count,
                    const struct pan_image_view **src_rts,
                    const struct pan_image_view **dst_rts,
                    const struct pan_image_view *src_z,
                    const struct pan_image_view *dst_z,
                    const struct pan_image_view *src_s,
                    const struct pan_image_view *dst_s)
{
        struct pan_blit_rsd_key rsd_key = { 0 };

        assert(!rt_count || (!src_z && !src_s));

        struct pan_blit_shader_key blit_key = { 0 };

        if (src_z) {
                assert(dst_z);
                rsd_key.z.format = dst_z->format;
                blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH;
                rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32;
                rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples;
                rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples;
                rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim;
                rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer;
        }

        if (src_s) {
                assert(dst_s);
                rsd_key.s.format = dst_s->format;
                blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL;
                rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32;
                rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples;
                rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples;
                rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim;
                rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer;
        }

        for (unsigned i = 0; i < rt_count; i++) {
                if (!src_rts[i])
                        continue;

                assert(dst_rts[i]);
                rsd_key.rts[i].format = dst_rts[i]->format;
                blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
                rsd_key.rts[i].type = blit_key.surfaces[i].type =
                        util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 :
                        util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 :
                        nir_type_float32;
                rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples;
                rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples;
                rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim;
                rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer;
        }

        pthread_mutex_lock(&dev->blitter.rsds.lock);
        struct hash_entry *he =
                _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);
        struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
        if (rsd)
                goto out;

        rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);
        rsd->key = rsd_key;

        struct panfrost_ptr rsd_ptr =
                (dev->quirks & MIDGARD_SFBD) ?
                pan_pool_alloc_desc(dev->blitter.rsds.pool, RENDERER_STATE) :
                pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,
                                              PAN_DESC(RENDERER_STATE),
                                              PAN_DESC_ARRAY(MAX2(rt_count, 1), BLEND));

        mali_ptr blend_shaders[8] = { 0 };

        const struct pan_blit_shader_data *blit_shader =
                pan_blitter_get_blit_shader(dev, &blit_key);

        pan_blitter_get_blend_shaders(dev, rt_count, dst_rts,
                                      blit_shader, blend_shaders);

        pan_blitter_emit_rsd(dev, blit_shader,
                             MAX2(rt_count, 1), dst_rts, blend_shaders,
                             dst_z, dst_s, rsd_ptr.cpu);
        rsd->address = rsd_ptr.gpu;
        _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);

out:
        pthread_mutex_unlock(&dev->blitter.rsds.lock);
        return rsd->address;
}

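/* Preloads are expressed as blits from the framebuffer's own views onto
 * themselves: note that the function below passes the same views as both
 * source and destination. Combined Z/S formats are rewritten to their
 * stencil-only X24S8/X32_S8X24 variants so the stencil preload samples
 * the right component. */
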
static mali_ptr
pan_preload_get_rsd(struct panfrost_device *dev,
                    const struct pan_fb_info *fb,
                    bool zs)
{
        const struct pan_image_view *rts[8] = { NULL };
        const struct pan_image_view *z = NULL, *s = NULL;
        struct pan_image_view patched_s_view;
        unsigned rt_count = 0;

        if (zs) {
                if (fb->zs.preload.z)
                        z = fb->zs.view.zs;

                if (fb->zs.preload.s) {
                        const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
                        enum pipe_format fmt = util_format_get_depth_only(view->format);

                        switch (view->format) {
                        case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
                        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
                        default: fmt = view->format; break;
                        }

                        if (fmt != view->format) {
                                patched_s_view = *view;
                                patched_s_view.format = fmt;
                                s = &patched_s_view;
                        } else {
                                s = view;
                        }
                }
        } else {
                for (unsigned i = 0; i < fb->rt_count; i++) {
                        if (fb->rts[i].preload)
                                rts[i] = fb->rts[i].view;
                }

                rt_count = fb->rt_count;
        }

        return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s);
}

static mali_ptr
pan_blit_get_rsd(struct panfrost_device *dev,
                 const struct pan_image_view *src_views,
                 const struct pan_image_view *dst_view)
{
        const struct util_format_description *desc =
                util_format_description(src_views[0].format);
        const struct pan_image_view *src_rt = NULL, *dst_rt = NULL;
        const struct pan_image_view *src_z = NULL, *dst_z = NULL;
        const struct pan_image_view *src_s = NULL, *dst_s = NULL;

        if (util_format_has_depth(desc)) {
                src_z = &src_views[0];
                dst_z = dst_view;
        }

        if (src_views[1].format) {
                src_s = &src_views[1];
                dst_s = dst_view;
        } else if (util_format_has_stencil(desc)) {
                src_s = &src_views[0];
                dst_s = dst_view;
        }

        if (!src_z && !src_s) {
                src_rt = &src_views[0];
                dst_rt = dst_view;
        }

        return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt,
                                   src_z, dst_z, src_s, dst_s);
}

static bool
pan_preload_needed(const struct pan_fb_info *fb, bool zs)
{
        if (zs) {
                if (fb->zs.preload.z || fb->zs.preload.s)
                        return true;
        } else {
                for (unsigned i = 0; i < fb->rt_count; i++) {
                        if (fb->rts[i].preload)
                                return true;
                }
        }

        return false;
}

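/* Blit draws use exactly one varying: the texture coordinate, read as
 * R32G32B32_FLOAT from a buffer of four vec4s (one per corner of the
 * rectangle, hence stride 4 * sizeof(float) and size stride * 4)
 * uploaded by the caller. */
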
static void
pan_blitter_emit_varying(struct pan_pool *pool,
                         mali_ptr coordinates,
                         struct MALI_DRAW *draw)
{
        /* Bifrost needs an empty desc to mark end of prefetching */
        bool padding_buffer = pan_is_bifrost(pool->dev);

        struct panfrost_ptr varying =
                pan_pool_alloc_desc(pool, ATTRIBUTE);
        struct panfrost_ptr varying_buffer =
                pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
                                          ATTRIBUTE_BUFFER);

        pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
                cfg.pointer = coordinates;
                cfg.stride = 4 * sizeof(float);
                cfg.size = cfg.stride * 4;
        }

        if (padding_buffer) {
                pan_pack(varying_buffer.cpu + MALI_ATTRIBUTE_BUFFER_LENGTH,
                         ATTRIBUTE_BUFFER, cfg);
        }

        pan_pack(varying.cpu, ATTRIBUTE, cfg) {
                cfg.buffer_index = 0;
                cfg.offset_enable = !pan_is_bifrost(pool->dev);
                cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
        }

        draw->varyings = varying.gpu;
        draw->varying_buffers = varying_buffer.gpu;
}

static mali_ptr
pan_blitter_emit_bifrost_sampler(struct pan_pool *pool,
                                 bool nearest_filter)
{
        struct panfrost_ptr sampler =
                pan_pool_alloc_desc(pool, BIFROST_SAMPLER);

        pan_pack(sampler.cpu, BIFROST_SAMPLER, cfg) {
                cfg.seamless_cube_map = false;
                cfg.normalized_coordinates = false;
                cfg.point_sample_minify = nearest_filter;
                cfg.point_sample_magnify = nearest_filter;
        }

        return sampler.gpu;
}

static mali_ptr
pan_blitter_emit_midgard_sampler(struct pan_pool *pool,
                                 bool nearest_filter)
{
        struct panfrost_ptr sampler =
                pan_pool_alloc_desc(pool, MIDGARD_SAMPLER);

        pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg) {
                cfg.normalized_coordinates = false;
                cfg.magnify_nearest = nearest_filter;
                cfg.minify_nearest = nearest_filter;
        }

        return sampler.gpu;
}

static mali_ptr
pan_blitter_emit_bifrost_textures(struct pan_pool *pool,
                                  unsigned tex_count,
                                  const struct pan_image_view **views)
{
        struct panfrost_ptr textures =
                pan_pool_alloc_desc_array(pool, tex_count, BIFROST_TEXTURE);

        for (unsigned i = 0; i < tex_count; i++) {
                void *texture = textures.cpu + (MALI_BIFROST_TEXTURE_LENGTH * i);
                size_t payload_size =
                        panfrost_estimate_texture_payload_size(pool->dev, views[i]);
                struct panfrost_ptr surfaces =
                        pan_pool_alloc_aligned(pool, payload_size,
                                               MALI_SURFACE_WITH_STRIDE_ALIGN);

                panfrost_new_texture(pool->dev, views[i], texture, &surfaces);
        }

        return textures.gpu;
}

static mali_ptr
pan_blitter_emit_midgard_textures(struct pan_pool *pool,
                                  unsigned tex_count,
                                  const struct pan_image_view **views)
{
        mali_ptr textures[8] = { 0 };

        for (unsigned i = 0; i < tex_count; i++) {
                size_t sz = MALI_MIDGARD_TEXTURE_LENGTH +
                            panfrost_estimate_texture_payload_size(pool->dev, views[i]);
                struct panfrost_ptr texture =
                        pan_pool_alloc_aligned(pool, sz, MALI_MIDGARD_TEXTURE_ALIGN);
                struct panfrost_ptr surfaces = {
                        .cpu = texture.cpu + MALI_MIDGARD_TEXTURE_LENGTH,
                        .gpu = texture.gpu + MALI_MIDGARD_TEXTURE_LENGTH,
                };

                panfrost_new_texture(pool->dev, views[i], texture.cpu, &surfaces);
                textures[i] = texture.gpu;
        }

        return pan_pool_upload_aligned(pool, textures,
                                       tex_count * sizeof(mali_ptr),
                                       sizeof(mali_ptr));
}

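/* The two texture emitters above reflect an architectural difference:
 * Bifrost texture descriptors are packed contiguously and referenced
 * directly, while Midgard expects an indirection table of mali_ptr
 * entries, one per texture, uploaded at the end. */
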
static void
pan_preload_emit_textures(struct pan_pool *pool,
                          const struct pan_fb_info *fb, bool zs,
                          struct MALI_DRAW *draw)
{
        const struct pan_image_view *views[8];
        struct pan_image_view patched_s_view;
        unsigned tex_count = 0;

        if (zs) {
                if (fb->zs.preload.z)
                        views[tex_count++] = fb->zs.view.zs;

                if (fb->zs.preload.s) {
                        const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
                        enum pipe_format fmt = util_format_get_depth_only(view->format);

                        switch (view->format) {
                        case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
                        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
                        default: fmt = view->format; break;
                        }

                        if (fmt != view->format) {
                                patched_s_view = *view;
                                patched_s_view.format = fmt;
                                view = &patched_s_view;
                        }
                        views[tex_count++] = view;
                }
        } else {
                for (unsigned i = 0; i < fb->rt_count; i++) {
                        if (fb->rts[i].preload)
                                views[tex_count++] = fb->rts[i].view;
                }
        }

        if (pan_is_bifrost(pool->dev))
                draw->textures = pan_blitter_emit_bifrost_textures(pool, tex_count, views);
        else
                draw->textures = pan_blitter_emit_midgard_textures(pool, tex_count, views);
}

static mali_ptr
pan_blitter_emit_viewport(struct pan_pool *pool,
                          uint16_t minx, uint16_t miny,
                          uint16_t maxx, uint16_t maxy)
{
        struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);

        pan_pack(vp.cpu, VIEWPORT, cfg) {
                cfg.scissor_minimum_x = minx;
                cfg.scissor_minimum_y = miny;
                cfg.scissor_maximum_x = maxx;
                cfg.scissor_maximum_y = maxy;
        }

        return vp.gpu;
}

static void
pan_preload_emit_dcd(struct pan_pool *pool,
                     struct pan_fb_info *fb, bool zs,
                     mali_ptr coordinates,
                     mali_ptr tsd, mali_ptr rsd,
                     void *out, bool always_write)
{
        pan_pack(out, DRAW, cfg) {
                cfg.four_components_per_vertex = true;
                cfg.draw_descriptor_is_64b = true;
                cfg.thread_storage = tsd;
                cfg.state = rsd;

                cfg.position = coordinates;
                pan_blitter_emit_varying(pool, coordinates, &cfg);
                uint16_t minx = 0, miny = 0, maxx, maxy;
                if (pool->dev->quirks & MIDGARD_SFBD) {
                        maxx = fb->width - 1;
                        maxy = fb->height - 1;
                } else {
                        /* Align on 32x32 tiles */
                        minx = fb->extent.minx & ~31;
                        miny = fb->extent.miny & ~31;
                        maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
                        maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
                }

                cfg.viewport =
                        pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);

                pan_preload_emit_textures(pool, fb, zs, &cfg);

                if (pan_is_bifrost(pool->dev)) {
                        cfg.samplers = pan_blitter_emit_bifrost_sampler(pool, true);

                        /* Tiles updated by blit shaders are still considered
                         * clean (separate for colour and Z/S), allowing us to
                         * suppress unnecessary writeback */
                        cfg.clean_fragment_write = !always_write;
                } else {
                        cfg.samplers = pan_blitter_emit_midgard_sampler(pool, true);
                        cfg.texture_descriptor_is_64b = true;
                }
        }
}

static void
pan_blit_emit_dcd(struct pan_pool *pool,
                  mali_ptr src_coords, mali_ptr dst_coords,
                  mali_ptr textures, mali_ptr samplers,
                  mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                  void *out)
{
        pan_pack(out, DRAW, cfg) {
                cfg.four_components_per_vertex = true;
                cfg.draw_descriptor_is_64b = true;
                cfg.thread_storage = tsd;
                cfg.state = rsd;

                cfg.position = dst_coords;
                pan_blitter_emit_varying(pool, src_coords, &cfg);
                cfg.viewport = vpd;
                cfg.texture_descriptor_is_64b = !pan_is_bifrost(pool->dev);
                cfg.textures = textures;
                cfg.samplers = samplers;
        }
}

static void
pan_preload_fb_bifrost_alloc_pre_post_dcds(struct pan_pool *desc_pool,
                                           struct pan_fb_info *fb)
{
        assert(pan_is_bifrost(desc_pool->dev));

        if (fb->bifrost.pre_post.dcds.gpu)
                return;

        fb->bifrost.pre_post.dcds =
                pan_pool_alloc_desc_aggregate(desc_pool,
                                              PAN_DESC(DRAW),
                                              PAN_DESC(DRAW_PADDING),
                                              PAN_DESC(DRAW),
                                              PAN_DESC(DRAW_PADDING),
                                              PAN_DESC(DRAW),
                                              PAN_DESC(DRAW_PADDING));
}

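/* Three DCD slots are allocated above: index 0 is used by the Z/S
 * pre-frame shader and index 1 by the colour pre-frame shader (see
 * pan_preload_emit_bifrost_pre_frame_dcd below); the remaining slot is
 * left for a post-frame shader, should the driver need one. */
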
static void
pan_preload_emit_midgard_tiler_job(struct pan_pool *desc_pool,
                                   struct pan_scoreboard *scoreboard,
                                   struct pan_fb_info *fb, bool zs,
                                   mali_ptr coords, mali_ptr rsd, mali_ptr tsd)
{
        struct panfrost_ptr job =
                pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB);

        pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd,
                             pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW),
                             false);

        pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
                cfg.index_count = 4;
                cfg.job_task_split = 6;
        }

        pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {
                cfg.constant = 1.0f;
        }

        void *invoc = pan_section_ptr(job.cpu,
                                      MIDGARD_TILER_JOB,
                                      INVOCATION);
        panfrost_pack_work_groups_compute(invoc, 1, 4,
                                          1, 1, 1, 1, true, false);

        panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                         false, false, 0, 0, &job, true);
}

static void
pan_blit_emit_midgard_tiler_job(struct pan_pool *desc_pool,
                                struct pan_scoreboard *scoreboard,
                                mali_ptr src_coords, mali_ptr dst_coords,
                                mali_ptr textures, mali_ptr samplers,
                                mali_ptr vpd, mali_ptr rsd, mali_ptr tsd)
{
        struct panfrost_ptr job =
                pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB);

        pan_blit_emit_dcd(desc_pool,
                          src_coords, dst_coords, textures, samplers,
                          vpd, tsd, rsd,
                          pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW));

        pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
                cfg.index_count = 4;
                cfg.job_task_split = 6;
        }

        pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {
                cfg.constant = 1.0f;
        }

        void *invoc = pan_section_ptr(job.cpu,
                                      MIDGARD_TILER_JOB,
                                      INVOCATION);
        panfrost_pack_work_groups_compute(invoc, 1, 4,
                                          1, 1, 1, 1, true, false);

        panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                         false, false, 0, 0, &job, false);
}

static void
pan_blit_emit_bifrost_tiler_job(struct pan_pool *desc_pool,
                                struct pan_scoreboard *scoreboard,
                                mali_ptr src_coords, mali_ptr dst_coords,
                                mali_ptr textures, mali_ptr samplers,
                                mali_ptr vpd, mali_ptr rsd,
                                mali_ptr tsd, mali_ptr tiler)
{
        struct panfrost_ptr job =
                pan_pool_alloc_desc(desc_pool, BIFROST_TILER_JOB);

        pan_blit_emit_dcd(desc_pool,
                          src_coords, dst_coords, textures, samplers,
                          vpd, tsd, rsd,
                          pan_section_ptr(job.cpu, BIFROST_TILER_JOB, DRAW));

        pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE, cfg) {
                cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
                cfg.index_count = 4;
                cfg.job_task_split = 6;
        }

        pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE_SIZE, cfg) {
                cfg.constant = 1.0f;
        }

        void *invoc = pan_section_ptr(job.cpu,
                                      BIFROST_TILER_JOB,
                                      INVOCATION);
        panfrost_pack_work_groups_compute(invoc, 1, 4,
                                          1, 1, 1, 1, true, false);

        pan_section_pack(job.cpu, BIFROST_TILER_JOB, PADDING, cfg);
        pan_section_pack(job.cpu, BIFROST_TILER_JOB, TILER, cfg) {
                cfg.address = tiler;
        }

        panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                         false, false, 0, 0, &job, false);
}

static void
pan_preload_emit_bifrost_pre_frame_dcd(struct pan_pool *desc_pool,
                                       struct pan_fb_info *fb, bool zs,
                                       mali_ptr coords, mali_ptr rsd,
                                       mali_ptr tsd)
{
        struct panfrost_device *dev = desc_pool->dev;

        unsigned dcd_idx = zs ? 0 : 1;
        pan_preload_fb_bifrost_alloc_pre_post_dcds(desc_pool, fb);
        assert(fb->bifrost.pre_post.dcds.cpu);
        void *dcd = fb->bifrost.pre_post.dcds.cpu +
                    (dcd_idx * (MALI_DRAW_LENGTH + MALI_DRAW_PADDING_LENGTH));

        int crc_rt = pan_select_crc_rt(dev, fb);

        bool always_write = false;

        /* If CRC data is currently invalid and this batch will make it valid,
         * write even clean tiles to make sure CRC data is updated. */
        if (crc_rt >= 0) {
                bool *valid = fb->rts[crc_rt].crc_valid;
                bool full = !fb->extent.minx && !fb->extent.miny &&
                            fb->extent.maxx == (fb->width - 1) &&
                            fb->extent.maxy == (fb->height - 1);

                if (full && !(*valid))
                        always_write = true;
        }

        pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write);
        if (zs) {
                enum pipe_format fmt = fb->zs.view.zs->image->layout.format;
                bool always = false;

                /* If we're dealing with a combined ZS resource and only one
                 * component is cleared, we need to reload the whole surface
                 * because the zs_clean_pixel_write_enable flag is set in that
                 * case.
                 */
                if (util_format_is_depth_and_stencil(fmt) &&
                    fb->zs.clear.z != fb->zs.clear.s)
                        always = true;

                /* We could use INTERSECT on Bifrost v7 too, but
                 * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
                 * buffer one or more tiles ahead, making ZS data immediately
                 * available for any ZS tests taking place in other shaders.
                 * Things haven't been benchmarked to determine what's
                 * preferable (saving bandwidth vs having ZS preloaded
                 * earlier), so let's leave it like that for now.
                 */
                fb->bifrost.pre_post.modes[dcd_idx] =
                        desc_pool->dev->arch > 6 ?
                        MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :
                        always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
                        MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
        } else {
                fb->bifrost.pre_post.modes[dcd_idx] =
                        always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
                        MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
        }
}

static void
pan_preload_fb_part(struct pan_pool *pool,
                    struct pan_scoreboard *scoreboard,
                    struct pan_fb_info *fb, bool zs,
                    mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
{
        struct panfrost_device *dev = pool->dev;
        mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs);

        if (pan_is_bifrost(dev)) {
                pan_preload_emit_bifrost_pre_frame_dcd(pool, fb, zs,
                                                       coords, rsd, tsd);
        } else {
                pan_preload_emit_midgard_tiler_job(pool, scoreboard,
                                                   fb, zs, coords, rsd, tsd);
        }
}

void
pan_preload_fb(struct pan_pool *pool,
               struct pan_scoreboard *scoreboard,
               struct pan_fb_info *fb,
               mali_ptr tsd, mali_ptr tiler)
{
        bool preload_zs = pan_preload_needed(fb, true);
        bool preload_rts = pan_preload_needed(fb, false);
        mali_ptr coords;

        if (!preload_zs && !preload_rts)
                return;

        float rect[] = {
                0.0, 0.0, 0.0, 1.0,
                fb->width, 0.0, 0.0, 1.0,
                0.0, fb->height, 0.0, 1.0,
                fb->width, fb->height, 0.0, 1.0,
        };

        coords = pan_pool_upload_aligned(pool, rect,
                                         sizeof(rect), 64);

        if (preload_zs)
                pan_preload_fb_part(pool, scoreboard, fb, true, coords,
                                    tsd, tiler);

        if (preload_rts)
                pan_preload_fb_part(pool, scoreboard, fb, false, coords,
                                    tsd, tiler);
}

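/* Usage sketch (not part of this file; the batch/pool names are
 * illustrative): a driver preparing a fragment job would call, after
 * filling in its pan_fb_info,
 *
 *    pan_preload_fb(&batch->pool, &batch->scoreboard, &fb, tsd, tiler);
 *
 * before emitting the fragment job, so that preload tiler jobs (Midgard)
 * or pre-frame DCDs (Bifrost) are in place when the frame runs. */
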
void
pan_blit_ctx_init(struct panfrost_device *dev,
                  const struct pan_blit_info *info,
                  struct pan_pool *blit_pool,
                  struct pan_blit_context *ctx)
{
        memset(ctx, 0, sizeof(*ctx));

        ctx->z_scale = (float)(info->dst.end.z - info->dst.start.z + 1) /
                       (info->src.end.z - info->src.start.z + 1);

        struct pan_image_view sviews[2] = {
                {
                        .format = info->src.planes[0].format,
                        .image = info->src.planes[0].image,
                        .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
                               MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,
                        .first_level = info->src.level,
                        .last_level = info->src.level,
                        .first_layer = info->src.start.layer,
                        .last_layer = info->src.end.layer,
                        .swizzle = {
                                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                                PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
                        },
                },
        };

        struct pan_image_view dview = {
                .format = info->dst.planes[0].format,
                .image = info->dst.planes[0].image,
                .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?
                       MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,
                .first_level = info->dst.level,
                .last_level = info->dst.level,
                .first_layer = info->dst.start.layer,
                .last_layer = info->dst.start.layer,
                .swizzle = {
                        PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
                },
        };

        ctx->src.start.x = info->src.start.x;
        ctx->src.start.y = info->src.start.y;
        ctx->src.end.x = info->src.end.x;
        ctx->src.end.y = info->src.end.y;
        ctx->src.dim = sviews[0].dim;
        if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D)
                ctx->src.z_offset = info->src.start.z;
        else
                ctx->src.layer_offset = info->src.start.layer;

        if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
                ctx->dst.layer_offset = info->dst.start.z;
                ctx->dst.cur_layer = info->dst.start.z;
                ctx->dst.last_layer = info->dst.end.z;
        } else {
                ctx->dst.layer_offset = info->dst.start.layer;
                ctx->dst.cur_layer = info->dst.start.layer;
                ctx->dst.last_layer = info->dst.end.layer;
        }

        /* Split depth and stencil */
        if (util_format_is_depth_and_stencil(sviews[0].format)) {
                sviews[1] = sviews[0];
                sviews[0].format = util_format_get_depth_only(sviews[0].format);
                sviews[1].format = util_format_stencil_only(sviews[1].format);
        } else if (info->src.planes[1].format) {
                sviews[1] = sviews[0];
                sviews[1].format = info->src.planes[1].format;
                sviews[1].image = info->src.planes[1].image;
        }

        ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);

        ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;

        assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));

        unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
        unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
        unsigned minx = MAX2(info->dst.start.x, 0);
        unsigned miny = MAX2(info->dst.start.y, 0);
        unsigned maxx = MIN2(info->dst.end.x, dst_w - 1);
        unsigned maxy = MIN2(info->dst.end.y, dst_h - 1);

        if (info->scissor.enable) {
                minx = MAX2(minx, info->scissor.minx);
                miny = MAX2(miny, info->scissor.miny);
                maxx = MIN2(maxx, info->scissor.maxx);
                maxy = MIN2(maxy, info->scissor.maxy);
        }

        const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };
        unsigned nviews = sviews[1].format ? 2 : 1;

        if (pan_is_bifrost(dev)) {
                ctx->textures =
                        pan_blitter_emit_bifrost_textures(blit_pool, nviews, sview_ptrs);
                ctx->samplers =
                        pan_blitter_emit_bifrost_sampler(blit_pool, info->nearest);
        } else {
                ctx->textures =
                        pan_blitter_emit_midgard_textures(blit_pool, nviews, sview_ptrs);
                ctx->samplers =
                        pan_blitter_emit_midgard_sampler(blit_pool, info->nearest);
        }

        ctx->vpd = pan_blitter_emit_viewport(blit_pool,
                                             minx, miny, maxx, maxy);

        float dst_rect[] = {
                info->dst.start.x, info->dst.start.y, 0.0, 1.0,
                info->dst.end.x + 1, info->dst.start.y, 0.0, 1.0,
                info->dst.start.x, info->dst.end.y + 1, 0.0, 1.0,
                info->dst.end.x + 1, info->dst.end.y + 1, 0.0, 1.0,
        };

        ctx->position =
                pan_pool_upload_aligned(blit_pool, dst_rect,
                                        sizeof(dst_rect), 64);
}

bool
pan_blit_next_surface(struct pan_blit_context *ctx)
{
        if (ctx->dst.cur_layer >= ctx->dst.last_layer)
                return false;

        ctx->dst.cur_layer++;
        return true;
}

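/* Usage sketch for layered blits (names are illustrative): after
 * pan_blit_ctx_init(), iterate
 *
 *    do {
 *            pan_blit(&ctx, &pool, &scoreboard, tsd, tiler);
 *    } while (pan_blit_next_surface(&ctx));
 *
 * emitting one tiler job per destination layer. */
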
void
pan_blit(struct pan_blit_context *ctx,
         struct pan_pool *pool,
         struct pan_scoreboard *scoreboard,
         mali_ptr tsd, mali_ptr tiler)
{
        if (ctx->dst.cur_layer < 0 || ctx->dst.cur_layer > ctx->dst.last_layer)
                return;

        int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
        float src_z;
        if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
                src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
        else
                src_z = ctx->src.layer_offset + layer;

        float src_rect[] = {
                ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
                ctx->src.end.x + 1, ctx->src.start.y, src_z, 1.0,
                ctx->src.start.x, ctx->src.end.y + 1, src_z, 1.0,
                ctx->src.end.x + 1, ctx->src.end.y + 1, src_z, 1.0,
        };

        mali_ptr src_coords =
                pan_pool_upload_aligned(pool, src_rect,
                                        sizeof(src_rect), 64);

        if (pan_is_bifrost(pool->dev)) {
                pan_blit_emit_bifrost_tiler_job(pool, scoreboard,
                                                src_coords, ctx->position,
                                                ctx->textures, ctx->samplers,
                                                ctx->vpd, ctx->rsd, tsd, tiler);
        } else {
                pan_blit_emit_midgard_tiler_job(pool, scoreboard,
                                                src_coords, ctx->position,
                                                ctx->textures, ctx->samplers,
                                                ctx->vpd, ctx->rsd, tsd);
        }
}

static uint32_t pan_blit_shader_key_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
}

static bool pan_blit_shader_key_equal(const void *a, const void *b)
{
        return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
}

static uint32_t pan_blit_blend_shader_key_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
}

static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)
{
        return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
}

static uint32_t pan_blit_rsd_key_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
}

static bool pan_blit_rsd_key_equal(const void *a, const void *b)
{
        return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
}

static void
pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)
{
        static const struct pan_blit_shader_key prefill[] = {
                {
                        .surfaces[0] = {
                                .loc = FRAG_RESULT_DEPTH,
                                .type = nir_type_float32,
                                .dim = MALI_TEXTURE_DIMENSION_2D,
                                .src_samples = 1,
                                .dst_samples = 1,
                        },
                },
                {
                        .surfaces[1] = {
                                .loc = FRAG_RESULT_STENCIL,
                                .type = nir_type_uint32,
                                .dim = MALI_TEXTURE_DIMENSION_2D,
                                .src_samples = 1,
                                .dst_samples = 1,
                        },
                },
                {
                        .surfaces[0] = {
                                .loc = FRAG_RESULT_DATA0,
                                .type = nir_type_float32,
                                .dim = MALI_TEXTURE_DIMENSION_2D,
                                .src_samples = 1,
                                .dst_samples = 1,
                        },
                },
        };

        for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
                pan_blitter_get_blit_shader(dev, &prefill[i]);
}

void
pan_blitter_init(struct panfrost_device *dev,
                 struct pan_pool *bin_pool,
                 struct pan_pool *desc_pool)
{
        dev->blitter.shaders.blit =
                _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
                                        pan_blit_shader_key_equal);
        dev->blitter.shaders.blend =
                _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,
                                        pan_blit_blend_shader_key_equal);
        dev->blitter.shaders.pool = bin_pool;
        pthread_mutex_init(&dev->blitter.shaders.lock, NULL);
        pan_blitter_prefill_blit_shader_cache(dev);

        dev->blitter.rsds.pool = desc_pool;
        dev->blitter.rsds.rsds =
                _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
                                        pan_blit_rsd_key_equal);
        pthread_mutex_init(&dev->blitter.rsds.lock, NULL);
}

void
pan_blitter_cleanup(struct panfrost_device *dev)
{
        _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);
        _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);
        pthread_mutex_destroy(&dev->blitter.shaders.lock);
        _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);
        pthread_mutex_destroy(&dev->blitter.rsds.lock);
}