GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_gmem.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
                        int level, int layer)
{
   if (fd_resource_ubwc_enabled(rsc, level)) {
      OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
                0);
      OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
                        fdl_ubwc_pitch(&rsc->layout, level)) |
                     A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
                        rsc->layout.ubwc_layer_size >> 2));
   } else {
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, 0x00000000);
   }
}

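/* Set up the color render targets (MRTs): per-cbuf format/tiling/swap,
 * pitch, layer pitch and base address, plus sRGB enables and the UBWC flag
 * buffer reference.  When gmem is non-NULL (tiled rendering) the per-cbuf
 * GMEM base offsets come from the gmem state object; the sysmem path passes
 * NULL and programs a base of 0.
 */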
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
         const struct fd_gmem_stateobj *gmem)
{
   unsigned srgb_cntl = 0;
   unsigned i;

   unsigned max_layer_index = 0;

   for (i = 0; i < pfb->nr_cbufs; i++) {
      enum a6xx_format format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t array_stride = 0;
      uint32_t offset;
      uint32_t tile_mode;

      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      enum pipe_format pformat = psurf->format;
      rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
      slice = fd_resource_slice(rsc, psurf->u.tex.level);
      format = fd6_pipe2color(pformat);
      sint = util_format_is_pure_sint(pformat);
      uint = util_format_is_pure_uint(pformat);

      if (util_format_is_srgb(pformat))
         srgb_cntl |= (1 << i);

      offset =
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

      stride = fd_resource_pitch(rsc, psurf->u.tex.level);
      array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
      swap = fd6_resource_swap(rsc, pformat);

      tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
      max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;

      debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

      OUT_REG(
         ring,
         A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
                              .color_tile_mode = tile_mode, .color_swap = swap),
         A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
         A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
         A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));

      OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
                                       .color_sint = sint, .color_uint = uint));

      OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
   OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

   OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}

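/* Set up the depth/stencil buffer, including the LRZ buffer reference (when
 * allocated) and the separate-stencil buffer (when present); programs
 * DEPTH6_NONE when there is no zsbuf.
 */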
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
      uint32_t stride = fd_resource_pitch(rsc, 0);
      uint32_t array_stride = fd_resource_layer_stride(rsc, 0);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      OUT_REG(
         ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
         A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
         A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
                                             array_stride),
         A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

      OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));

      if (rsc->stencil) {
         stride = fd_resource_pitch(rsc->stencil, 0);
         array_stride = fd_resource_layer_stride(rsc->stencil, 0);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;

         OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
                 A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
                                                 stride),
                 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
                    .a6xx_rb_stencil_buffer_array_pitch = array_stride),
                 A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),
                 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
      } else {
         OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
      }
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
   }
}

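/* Heuristic for HW binning: bail if a VSC pipe would cover too many tiles
 * (maxpw * maxph > 32); otherwise require binning to be enabled, more than
 * one bin, and at least one draw.
 */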
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
          (batch->num_draws > 0);
}

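/* Patch texture descriptors used for framebuffer reads (shaders sampling
 * from cbuf0): in the GMEM case the descriptor must point at the current
 * tile in GMEM (always TILE6_2 and no swap), while the sysmem variant below
 * points at the resource itself, including its UBWC flag buffer when
 * enabled.
 */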
static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

   /* always TILE6_2 mode in GMEM.. which also means no swap: */
   texconst0 &=
      ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
   texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);
   unsigned lvl = psurf->u.tex.level;
   unsigned layer = psurf->u.tex.first_layer;
   bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);
   uint64_t iova = fd_bo_get_iova(rsc->bo) + fd_resource_offset(rsc, lvl, layer);
   uint64_t ubwc_iova =
      fd_bo_get_iova(rsc->bo) + fd_resource_ubwc_offset(rsc, lvl, layer);
   uint32_t texconst0 = fd6_tex_const_0(
      psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);
   uint32_t block_width, block_height;
   fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
       * the render target will already have a reloc emitted for RB_MRT state,
       * so we can get away with manually patching in the address here:
       */
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(iova);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(iova >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);

      if (!ubwc_enabled)
         continue;

      patch->cs[3] |= A6XX_TEX_CONST_3_FLAG;
      patch->cs[7] = A6XX_TEX_CONST_7_FLAG_LO(ubwc_iova);
      patch->cs[8] = A6XX_TEX_CONST_8_FLAG_HI(ubwc_iova >> 32);
      patch->cs[9] = A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(
         rsc->layout.ubwc_layer_size >> 2);
      patch->cs[10] =
         A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(
            fdl_ubwc_pitch(&rsc->layout, lvl)) |
         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(
            DIV_ROUND_UP(u_minify(psurf->texture->width0, lvl), block_width))) |
         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(
            DIV_ROUND_UP(u_minify(psurf->texture->height0, lvl), block_height)));
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

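/* Program RB_RENDER_CNTL with the depth and per-MRT UBWC ("flag") enables.
 * On GPUs with CP_REG_WRITE the register is written through the CP with the
 * RENDER_CNTL tracker; otherwise a plain register write is used.
 */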
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;
   uint32_t cntl = 0;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   cntl |= A6XX_RB_RENDER_CNTL_UNK4;
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   if (screen->info->a6xx.has_cp_reg_write) {
      OUT_PKT7(ring, CP_REG_WRITE, 3);
      OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
      OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
   }
   OUT_RING(ring, cntl |
                  COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
                  A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

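/* Visibility Stream Compressor (VSC) state: during the binning pass the HW
 * writes a draw stream and a primitive stream per VSC pipe.  The pitches
 * below are grown (and the BOs reallocated) whenever a batch's estimated
 * stream size, or a detected overflow, exceeds the current allocation.
 */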
/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch) * 32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch) * 32)

static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: probably only need to align to 0x40, but aligning stronger
       * reduces the odds that we will have to realloc again on the next
       * frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      fd6_ctx->vsc_draw_strm = fd_bo_new(
         ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
         0, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      fd6_ctx->vsc_prim_strm = fd_bo_new(
         ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
         0, "vsc_prim_strm");
   }

   OUT_REG(
      ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
      A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                      .bo_offset =
                                         32 * fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
   for (i = 0; i < 32; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                     A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                     A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                     A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}

/*
 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This allows the CPU to detect which
 * buffer overflowed (and, since the current size is encoded as well, it
 * prevents already-submitted but not yet executed batches from fooling
 * the CPU into increasing the size again unnecessarily).
 */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_scratch if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                     CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                     CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}

static void
check_vsc_overflow(struct fd_context *ctx)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
   uint32_t vsc_overflow = control->vsc_overflow;

   if (!vsc_overflow)
      return;

   /* clear overflow flag: */
   control->vsc_overflow = 0;

   unsigned buffer = vsc_overflow & 0x3;
   unsigned size = vsc_overflow & ~0x3;

   if (buffer == 0x1) {
      /* VSC_DRAW_STRM overflow: */

      if (size < fd6_ctx->vsc_draw_strm_pitch) {
         /* we've already increased the size, this overflow is
          * from a batch submitted before resize, but executed
          * after
          */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      fd6_ctx->vsc_draw_strm_pitch *= 2;

      mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);

   } else if (buffer == 0x3) {
      /* VSC_PRIM_STRM overflow: */

      if (size < fd6_ctx->vsc_prim_strm_pitch) {
         /* we've already increased the size */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch *= 2;

      mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);

   } else {
      /* NOTE: it's possible, for example, for overflow to corrupt the
       * control page.  I mostly just see this hit if I set initial VSC
       * buffer size extremely small.  Things still seem to recover,
       * but maybe we should pre-emptively realloc vsc_data/vsc_data2
       * and hope for different memory placement?
       */
      mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
   }
}

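/* Autotune support: snapshot the ZPASS_DONE sample counter at the start and
 * end of the pass, used by the autotuner when deciding between GMEM and
 * sysmem rendering for future batches.  No-op unless the batch has an
 * autotune result attached.
 */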
static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}

static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}

/*
 * Emit a conditional CP_INDIRECT_BUFFER based on VSC_STATE[p], i.e. the IB
 * is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
                    struct fd_ringbuffer *target)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (target->cur == target->start)
      return;

   emit_marker6(ring, 6);

   unsigned count = fd_ringbuffer_cmd_count(target);

   BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */

   OUT_PKT7(ring, CP_REG_TEST, 1);
   OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                  A6XX_CP_REG_TEST_0_BIT(tile->n) |
                  A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

   OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
   OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
   OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }

   emit_marker6(ring, 6);
}

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
   OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

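/* The binning pass: replay the draw IB in RM6_BINNING mode so the hardware
 * runs the binning shader variants and fills the VSC visibility streams,
 * then check for stream overflow.  The draw pass later uses the streams to
 * skip geometry that is not visible in the current tile.
 */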
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   debug_assert(!batch->tessellation);

   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.binning_pass = true));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_UNKNOWN_9805);

   OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
   OUT_RING(ring, screen->info->a6xx.magic.SP_UNKNOWN_A0F8);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace);
   fd6_emit_ib(ring, batch->draw);
   trace_end_binning_ib(&batch->trace);

   fd_reset_wfi(batch);

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                  CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                  CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   fd6_cache_inv(batch, ring);
   fd6_cache_flush(batch, ring);
   fd_wfi(batch, ring);

   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

   trace_start_vsc_overflow_test(&batch->trace);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));
}

static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                  COND(samples == MSAA_ONE,
                       A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                  COND(samples == MSAA_ONE,
                       A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
            COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

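/* Per-batch GMEM setup: restore context state, run the prologue IB (if
 * any), build the per-tile restore/resolve IBs, program CCU for GMEM, emit
 * MRT/ZS/MSAA state, and optionally run the binning pass.
 */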
/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);

   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace);
   }

   fd6_cache_inv(batch, ring);

   prepare_tile_setup_ib(batch);
   prepare_tile_fini_ib(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd_wfi(batch, ring);
   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);
      update_render_cntl(batch, pfb, true);
      emit_binning_pass(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      // NOTE a618 not setting .USE_VIZ .. from a quick check on a630, it
      // does not appear that this bit changes much (i.e. it isn't actually
      // .USE_VIZ like previous gens)
      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_UNKNOWN_9805);

      OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
      OUT_RING(ring, screen->info->a6xx.magic.SP_UNKNOWN_A0F8);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
   }

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
   emit_marker6(ring, 7);

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   set_scissor(ring, x1, y1, x2, y2);

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                     CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
                (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring,
                fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
                (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
                (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      set_window_offset(ring, x1, y1);

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   } else {
      set_window_offset(ring, x1, y1);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   }
}

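/* The blit scissor bounds the gmem<->mem blits; it is rounded out to a 16x4
 * granularity (presumably a hardware alignment requirement for the
 * blit/resolve path).
 */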
static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_scissor_state blit_scissor = batch->max_scissor;

   blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
   blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
   blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
   blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                  A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                  A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}

static void
emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
          struct pipe_surface *psurf, bool stencil)
{
   struct fd_resource *rsc = fd_resource(psurf->texture);
   enum pipe_format pfmt = psurf->format;
   uint32_t offset;
   bool ubwc_enabled;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   /* separate stencil case: */
   if (stencil) {
      rsc = rsc->stencil;
      pfmt = rsc->b.b.format;
   }

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

   enum a6xx_format format = fd6_pipe2color(pfmt);
   uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
   uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;
   enum a3xx_color_swap swap = fd6_resource_swap(rsc, pfmt);
   enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);
   uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);

   OUT_REG(ring,
           A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
                                 .color_format = format, .color_swap = swap,
                                 .flags = ubwc_enabled),
           A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
           A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
           A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));

   OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

   if (ubwc_enabled) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
                                   .depth = (buffer == FD_BUFFER_DEPTH),
                                   .sample_0 = util_format_is_pure_integer(
                                      psurf->format)));

   emit_blit(batch, ring, base, psurf, stencil);
}

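/* GMEM fast-clear path: pack the clear value pre-swapped to match the
 * render target's component swap (see the fd6_pipe2swap() handling below)
 * and write it directly into GMEM with a per-buffer BLIT event.
 */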
static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = batch->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &batch->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         union pipe_color_union swapped;
         switch (fd6_pipe2swap(pfmt)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         fd6_emit_blit(batch, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, batch->clear_depth);
      } else {
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
               A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
               A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                     // XXX UNK0 for separate stencil ??
                     A6XX_RB_BLIT_INFO_DEPTH |
                     A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit(batch, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                     // A6XX_RB_BLIT_INFO_UNK0 |
                     A6XX_RB_BLIT_INFO_DEPTH |
                     A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, batch->clear_stencil & 0xff);

      fd6_emit_blit(batch, ring);
   }
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->restore & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }

   if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
   if (!(batch->restore || batch->fast_cleared))
      return;

   batch->tile_setup =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   set_blit_scissor(batch, batch->tile_setup);

   emit_restore_blits(batch, batch->tile_setup);
   emit_clears(batch, batch->tile_setup);
}

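/* Note that the tile restore blits are recorded up-front into the
 * tile_setup ring (prepare_tile_setup_ib()) and replayed per tile from
 * fd6_emit_tile_renderprep(), so the mem2gmem hook itself is a no-op.
 */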
/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!batch->tile_setup)
      return;

   trace_start_clear_restore(&batch->trace, batch->fast_cleared);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_setup);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_setup);
   }
   trace_end_clear_restore(&batch->trace);
}

static bool
blit_can_resolve(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (util_format_is_snorm(format) || util_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2
    * formats
    */
   case PIPE_FORMAT_R8G8_UNORM:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

static bool
needs_resolve(struct pipe_surface *psurf)
{
   return psurf->nr_samples &&
          (psurf->nr_samples != psurf->texture->nr_samples);
}

static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf,
                  unsigned buffer) assert_dt
{
   uint32_t info = 0;
   bool stencil = false;

   if (!fd_resource(psurf->texture)->valid)
      return;

   /* if we need to resolve, but cannot with BLIT event, we instead need
    * to generate per-tile CP_BLIT (r2d) commands:
    *
    * The separate-stencil is a special case, we might need to use CP_BLIT
    * for depth, but we can still resolve stencil with a BLIT event
    */
   if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
       (buffer != FD_BUFFER_STENCIL)) {
      fd6_resolve_tile(batch, ring, base, psurf);
      return;
   }

   switch (buffer) {
   case FD_BUFFER_COLOR:
      break;
   case FD_BUFFER_STENCIL:
      info |= A6XX_RB_BLIT_INFO_UNK0;
      stencil = true;
      break;
   case FD_BUFFER_DEPTH:
      info |= A6XX_RB_BLIT_INFO_DEPTH;
      break;
   }

   if (util_format_is_pure_integer(psurf->format) ||
       util_format_is_depth_or_stencil(psurf->format))
      info |= A6XX_RB_BLIT_INFO_SAMPLE_0;

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
   OUT_RING(ring, info);

   emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (i.e. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }
}

static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->draw);
   } else {
      emit_conditional_ib(batch, tile, batch->draw);
   }

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);
}

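/* Per-tile resolve: switch the CP into RM6_RESOLVE mode and run the
 * tile_fini IB, conditionally (skipping empty tiles via the visibility
 * stream) when HW binning is in use and nothing was fast-cleared.
 */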
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                  CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                  CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   trace_start_resolve(&batch->trace);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_fini);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_fini);
   }
   trace_end_resolve(&batch->trace);
}

static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

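/* In the sysmem (bypass) path, fast clears are performed with the 2D
 * blitter (fd6_clear_surface()) directly on the destination surfaces
 * instead of through GMEM BLIT events.
 */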
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = batch->fast_cleared;

   if (!buffers)
      return;

   trace_start_clear_restore(&batch->trace, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = batch->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
                           &color);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

      if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
          (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = batch->clear_depth;
         value.ui[1] = batch->clear_stencil;
         fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
                           &value);
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = batch->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
                           &value);
      }
   }

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);

   trace_end_clear_restore(&batch->trace);
}

static void
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;

   batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
                                    0, "tessfactor");

   batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
                                   0, "tessparam");

   OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
   OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);

   batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
   OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
}

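/* Setup for sysmem (GMEM bypass) rendering: a single pass over the whole
 * framebuffer, so the scissor covers the full surface, the window offset is
 * zero, and the CCU is pointed at its bypass offset.
 */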
static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);
   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset(ring, 0, 0);

   set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */

   emit_sysmem_clears(batch, ring);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   if (batch->tessellation)
      setup_tess_buffers(batch, ring);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_cache_inv(batch, ring);

   fd_wfi(batch, ring);
   OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));

   /* enable stream-out, with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem(batch);

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
fd6_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

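/* Hook up the a6xx tiling/bypass entrypoints driven by the core freedreno
 * gmem rendering loop.
 */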
void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init;
   ctx->emit_tile_prep = fd6_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}