GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"
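
/* Emit per-MRT color buffer state (RB_MRT[i] / SP_FS_MRT[i]) for all
 * A5XX_MAX_RENDER_TARGETS slots.  With a non-NULL gmem state, buffers
 * are addressed by their per-bin base offset within gmem using the gmem
 * tile mode; with gmem==NULL they point at the backing BOs in system
 * memory, using the resource's own pitch and tile mode.
 */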
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         slice = fd_resource_slice(rsc, psurf->u.tex.level);
         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = slice->size0;

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         debug_assert((offset + size) <= fd_bo_size(rsc->bo));
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}
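
/* Emit depth/stencil buffer state, plus LRZ (low-resolution-Z) buffer
 * state when the resource has one, and separate-stencil state when
 * stencil lives in its own resource (rsc->stencil).  Like emit_mrt(),
 * a non-NULL gmem state selects per-bin gmem base offsets, while NULL
 * selects the system memory BOs.
 */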
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, 0);
         size = fd_resource_slice(rsc, 0)->size0;
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, 0);
            size = fd_resource_slice(rsc->stencil, 0)->size0;
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo, 0, 0,
                      0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}
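
/* Decide whether hardware binning is worthwhile for this batch: bail on
 * VSC pipe configurations the hw can't handle, and only bother when
 * binning is enabled, there are more than two bins, and the batch
 * actually contains draws.
 */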
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}
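
/* Rewrite the previously recorded draw commands with the final
 * visibility cull mode (USE_VISIBILITY vs IGNORE_VISIBILITY), which is
 * not known until we decide whether the batch gets a binning pass.
 */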
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}
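
/* Program the VSC (visibility stream compressor) state: bin size, the
 * 16 pipe configs from the gmem layout, and a data buffer per pipe
 * (allocated on first use) that the binning pass writes visibility
 * streams into.
 */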
static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] = fd_bo_new(
            ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}
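
/* Run the binning pass: set up a scissor covering the whole gmem render
 * area, point the VSC at its pipe buffers, and replay the draw commands
 * in BINNING mode so the hw records which bins each draw touches.
 */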
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
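/* Per-batch setup for a gmem (tiled) render pass: restore invariant
 * context state, program the CCU for gmem layout, emit color/zs state
 * with gmem base offsets, and optionally run the hw binning pass before
 * patching draws with the final visibility mode.
 */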
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
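/* Per-tile setup: program this tile's scissor/resolve window and window
 * offset.  With hw binning, CP_SET_BIN_DATA5 points the CP at the
 * visibility stream for this tile; otherwise visibility is overridden
 * so that every draw executes.
 */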
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */
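
/* Restore (blit) one surface from its system memory BO into its assigned
 * location in gmem.  Depth/stencil is imported through BLIT_MRT0 as a
 * color blit; see the in-function comment for why.
 */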
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      struct fdl_slice *slice = fd_resource_slice(rsc, 0);
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
      OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}
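
/* Restore any color/depth/stencil buffers that the batch reads before
 * writing (batch->restore) from system memory into this tile's gmem.
 */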
static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
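/* Re-emit color/zs state with gmem base offsets plus MSAA sample-count
 * state before the recorded draw commands are replayed for this tile.
 */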
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */
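
/* Resolve (blit) one surface from gmem back out to its system memory BO,
 * skipping resources whose contents were never made valid.
 */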
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   struct fdl_slice *slice;
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   slice = fd_resource_slice(rsc, psurf->u.tex.level);
   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}
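
/* Resolve whichever buffers the batch marked for writeback
 * (batch->resolve) from this tile's gmem to system memory.
 */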
static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}
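
/* After the last tile: flush LRZ and caches and drop back to BYPASS
 * (direct rendering) mode.
 */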
static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}
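
/* Set up for rendering directly to system memory (bypass mode), skipping
 * the gmem tiling machinery: CCU in bypass layout, a full-surface
 * scissor, no visibility culling, and MSAA registers programmed for
 * single-sample.
 */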
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
                     A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}
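
/* After sysmem rendering: flush LRZ and the CCU color/depth caches. */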
static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}
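
/* Hook up the a5xx tiling entry points that the core freedreno gmem
 * code invokes around each render pass.
 */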
void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}