Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_resolve.c
4570 views
1
/*
2
* Copyright © 2017 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included
12
* in all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
* DEALINGS IN THE SOFTWARE.
21
*/
22
23
/**
24
* @file crocus_resolve.c
25
*
26
* This file handles resolve tracking for main and auxiliary surfaces.
27
*
28
* It also handles our cache tracking. We have sets for the render cache,
29
* depth cache, and so on. If a BO is in a cache's set, then it may have
30
* data in that cache. The helpers take care of emitting flushes for
31
* render-to-texture, format reinterpretation issues, and other situations.
32
*/
33
34
#include "util/hash_table.h"
35
#include "util/set.h"
36
#include "crocus_context.h"
37
#include "compiler/nir/nir.h"
38
39
#define FILE_DEBUG_FLAG DEBUG_BLORP
40
41
/* Forward declaration: resolve_sampler_views() below calls this helper
 * before its definition appears.  It is static, so the definition has to
 * live in this translation unit.
 */
static void crocus_update_stencil_shadow(struct crocus_context *ice,
                                         struct crocus_resource *res);
44
/**
45
* Disable auxiliary buffers if a renderbuffer is also bound as a texture
46
* or shader image. This causes a self-dependency, where both rendering
47
* and sampling may concurrently read or write the CCS buffer, causing
48
* incorrect pixels.
49
*/
50
static bool
51
disable_rb_aux_buffer(struct crocus_context *ice,
52
bool *draw_aux_buffer_disabled,
53
struct crocus_resource *tex_res,
54
unsigned min_level, unsigned num_levels,
55
const char *usage)
56
{
57
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
58
bool found = false;
59
60
/* We only need to worry about fast clears. */
61
if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D)
62
return false;
63
64
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
65
struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
66
if (!surf)
67
continue;
68
69
struct crocus_resource *rb_res = (void *) surf->base.texture;
70
71
if (rb_res->bo == tex_res->bo &&
72
surf->base.u.tex.level >= min_level &&
73
surf->base.u.tex.level < min_level + num_levels) {
74
found = draw_aux_buffer_disabled[i] = true;
75
}
76
}
77
78
if (found) {
79
perf_debug(&ice->dbg,
80
"Disabling CCS because a renderbuffer is also bound %s.\n",
81
usage);
82
}
83
84
return found;
85
}
86
87
static void
88
resolve_sampler_views(struct crocus_context *ice,
89
struct crocus_batch *batch,
90
struct crocus_shader_state *shs,
91
const struct shader_info *info,
92
bool *draw_aux_buffer_disabled,
93
bool consider_framebuffer)
94
{
95
uint32_t views = info ? (shs->bound_sampler_views & info->textures_used[0]) : 0;
96
97
while (views) {
98
const int i = u_bit_scan(&views);
99
struct crocus_sampler_view *isv = shs->textures[i];
100
101
if (isv->res->base.b.target != PIPE_BUFFER) {
102
if (consider_framebuffer) {
103
disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res,
104
isv->view.base_level, isv->view.levels,
105
"for sampling");
106
}
107
108
crocus_resource_prepare_texture(ice, isv->res, isv->view.format,
109
isv->view.base_level, isv->view.levels,
110
isv->view.base_array_layer,
111
isv->view.array_len);
112
}
113
114
crocus_cache_flush_for_read(batch, isv->res->bo);
115
116
if (batch->screen->devinfo.ver == 7 &&
117
(isv->base.format == PIPE_FORMAT_X24S8_UINT ||
118
isv->base.format == PIPE_FORMAT_X32_S8X24_UINT ||
119
isv->base.format == PIPE_FORMAT_S8_UINT)) {
120
struct crocus_resource *zres, *sres;
121
crocus_get_depth_stencil_resources(&batch->screen->devinfo, isv->base.texture, &zres, &sres);
122
crocus_update_stencil_shadow(ice, sres);
123
crocus_cache_flush_for_read(batch, sres->shadow->bo);
124
}
125
}
126
}
127
128
static void
129
resolve_image_views(struct crocus_context *ice,
130
struct crocus_batch *batch,
131
struct crocus_shader_state *shs,
132
bool *draw_aux_buffer_disabled,
133
bool consider_framebuffer)
134
{
135
/* TODO: Consider images used by program */
136
uint32_t views = shs->bound_image_views;
137
138
while (views) {
139
const int i = u_bit_scan(&views);
140
struct pipe_image_view *pview = &shs->image[i].base;
141
struct crocus_resource *res = (void *) pview->resource;
142
143
if (res->base.b.target != PIPE_BUFFER) {
144
if (consider_framebuffer) {
145
disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
146
res, pview->u.tex.level, 1,
147
"as a shader image");
148
}
149
150
unsigned num_layers =
151
pview->u.tex.last_layer - pview->u.tex.first_layer + 1;
152
153
/* The data port doesn't understand any compression */
154
crocus_resource_prepare_access(ice, res,
155
pview->u.tex.level, 1,
156
pview->u.tex.first_layer, num_layers,
157
ISL_AUX_USAGE_NONE, false);
158
}
159
160
crocus_cache_flush_for_read(batch, res->bo);
161
}
162
}
163
164
static void
165
crocus_update_align_res(struct crocus_batch *batch,
166
struct crocus_surface *surf,
167
bool copy_to_wa)
168
{
169
struct crocus_screen *screen = (struct crocus_screen *)batch->screen;
170
struct pipe_blit_info info = { 0 };
171
172
info.src.resource = copy_to_wa ? surf->base.texture : surf->align_res;
173
info.src.level = copy_to_wa ? surf->base.u.tex.level : 0;
174
u_box_2d_zslice(0, 0, copy_to_wa ? surf->base.u.tex.first_layer : 0,
175
u_minify(surf->base.texture->width0, surf->base.u.tex.level),
176
u_minify(surf->base.texture->height0, surf->base.u.tex.level), &info.src.box);
177
info.src.format = surf->base.texture->format;
178
info.dst.resource = copy_to_wa ? surf->align_res : surf->base.texture;
179
info.dst.level = copy_to_wa ? 0 : surf->base.u.tex.level;
180
info.dst.box = info.src.box;
181
info.dst.box.z = copy_to_wa ? 0 : surf->base.u.tex.first_layer;
182
info.dst.format = surf->base.texture->format;
183
info.mask = util_format_is_depth_or_stencil(surf->base.texture->format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
184
info.filter = 0;
185
if (!screen->vtbl.blit_blt(batch, &info)) {
186
assert(0);
187
}
188
}
189
190
/**
191
* \brief Resolve buffers before drawing.
192
*
193
* Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
194
* enabled depth texture, and flush the render cache for any dirty textures.
195
*/
196
void
197
crocus_predraw_resolve_inputs(struct crocus_context *ice,
198
struct crocus_batch *batch,
199
bool *draw_aux_buffer_disabled,
200
gl_shader_stage stage,
201
bool consider_framebuffer)
202
{
203
struct crocus_shader_state *shs = &ice->state.shaders[stage];
204
const struct shader_info *info = crocus_get_shader_info(ice, stage);
205
206
uint64_t stage_dirty = (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage) |
207
(consider_framebuffer ? CROCUS_STAGE_DIRTY_BINDINGS_FS : 0);
208
209
if (ice->state.stage_dirty & stage_dirty) {
210
resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
211
consider_framebuffer);
212
resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled,
213
consider_framebuffer);
214
}
215
}
216
217
void
218
crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
219
struct crocus_batch *batch,
220
bool *draw_aux_buffer_disabled)
221
{
222
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
223
struct crocus_screen *screen = (void *) ice->ctx.screen;
224
struct intel_device_info *devinfo = &screen->devinfo;
225
struct crocus_uncompiled_shader *ish =
226
ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
227
const nir_shader *nir = ish->nir;
228
229
if (ice->state.dirty & CROCUS_DIRTY_DEPTH_BUFFER) {
230
struct pipe_surface *zs_surf = cso_fb->zsbuf;
231
232
if (zs_surf) {
233
struct crocus_resource *z_res, *s_res;
234
crocus_get_depth_stencil_resources(devinfo, zs_surf->texture, &z_res, &s_res);
235
unsigned num_layers =
236
zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
237
238
if (z_res) {
239
crocus_resource_prepare_render(ice, z_res,
240
zs_surf->u.tex.level,
241
zs_surf->u.tex.first_layer,
242
num_layers, ice->state.hiz_usage);
243
crocus_cache_flush_for_depth(batch, z_res->bo);
244
245
if (((struct crocus_surface *)zs_surf)->align_res) {
246
crocus_update_align_res(batch, (struct crocus_surface *)zs_surf, true);
247
}
248
}
249
250
if (s_res) {
251
crocus_cache_flush_for_depth(batch, s_res->bo);
252
}
253
}
254
}
255
256
if (nir->info.outputs_read != 0) {
257
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
258
if (cso_fb->cbufs[i]) {
259
struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
260
struct crocus_resource *res = (void *) cso_fb->cbufs[i]->texture;
261
262
crocus_resource_prepare_texture(ice, res, surf->view.format,
263
surf->view.base_level, 1,
264
surf->view.base_array_layer,
265
surf->view.array_len);
266
}
267
}
268
}
269
270
if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS) {
271
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
272
struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
273
if (!surf)
274
continue;
275
276
struct crocus_resource *res = (void *) surf->base.texture;
277
278
if (surf->align_res)
279
crocus_update_align_res(batch, surf, true);
280
281
enum isl_aux_usage aux_usage =
282
crocus_resource_render_aux_usage(ice, res, surf->view.base_level,
283
surf->view.format,
284
draw_aux_buffer_disabled[i]);
285
286
if (ice->state.draw_aux_usage[i] != aux_usage) {
287
ice->state.draw_aux_usage[i] = aux_usage;
288
/* XXX: Need to track which bindings to make dirty */
289
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
290
}
291
292
crocus_resource_prepare_render(ice, res, surf->view.base_level,
293
surf->view.base_array_layer,
294
surf->view.array_len,
295
aux_usage);
296
297
crocus_cache_flush_for_render(batch, res->bo, surf->view.format,
298
aux_usage);
299
}
300
}
301
}
302
303
/**
304
* \brief Call this after drawing to mark which buffers need resolving
305
*
306
* If the depth buffer was written to and if it has an accompanying HiZ
307
* buffer, then mark that it needs a depth resolve.
308
*
309
* If the color buffer is a multisample window system buffer, then
310
* mark that it needs a downsample.
311
*
312
* Also mark any render targets which will be textured as needing a render
313
* cache flush.
314
*/
315
void
316
crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
317
struct crocus_batch *batch)
318
{
319
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
320
struct crocus_screen *screen = (void *) ice->ctx.screen;
321
struct intel_device_info *devinfo = &screen->devinfo;
322
// XXX: front buffer drawing?
323
324
bool may_have_resolved_depth =
325
ice->state.dirty & (CROCUS_DIRTY_DEPTH_BUFFER |
326
CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL);
327
328
struct pipe_surface *zs_surf = cso_fb->zsbuf;
329
if (zs_surf) {
330
struct crocus_resource *z_res, *s_res;
331
crocus_get_depth_stencil_resources(devinfo, zs_surf->texture, &z_res, &s_res);
332
unsigned num_layers =
333
zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
334
335
if (z_res) {
336
if (may_have_resolved_depth && ice->state.depth_writes_enabled) {
337
crocus_resource_finish_render(ice, z_res, zs_surf->u.tex.level,
338
zs_surf->u.tex.first_layer, num_layers,
339
ice->state.hiz_usage);
340
}
341
342
if (ice->state.depth_writes_enabled)
343
crocus_depth_cache_add_bo(batch, z_res->bo);
344
345
if (((struct crocus_surface *)zs_surf)->align_res) {
346
crocus_update_align_res(batch, (struct crocus_surface *)zs_surf, false);
347
}
348
}
349
350
if (s_res) {
351
if (may_have_resolved_depth && ice->state.stencil_writes_enabled) {
352
crocus_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
353
zs_surf->u.tex.first_layer, num_layers,
354
s_res->aux.usage);
355
}
356
357
if (ice->state.stencil_writes_enabled)
358
crocus_depth_cache_add_bo(batch, s_res->bo);
359
}
360
}
361
362
bool may_have_resolved_color =
363
ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS;
364
365
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
366
struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
367
if (!surf)
368
continue;
369
370
if (surf->align_res)
371
crocus_update_align_res(batch, surf, false);
372
struct crocus_resource *res = (void *) surf->base.texture;
373
enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];
374
375
crocus_render_cache_add_bo(batch, res->bo, surf->view.format,
376
aux_usage);
377
378
if (may_have_resolved_color) {
379
union pipe_surface_desc *desc = &surf->base.u;
380
unsigned num_layers =
381
desc->tex.last_layer - desc->tex.first_layer + 1;
382
crocus_resource_finish_render(ice, res, desc->tex.level,
383
desc->tex.first_layer, num_layers,
384
aux_usage);
385
}
386
}
387
}
388
389
/**
390
* Clear the cache-tracking sets.
391
*/
392
void
393
crocus_cache_sets_clear(struct crocus_batch *batch)
394
{
395
hash_table_foreach(batch->cache.render, render_entry)
396
_mesa_hash_table_remove(batch->cache.render, render_entry);
397
398
set_foreach(batch->cache.depth, depth_entry)
399
_mesa_set_remove(batch->cache.depth, depth_entry);
400
}
401
402
/**
403
* Emits an appropriate flush for a BO if it has been rendered to within the
404
* same batchbuffer as a read that's about to be emitted.
405
*
406
* The GPU has separate, incoherent caches for the render cache and the
407
* sampler cache, along with other caches. Usually data in the different
408
* caches don't interact (e.g. we don't render to our driver-generated
409
* immediate constant data), but for render-to-texture in FBOs we definitely
410
* do. When a batchbuffer is flushed, the kernel will ensure that everything
411
* necessary is flushed before another use of that BO, but for reuse from
412
* different caches within a batchbuffer, it's all our responsibility.
413
*/
414
void
415
crocus_flush_depth_and_render_caches(struct crocus_batch *batch)
416
{
417
const struct intel_device_info *devinfo = &batch->screen->devinfo;
418
if (devinfo->ver >= 6) {
419
crocus_emit_pipe_control_flush(batch,
420
"cache tracker: render-to-texture",
421
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
422
PIPE_CONTROL_RENDER_TARGET_FLUSH |
423
PIPE_CONTROL_CS_STALL);
424
425
crocus_emit_pipe_control_flush(batch,
426
"cache tracker: render-to-texture",
427
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
428
PIPE_CONTROL_CONST_CACHE_INVALIDATE);
429
} else {
430
crocus_emit_mi_flush(batch);
431
}
432
433
crocus_cache_sets_clear(batch);
434
}
435
436
void
437
crocus_cache_flush_for_read(struct crocus_batch *batch,
438
struct crocus_bo *bo)
439
{
440
if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
441
_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
442
crocus_flush_depth_and_render_caches(batch);
443
}
444
445
static void *
446
format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
447
{
448
return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
449
}
450
451
void
452
crocus_cache_flush_for_render(struct crocus_batch *batch,
453
struct crocus_bo *bo,
454
enum isl_format format,
455
enum isl_aux_usage aux_usage)
456
{
457
if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
458
crocus_flush_depth_and_render_caches(batch);
459
460
/* Check to see if this bo has been used by a previous rendering operation
461
* but with a different format or aux usage. If it has, flush the render
462
* cache so we ensure that it's only in there with one format or aux usage
463
* at a time.
464
*
465
* Even though it's not obvious, this can easily happen in practice.
466
* Suppose a client is blending on a surface with sRGB encode enabled on
467
* gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
468
* then disables sRGB decode and continues blending we will flip on
469
* AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
470
* perfectly valid since CCS_E is a subset of CCS_D). However, this means
471
* that we have fragments in-flight which are rendering with UNORM+CCS_E
472
* and other fragments in-flight with SRGB+CCS_D on the same surface at the
473
* same time and the pixel scoreboard and color blender are trying to sort
474
* it all out. This ends badly (i.e. GPU hangs).
475
*
476
* To date, we have never observed GPU hangs or even corruption to be
477
* associated with switching the format, only the aux usage. However,
478
* there are comments in various docs which indicate that the render cache
479
* isn't 100% resilient to format changes. We may as well be conservative
480
* and flush on format changes too. We can always relax this later if we
481
* find it to be a performance problem.
482
*/
483
struct hash_entry *entry =
484
_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
485
if (entry && entry->data != format_aux_tuple(format, aux_usage))
486
crocus_flush_depth_and_render_caches(batch);
487
}
488
489
void
490
crocus_render_cache_add_bo(struct crocus_batch *batch,
491
struct crocus_bo *bo,
492
enum isl_format format,
493
enum isl_aux_usage aux_usage)
494
{
495
#ifndef NDEBUG
496
struct hash_entry *entry =
497
_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
498
if (entry) {
499
/* Otherwise, someone didn't do a flush_for_render and that would be
500
* very bad indeed.
501
*/
502
assert(entry->data == format_aux_tuple(format, aux_usage));
503
}
504
#endif
505
506
_mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
507
format_aux_tuple(format, aux_usage));
508
}
509
510
void
511
crocus_cache_flush_for_depth(struct crocus_batch *batch,
512
struct crocus_bo *bo)
513
{
514
if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
515
crocus_flush_depth_and_render_caches(batch);
516
}
517
518
void
519
crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo)
520
{
521
_mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
522
}
523
524
static void
525
crocus_resolve_color(struct crocus_context *ice,
526
struct crocus_batch *batch,
527
struct crocus_resource *res,
528
unsigned level, unsigned layer,
529
enum isl_aux_op resolve_op)
530
{
531
struct crocus_screen *screen = batch->screen;
532
DBG("%s to res %p level %u layer %u\n", __func__, res, level, layer);
533
534
struct blorp_surf surf;
535
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
536
&res->base.b, res->aux.usage, level, true);
537
538
crocus_batch_maybe_flush(batch, 1500);
539
540
/* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
541
*
542
* "Any transition from any value in {Clear, Render, Resolve} to a
543
* different value in {Clear, Render, Resolve} requires end of pipe
544
* synchronization."
545
*
546
* In other words, fast clear ops are not properly synchronized with
547
* other drawing. We need to use a PIPE_CONTROL to ensure that the
548
* contents of the previous draw hit the render target before we resolve
549
* and again afterwards to ensure that the resolve is complete before we
550
* do any more regular drawing.
551
*/
552
crocus_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
553
PIPE_CONTROL_RENDER_TARGET_FLUSH);
554
555
struct blorp_batch blorp_batch;
556
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
557
blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1,
558
isl_format_srgb_to_linear(res->surf.format),
559
resolve_op);
560
blorp_batch_finish(&blorp_batch);
561
562
/* See comment above */
563
crocus_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
564
PIPE_CONTROL_RENDER_TARGET_FLUSH);
565
}
566
567
static void
568
crocus_mcs_partial_resolve(struct crocus_context *ice,
569
struct crocus_batch *batch,
570
struct crocus_resource *res,
571
uint32_t start_layer,
572
uint32_t num_layers)
573
{
574
struct crocus_screen *screen = batch->screen;
575
576
DBG("%s to res %p layers %u-%u\n", __func__, res,
577
start_layer, start_layer + num_layers - 1);
578
579
assert(isl_aux_usage_has_mcs(res->aux.usage));
580
581
struct blorp_surf surf;
582
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
583
&res->base.b, res->aux.usage, 0, true);
584
585
struct blorp_batch blorp_batch;
586
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
587
blorp_mcs_partial_resolve(&blorp_batch, &surf,
588
isl_format_srgb_to_linear(res->surf.format),
589
start_layer, num_layers);
590
blorp_batch_finish(&blorp_batch);
591
}
592
593
/**
594
* Perform a HiZ or depth resolve operation.
595
*
596
* For an overview of HiZ ops, see the following sections of the Sandy Bridge
597
* PRM, Volume 1, Part 2:
598
* - 7.5.3.1 Depth Buffer Clear
599
* - 7.5.3.2 Depth Buffer Resolve
600
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
601
*/
602
void
603
crocus_hiz_exec(struct crocus_context *ice,
604
struct crocus_batch *batch,
605
struct crocus_resource *res,
606
unsigned int level, unsigned int start_layer,
607
unsigned int num_layers, enum isl_aux_op op,
608
bool update_clear_depth)
609
{
610
struct crocus_screen *screen = batch->screen;
611
const struct intel_device_info *devinfo = &batch->screen->devinfo;
612
assert(crocus_resource_level_has_hiz(res, level));
613
assert(op != ISL_AUX_OP_NONE);
614
UNUSED const char *name = NULL;
615
616
switch (op) {
617
case ISL_AUX_OP_FULL_RESOLVE:
618
name = "depth resolve";
619
break;
620
case ISL_AUX_OP_AMBIGUATE:
621
name = "hiz ambiguate";
622
break;
623
case ISL_AUX_OP_FAST_CLEAR:
624
name = "depth clear";
625
break;
626
case ISL_AUX_OP_PARTIAL_RESOLVE:
627
case ISL_AUX_OP_NONE:
628
unreachable("Invalid HiZ op");
629
}
630
631
DBG("%s %s to res %p level %d layers %d-%d\n",
632
__func__, name, res, level, start_layer, start_layer + num_layers - 1);
633
634
/* The following stalls and flushes are only documented to be required
635
* for HiZ clear operations. However, they also seem to be required for
636
* resolve operations.
637
*
638
* From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
639
*
640
* "If other rendering operations have preceded this clear, a
641
* PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
642
* enabled must be issued before the rectangle primitive used for
643
* the depth buffer clear operation."
644
*
645
* Same applies for Gen8 and Gen9.
646
*
647
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
648
* PIPE_CONTROL, Depth Cache Flush Enable:
649
*
650
* "This bit must not be set when Depth Stall Enable bit is set in
651
* this packet."
652
*
653
* This is confirmed to hold for real, Haswell gets immediate gpu hangs.
654
*
655
* Therefore issue two pipe control flushes, one for cache flush and
656
* another for depth stall.
657
*/
658
if (devinfo->ver == 6) {
659
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
660
*
661
* "If other rendering operations have preceded this clear, a
662
* PIPE_CONTROL with write cache flush enabled and Z-inhibit
663
* disabled must be issued before the rectangle primitive used for
664
* the depth buffer clear operation.
665
*/
666
crocus_emit_pipe_control_flush(batch,
667
"hiz op: pre-flushes (1)",
668
PIPE_CONTROL_RENDER_TARGET_FLUSH |
669
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
670
PIPE_CONTROL_CS_STALL);
671
} else if (devinfo->ver >= 7) {
672
crocus_emit_pipe_control_flush(batch,
673
"hiz op: pre-flushes (1/2)",
674
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
675
PIPE_CONTROL_CS_STALL);
676
crocus_emit_pipe_control_flush(batch, "hiz op: pre-flushes (2/2)",
677
PIPE_CONTROL_DEPTH_STALL);
678
}
679
680
assert(isl_aux_usage_has_hiz(res->aux.usage) && res->aux.bo);
681
682
crocus_batch_maybe_flush(batch, 1500);
683
684
struct blorp_surf surf;
685
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
686
&res->base.b, res->aux.usage, level, true);
687
688
struct blorp_batch blorp_batch;
689
enum blorp_batch_flags flags = 0;
690
flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
691
blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
692
blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
693
blorp_batch_finish(&blorp_batch);
694
695
/* The following stalls and flushes are only documented to be required
696
* for HiZ clear operations. However, they also seem to be required for
697
* resolve operations.
698
*
699
* From the Broadwell PRM, volume 7, "Depth Buffer Clear":
700
*
701
* "Depth buffer clear pass using any of the methods (WM_STATE,
702
* 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
703
* PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
704
* "set" before starting to render. DepthStall and DepthFlush are
705
* not needed between consecutive depth clear passes nor is it
706
* required if the depth clear pass was done with
707
* 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
708
*
709
* TODO: Such as the spec says, this could be conditional.
710
*/
711
if (devinfo->ver == 6) {
712
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
713
*
714
* "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
715
* followed by a PIPE_CONTROL command with DEPTH_STALL bit set
716
* and Then followed by Depth FLUSH'
717
*/
718
crocus_emit_pipe_control_flush(batch,
719
"hiz op: post-flushes (1/2)",
720
PIPE_CONTROL_DEPTH_STALL);
721
722
crocus_emit_pipe_control_flush(batch,
723
"hiz op: post-flushes (2/2)",
724
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
725
PIPE_CONTROL_CS_STALL);
726
}
727
}
728
729
/**
730
* Does the resource's slice have hiz enabled?
731
*/
732
bool
733
crocus_resource_level_has_hiz(const struct crocus_resource *res, uint32_t level)
734
{
735
crocus_resource_check_level_layer(res, level, 0);
736
return res->aux.has_hiz & 1 << level;
737
}
738
739
static bool
740
crocus_resource_level_has_aux(const struct crocus_resource *res, uint32_t level)
741
{
742
if (isl_aux_usage_has_hiz(res->aux.usage))
743
return crocus_resource_level_has_hiz(res, level);
744
else
745
return level < res->aux.surf.levels;
746
}
747
748
/** \brief Assert that the level and layer are valid for the resource. */
749
void
750
crocus_resource_check_level_layer(UNUSED const struct crocus_resource *res,
751
UNUSED uint32_t level, UNUSED uint32_t layer)
752
{
753
assert(level < res->surf.levels);
754
assert(layer < util_num_layers(&res->base.b, level));
755
}
756
757
static inline uint32_t
758
miptree_level_range_length(const struct crocus_resource *res,
759
uint32_t start_level, uint32_t num_levels)
760
{
761
assert(start_level < res->surf.levels);
762
763
if (num_levels == INTEL_REMAINING_LAYERS)
764
num_levels = res->surf.levels;
765
766
/* Check for overflow */
767
assert(start_level + num_levels >= start_level);
768
assert(start_level + num_levels <= res->surf.levels);
769
770
return num_levels;
771
}
772
773
static inline uint32_t
774
miptree_layer_range_length(const struct crocus_resource *res, uint32_t level,
775
uint32_t start_layer, uint32_t num_layers)
776
{
777
assert(level <= res->base.b.last_level);
778
779
const uint32_t total_num_layers = crocus_get_num_logical_layers(res, level);
780
assert(start_layer < total_num_layers);
781
if (num_layers == INTEL_REMAINING_LAYERS)
782
num_layers = total_num_layers - start_layer;
783
/* Check for overflow */
784
assert(start_layer + num_layers >= start_layer);
785
assert(start_layer + num_layers <= total_num_layers);
786
787
return num_layers;
788
}
789
790
bool
791
crocus_has_invalid_primary(const struct crocus_resource *res,
792
unsigned start_level, unsigned num_levels,
793
unsigned start_layer, unsigned num_layers)
794
{
795
if (!res->aux.bo)
796
return false;
797
798
/* Clamp the level range to fit the resource */
799
num_levels = miptree_level_range_length(res, start_level, num_levels);
800
801
for (uint32_t l = 0; l < num_levels; l++) {
802
const uint32_t level = start_level + l;
803
if (!crocus_resource_level_has_aux(res, level))
804
continue;
805
806
const uint32_t level_layers =
807
miptree_layer_range_length(res, level, start_layer, num_layers);
808
for (unsigned a = 0; a < level_layers; a++) {
809
enum isl_aux_state aux_state =
810
crocus_resource_get_aux_state(res, level, start_layer + a);
811
if (!isl_aux_state_has_valid_primary(aux_state))
812
return true;
813
}
814
}
815
816
return false;
817
}
818
819
void
820
crocus_resource_prepare_access(struct crocus_context *ice,
821
struct crocus_resource *res,
822
uint32_t start_level, uint32_t num_levels,
823
uint32_t start_layer, uint32_t num_layers,
824
enum isl_aux_usage aux_usage,
825
bool fast_clear_supported)
826
{
827
if (!res->aux.bo)
828
return;
829
830
/* We can't do resolves on the compute engine, so awkwardly, we have to
831
* do them on the render batch...
832
*/
833
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
834
835
const uint32_t clamped_levels =
836
miptree_level_range_length(res, start_level, num_levels);
837
for (uint32_t l = 0; l < clamped_levels; l++) {
838
const uint32_t level = start_level + l;
839
if (!crocus_resource_level_has_aux(res, level))
840
continue;
841
842
const uint32_t level_layers =
843
miptree_layer_range_length(res, level, start_layer, num_layers);
844
for (uint32_t a = 0; a < level_layers; a++) {
845
const uint32_t layer = start_layer + a;
846
const enum isl_aux_state aux_state =
847
crocus_resource_get_aux_state(res, level, layer);
848
const enum isl_aux_op aux_op =
849
isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);
850
851
/* Prepare the aux buffer for a conditional or unconditional access.
852
* A conditional access is handled by assuming that the access will
853
* not evaluate to a no-op. If the access does in fact occur, the aux
854
* will be in the required state. If it does not, no data is lost
855
* because the aux_op performed is lossless.
856
*/
857
if (aux_op == ISL_AUX_OP_NONE) {
858
/* Nothing to do here. */
859
} else if (isl_aux_usage_has_mcs(res->aux.usage)) {
860
assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE);
861
crocus_mcs_partial_resolve(ice, batch, res, layer, 1);
862
} else if (isl_aux_usage_has_hiz(res->aux.usage)) {
863
crocus_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false);
864
} else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) {
865
unreachable("crocus doesn't resolve STC_CCS resources");
866
} else {
867
assert(isl_aux_usage_has_ccs(res->aux.usage));
868
crocus_resolve_color(ice, batch, res, level, layer, aux_op);
869
}
870
871
const enum isl_aux_state new_state =
872
isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op);
873
crocus_resource_set_aux_state(ice, res, level, layer, 1, new_state);
874
}
875
}
876
}
877
878
void
879
crocus_resource_finish_write(struct crocus_context *ice,
880
struct crocus_resource *res, uint32_t level,
881
uint32_t start_layer, uint32_t num_layers,
882
enum isl_aux_usage aux_usage)
883
{
884
if (res->base.b.format == PIPE_FORMAT_S8_UINT)
885
res->shadow_needs_update = true;
886
887
if (!crocus_resource_level_has_aux(res, level))
888
return;
889
890
const uint32_t level_layers =
891
miptree_layer_range_length(res, level, start_layer, num_layers);
892
893
for (uint32_t a = 0; a < level_layers; a++) {
894
const uint32_t layer = start_layer + a;
895
const enum isl_aux_state aux_state =
896
crocus_resource_get_aux_state(res, level, layer);
897
898
/* Transition the aux state for a conditional or unconditional write. A
899
* conditional write is handled by assuming that the write applies to
900
* only part of the render target. This prevents the new state from
901
* losing the types of compression that might exist in the current state
902
* (e.g. CLEAR). If the write evaluates to a no-op, the state will still
903
* be able to communicate when resolves are necessary (but it may
904
* falsely communicate this as well).
905
*/
906
const enum isl_aux_state new_aux_state =
907
isl_aux_state_transition_write(aux_state, aux_usage, false);
908
909
crocus_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state);
910
}
911
}
912
913
enum isl_aux_state
914
crocus_resource_get_aux_state(const struct crocus_resource *res,
915
uint32_t level, uint32_t layer)
916
{
917
crocus_resource_check_level_layer(res, level, layer);
918
assert(crocus_resource_level_has_aux(res, level));
919
920
return res->aux.state[level][layer];
921
}
922
923
void
924
crocus_resource_set_aux_state(struct crocus_context *ice,
925
struct crocus_resource *res, uint32_t level,
926
uint32_t start_layer, uint32_t num_layers,
927
enum isl_aux_state aux_state)
928
{
929
assert(crocus_resource_level_has_aux(res, level));
930
931
num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);
932
for (unsigned a = 0; a < num_layers; a++) {
933
if (res->aux.state[level][start_layer + a] != aux_state) {
934
res->aux.state[level][start_layer + a] = aux_state;
935
ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
936
CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
937
/* XXX: Need to track which bindings to make dirty */
938
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
939
}
940
}
941
}
942
943
static bool
944
isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
945
{
946
/* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
947
* values so sRGB curve application was a no-op for all fast-clearable
948
* formats.
949
*
950
* On gen9+, the hardware supports arbitrary clear values. For sRGB clear
951
* values, the hardware interprets the floats, not as what would be
952
* returned from the sampler (or written by the shader), but as being
953
* between format conversion and sRGB curve application. This means that
954
* we can switch between sRGB and UNORM without having to whack the clear
955
* color.
956
*/
957
return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
958
}
959
960
void
961
crocus_resource_prepare_texture(struct crocus_context *ice,
962
struct crocus_resource *res,
963
enum isl_format view_format,
964
uint32_t start_level, uint32_t num_levels,
965
uint32_t start_layer, uint32_t num_layers)
966
{
967
enum isl_aux_usage aux_usage =
968
crocus_resource_texture_aux_usage(res);
969
970
bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
971
972
/* Clear color is specified as ints or floats and the conversion is done by
973
* the sampler. If we have a texture view, we would have to perform the
974
* clear color conversion manually. Just disable clear color.
975
*/
976
if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
977
clear_supported = false;
978
979
crocus_resource_prepare_access(ice, res, start_level, num_levels,
980
start_layer, num_layers,
981
aux_usage, clear_supported);
982
}
983
984
enum isl_aux_usage
985
crocus_resource_render_aux_usage(struct crocus_context *ice,
986
struct crocus_resource *res,
987
uint32_t level,
988
enum isl_format render_format,
989
bool draw_aux_disabled)
990
{
991
struct crocus_screen *screen = (void *) ice->ctx.screen;
992
struct intel_device_info *devinfo = &screen->devinfo;
993
994
if (draw_aux_disabled)
995
return ISL_AUX_USAGE_NONE;
996
997
switch (res->aux.usage) {
998
case ISL_AUX_USAGE_MCS:
999
return res->aux.usage;
1000
1001
case ISL_AUX_USAGE_CCS_D:
1002
/* Otherwise, we try to fall back to CCS_D */
1003
if (isl_format_supports_ccs_d(devinfo, render_format))
1004
return ISL_AUX_USAGE_CCS_D;
1005
1006
return ISL_AUX_USAGE_NONE;
1007
1008
case ISL_AUX_USAGE_HIZ:
1009
assert(render_format == res->surf.format);
1010
return crocus_resource_level_has_hiz(res, level) ?
1011
res->aux.usage : ISL_AUX_USAGE_NONE;
1012
1013
default:
1014
return ISL_AUX_USAGE_NONE;
1015
}
1016
}
1017
1018
void
1019
crocus_resource_prepare_render(struct crocus_context *ice,
1020
struct crocus_resource *res, uint32_t level,
1021
uint32_t start_layer, uint32_t layer_count,
1022
enum isl_aux_usage aux_usage)
1023
{
1024
crocus_resource_prepare_access(ice, res, level, 1, start_layer,
1025
layer_count, aux_usage,
1026
aux_usage != ISL_AUX_USAGE_NONE);
1027
}
1028
1029
void
1030
crocus_resource_finish_render(struct crocus_context *ice,
1031
struct crocus_resource *res, uint32_t level,
1032
uint32_t start_layer, uint32_t layer_count,
1033
enum isl_aux_usage aux_usage)
1034
{
1035
crocus_resource_finish_write(ice, res, level, start_layer, layer_count,
1036
aux_usage);
1037
}
1038
1039
static void
1040
crocus_update_stencil_shadow(struct crocus_context *ice,
1041
struct crocus_resource *res)
1042
{
1043
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1044
UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
1045
assert(devinfo->ver == 7);
1046
1047
if (!res->shadow_needs_update)
1048
return;
1049
1050
struct pipe_box box;
1051
for (unsigned level = 0; level <= res->base.b.last_level; level++) {
1052
u_box_2d(0, 0,
1053
u_minify(res->base.b.width0, level),
1054
u_minify(res->base.b.height0, level), &box);
1055
const unsigned depth = res->base.b.target == PIPE_TEXTURE_3D ?
1056
u_minify(res->base.b.depth0, level) : res->base.b.array_size;
1057
1058
for (unsigned layer = 0; layer < depth; layer++) {
1059
box.z = layer;
1060
ice->ctx.resource_copy_region(&ice->ctx,
1061
&res->shadow->base.b, level, 0, 0, layer,
1062
&res->base.b, level, &box);
1063
}
1064
}
1065
res->shadow_needs_update = false;
1066
}
1067
1068