/*
 * Source: GitHub repository PojavLauncherTeam/mesa
 * Path: blob/21.2-virgl/src/gallium/drivers/iris/iris_blit.c
 */

/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/ralloc.h"
#include "intel/blorp/blorp.h"
#include "iris_context.h"
#include "iris_resource.h"
#include "iris_screen.h"

/**
 * Helper function for handling mirror image blits.
 *
 * Puts the pair (*coord0, *coord1) into ascending order.
 *
 * \param coord0  first coordinate; holds the smaller value on return
 * \param coord1  second coordinate; holds the larger value on return
 *
 * \return true if *coord0 was greater than *coord1 and the two values were
 *         swapped (mirrored), false if they were left untouched
 */
static bool
apply_mirror(float *coord0, float *coord1)
{
   if (*coord0 > *coord1) {
      const float swapped = *coord0;

      *coord0 = *coord1;
      *coord1 = swapped;
      return true;
   }

   return false;
}

/**
 * Compute the number of pixels to clip for each side of a rect
 *
 * \param x0          The rect's left coordinate
 * \param y0          The rect's bottom coordinate
 * \param x1          The rect's right coordinate
 * \param y1          The rect's top coordinate
 * \param min_x       The clipping region's left coordinate
 * \param min_y       The clipping region's bottom coordinate
 * \param max_x       The clipping region's right coordinate
 * \param max_y       The clipping region's top coordinate
 * \param clipped_x0  The number of pixels to clip from the left side
 * \param clipped_y0  The number of pixels to clip from the bottom side
 * \param clipped_x1  The number of pixels to clip from the right side
 * \param clipped_y1  The number of pixels to clip from the top side
 *
 * \return false if we clip everything away, true otherwise.  The four
 *         output values are only written when true is returned.
 */
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
                       float min_x, float min_y, float max_x, float max_y,
                       float *clipped_x0, float *clipped_y0,
                       float *clipped_x1, float *clipped_y1)
{
   /* If we are going to clip everything away, stop.  This also rejects an
    * inverted clipping region (min > max) and an inverted rect (x0 > x1 or
    * y0 > y1).
    */
   if (!(min_x <= max_x &&
         min_y <= max_y &&
         x0 <= max_x &&
         y0 <= max_y &&
         min_x <= x1 &&
         min_y <= y1 &&
         x0 <= x1 &&
         y0 <= y1)) {
      return false;
   }

   /* Each side is clipped by the distance it sticks out past the region,
    * or not at all when it already lies inside.
    */
   *clipped_x0 = (x0 < min_x) ? min_x - x0 : 0;
   *clipped_x1 = (max_x < x1) ? x1 - max_x : 0;

   *clipped_y0 = (y0 < min_y) ? min_y - y0 : 0;
   *clipped_y1 = (max_y < y1) ? y1 - max_y : 0;

   return true;
}

/**
 * Clips one side (left, right, top or bottom) of the dst rect and adjusts
 * the matching src coordinate by the same amount multiplied by the scale.
 *
 * \param mirror             true if mirroring is required
 * \param src                the source rect coordinate (for example src_x0)
 * \param dst0               the dst rect coordinate (for example dst_x0)
 * \param dst1               the opposite dst rect coordinate (for example
 *                           dst_x1)
 * \param clipped_dst0       number of pixels to clip from the dst coordinate
 * \param clipped_dst1       number of pixels to clip from the opposite dst
 *                           coordinate
 * \param scale              the src vs dst scale involved for that coordinate
 * \param is_left_or_bottom  true if we are clipping the left or bottom sides
 *                           of the rect.
 */
static void
clip_coordinates(bool mirror,
                 float *src, float *dst0, float *dst1,
                 float clipped_dst0,
                 float clipped_dst1,
                 float scale,
                 bool is_left_or_bottom)
{
   /* When clipping we need to add or subtract pixels from the original
    * coordinates depending on whether we are acting on the left/bottom
    * or right/top sides of the rect respectively.  We assume we have to
    * add them in the code below, and multiply by -1 when we should
    * subtract.
    */
   const float sign = is_left_or_bottom ? 1.0f : -1.0f;

   if (mirror) {
      /* Mirrored: this src coordinate pairs with the opposite dst side. */
      *dst1 -= clipped_dst1 * sign;
      *src += clipped_dst1 * scale * sign;
   } else {
      *dst0 += clipped_dst0 * sign;
      *src += clipped_dst0 * scale * sign;
   }
}

/**
151
* Apply a scissor rectangle to blit coordinates.
152
*
153
* Returns true if the blit was entirely scissored away.
154
*/
155
static bool
156
apply_blit_scissor(const struct pipe_scissor_state *scissor,
157
float *src_x0, float *src_y0,
158
float *src_x1, float *src_y1,
159
float *dst_x0, float *dst_y0,
160
float *dst_x1, float *dst_y1,
161
bool mirror_x, bool mirror_y)
162
{
163
float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1;
164
165
/* Compute number of pixels to scissor away. */
166
if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1,
167
scissor->minx, scissor->miny,
168
scissor->maxx, scissor->maxy,
169
&clip_dst_x0, &clip_dst_y0,
170
&clip_dst_x1, &clip_dst_y1))
171
return true;
172
173
// XXX: comments assume source clipping, which we don't do
174
175
/* When clipping any of the two rects we need to adjust the coordinates
176
* in the other rect considering the scaling factor involved. To obtain
177
* the best precision we want to make sure that we only clip once per
178
* side to avoid accumulating errors due to the scaling adjustment.
179
*
180
* For example, if src_x0 and dst_x0 need both to be clipped we want to
181
* avoid the situation where we clip src_x0 first, then adjust dst_x0
182
* accordingly but then we realize that the resulting dst_x0 still needs
183
* to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are
184
* applying scaling factors to adjust the coordinates in each clipping
185
* pass we lose some precision and that can affect the results of the
186
* blorp blit operation slightly. What we want to do here is detect the
187
* rect that we should clip first for each side so that when we adjust
188
* the other rect we ensure the resulting coordinate does not need to be
189
* clipped again.
190
*
191
* The code below implements this by comparing the number of pixels that
192
* we need to clip for each side of both rects considering the scales
193
* involved. For example, clip_src_x0 represents the number of pixels
194
* to be clipped for the src rect's left side, so if clip_src_x0 = 5,
195
* clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more
196
* from the dst rect so we should clip dst_x0 only and adjust src_x0.
197
* This is because clipping 4 pixels in the dst is equivalent to
198
* clipping 4 * 2 = 8 > 5 in the src.
199
*/
200
201
if (*src_x0 == *src_x1 || *src_y0 == *src_y1
202
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1)
203
return true;
204
205
float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);
206
float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0);
207
208
/* Clip left side */
209
clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1,
210
clip_dst_x0, clip_dst_x1, scale_x, true);
211
212
/* Clip right side */
213
clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0,
214
clip_dst_x1, clip_dst_x0, scale_x, false);
215
216
/* Clip bottom side */
217
clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1,
218
clip_dst_y0, clip_dst_y1, scale_y, true);
219
220
/* Clip top side */
221
clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0,
222
clip_dst_y1, clip_dst_y0, scale_y, false);
223
224
/* Check for invalid bounds
225
* Can't blit for 0-dimensions
226
*/
227
return *src_x0 == *src_x1 || *src_y0 == *src_y1
228
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1;
229
}
230
231
void
232
iris_blorp_surf_for_resource(struct isl_device *isl_dev,
233
struct blorp_surf *surf,
234
struct pipe_resource *p_res,
235
enum isl_aux_usage aux_usage,
236
unsigned level,
237
bool is_render_target)
238
{
239
struct iris_resource *res = (void *) p_res;
240
241
assert(!iris_resource_unfinished_aux_import(res));
242
243
*surf = (struct blorp_surf) {
244
.surf = &res->surf,
245
.addr = (struct blorp_address) {
246
.buffer = res->bo,
247
.offset = res->offset,
248
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
249
.mocs = iris_mocs(res->bo, isl_dev,
250
is_render_target ? ISL_SURF_USAGE_RENDER_TARGET_BIT
251
: ISL_SURF_USAGE_TEXTURE_BIT),
252
},
253
.aux_usage = aux_usage,
254
};
255
256
if (aux_usage != ISL_AUX_USAGE_NONE) {
257
surf->aux_surf = &res->aux.surf;
258
surf->aux_addr = (struct blorp_address) {
259
.buffer = res->aux.bo,
260
.offset = res->aux.offset,
261
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
262
.mocs = iris_mocs(res->bo, isl_dev, 0),
263
};
264
surf->clear_color =
265
iris_resource_get_clear_color(res, NULL, NULL);
266
surf->clear_color_addr = (struct blorp_address) {
267
.buffer = res->aux.clear_color_bo,
268
.offset = res->aux.clear_color_offset,
269
.reloc_flags = 0,
270
.mocs = iris_mocs(res->aux.clear_color_bo, isl_dev, 0),
271
};
272
}
273
}
274
275
static bool
276
is_astc(enum isl_format format)
277
{
278
return format != ISL_FORMAT_UNSUPPORTED &&
279
isl_format_get_layout(format)->txc == ISL_TXC_ASTC;
280
}
281
282
static void
283
tex_cache_flush_hack(struct iris_batch *batch,
284
enum isl_format view_format,
285
enum isl_format surf_format)
286
{
287
const struct intel_device_info *devinfo = &batch->screen->devinfo;
288
289
/* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says:
290
*
291
* "Currently Sampler assumes that a surface would not have two
292
* different format associate with it. It will not properly cache
293
* the different views in the MT cache, causing a data corruption."
294
*
295
* We may need to handle this for texture views in general someday, but
296
* for now we handle it here, as it hurts copies and blits particularly
297
* badly because they ofter reinterpret formats.
298
*
299
* If the BO hasn't been referenced yet this batch, we assume that the
300
* texture cache doesn't contain any relevant data nor need flushing.
301
*
302
* Icelake (Gfx11+) claims to fix this issue, but seems to still have
303
* issues with ASTC formats.
304
*/
305
bool need_flush = devinfo->ver >= 11 ?
306
is_astc(surf_format) != is_astc(view_format) :
307
view_format != surf_format;
308
if (!need_flush)
309
return;
310
311
const char *reason =
312
"workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads";
313
314
iris_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL);
315
iris_emit_pipe_control_flush(batch, reason,
316
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
317
}
318
319
static struct iris_resource *
320
iris_resource_for_aspect(struct pipe_resource *p_res, unsigned pipe_mask)
321
{
322
if (pipe_mask == PIPE_MASK_S) {
323
struct iris_resource *junk, *s_res;
324
iris_get_depth_stencil_resources(p_res, &junk, &s_res);
325
return s_res;
326
} else {
327
return (struct iris_resource *)p_res;
328
}
329
}
330
331
static enum pipe_format
332
pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask)
333
{
334
if (pipe_mask == PIPE_MASK_S) {
335
return util_format_stencil_only(format);
336
} else if (pipe_mask == PIPE_MASK_Z) {
337
return util_format_get_depth_only(format);
338
} else {
339
return format;
340
}
341
}
342
343
/**
344
* The pipe->blit() driver hook.
345
*
346
* This performs a blit between two surfaces, which copies data but may
347
* also perform format conversion, scaling, flipping, and so on.
348
*/
349
static void
350
iris_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
351
{
352
struct iris_context *ice = (void *) ctx;
353
struct iris_screen *screen = (struct iris_screen *)ctx->screen;
354
const struct intel_device_info *devinfo = &screen->devinfo;
355
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
356
enum blorp_batch_flags blorp_flags = 0;
357
358
/* We don't support color masking. */
359
assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA ||
360
(info->mask & PIPE_MASK_RGBA) == 0);
361
362
if (info->render_condition_enable) {
363
if (ice->state.predicate == IRIS_PREDICATE_STATE_DONT_RENDER)
364
return;
365
366
if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT)
367
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
368
}
369
370
float src_x0 = info->src.box.x;
371
float src_x1 = info->src.box.x + info->src.box.width;
372
float src_y0 = info->src.box.y;
373
float src_y1 = info->src.box.y + info->src.box.height;
374
float dst_x0 = info->dst.box.x;
375
float dst_x1 = info->dst.box.x + info->dst.box.width;
376
float dst_y0 = info->dst.box.y;
377
float dst_y1 = info->dst.box.y + info->dst.box.height;
378
bool mirror_x = apply_mirror(&src_x0, &src_x1);
379
bool mirror_y = apply_mirror(&src_y0, &src_y1);
380
enum blorp_filter filter;
381
382
if (info->scissor_enable) {
383
bool noop = apply_blit_scissor(&info->scissor,
384
&src_x0, &src_y0, &src_x1, &src_y1,
385
&dst_x0, &dst_y0, &dst_x1, &dst_y1,
386
mirror_x, mirror_y);
387
if (noop)
388
return;
389
}
390
391
if (abs(info->dst.box.width) == abs(info->src.box.width) &&
392
abs(info->dst.box.height) == abs(info->src.box.height)) {
393
if (info->src.resource->nr_samples > 1 &&
394
info->dst.resource->nr_samples <= 1) {
395
/* The OpenGL ES 3.2 specification, section 16.2.1, says:
396
*
397
* "If the read framebuffer is multisampled (its effective
398
* value of SAMPLE_BUFFERS is one) and the draw framebuffer
399
* is not (its value of SAMPLE_BUFFERS is zero), the samples
400
* corresponding to each pixel location in the source are
401
* converted to a single sample before being written to the
402
* destination. The filter parameter is ignored. If the
403
* source formats are integer types or stencil values, a
404
* single sample’s value is selected for each pixel. If the
405
* source formats are floating-point or normalized types,
406
* the sample values for each pixel are resolved in an
407
* implementation-dependent manner. If the source formats
408
* are depth values, sample values are resolved in an
409
* implementation-dependent manner where the result will be
410
* between the minimum and maximum depth values in the pixel."
411
*
412
* When selecting a single sample, we always choose sample 0.
413
*/
414
if (util_format_is_depth_or_stencil(info->src.format) ||
415
util_format_is_pure_integer(info->src.format)) {
416
filter = BLORP_FILTER_SAMPLE_0;
417
} else {
418
filter = BLORP_FILTER_AVERAGE;
419
}
420
} else {
421
/* The OpenGL 4.6 specification, section 18.3.1, says:
422
*
423
* "If the source and destination dimensions are identical,
424
* no filtering is applied."
425
*
426
* Using BLORP_FILTER_NONE will also handle the upsample case by
427
* replicating the one value in the source to all values in the
428
* destination.
429
*/
430
filter = BLORP_FILTER_NONE;
431
}
432
} else if (info->filter == PIPE_TEX_FILTER_LINEAR) {
433
filter = BLORP_FILTER_BILINEAR;
434
} else {
435
filter = BLORP_FILTER_NEAREST;
436
}
437
438
struct blorp_batch blorp_batch;
439
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
440
441
float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth;
442
443
/* There is no interpolation to the pixel center during rendering, so
444
* add the 0.5 offset ourselves here.
445
*/
446
float depth_center_offset = 0;
447
if (info->src.resource->target == PIPE_TEXTURE_3D)
448
depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth;
449
450
/* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is
451
* used to represent the color aspect. */
452
unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS);
453
while (aspect_mask) {
454
unsigned aspect = 1 << u_bit_scan(&aspect_mask);
455
456
struct iris_resource *src_res =
457
iris_resource_for_aspect(info->src.resource, aspect);
458
struct iris_resource *dst_res =
459
iris_resource_for_aspect(info->dst.resource, aspect);
460
461
enum pipe_format src_pfmt =
462
pipe_format_for_aspect(info->src.format, aspect);
463
enum pipe_format dst_pfmt =
464
pipe_format_for_aspect(info->dst.format, aspect);
465
466
if (iris_resource_unfinished_aux_import(src_res))
467
iris_resource_finish_aux_import(ctx->screen, src_res);
468
if (iris_resource_unfinished_aux_import(dst_res))
469
iris_resource_finish_aux_import(ctx->screen, dst_res);
470
471
struct iris_format_info src_fmt =
472
iris_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT);
473
enum isl_aux_usage src_aux_usage =
474
iris_resource_texture_aux_usage(ice, src_res, src_fmt.fmt);
475
476
iris_resource_prepare_texture(ice, src_res, src_fmt.fmt,
477
info->src.level, 1, info->src.box.z,
478
info->src.box.depth);
479
iris_emit_buffer_barrier_for(batch, src_res->bo,
480
IRIS_DOMAIN_OTHER_READ);
481
482
struct iris_format_info dst_fmt =
483
iris_format_for_usage(devinfo, dst_pfmt,
484
ISL_SURF_USAGE_RENDER_TARGET_BIT);
485
enum isl_aux_usage dst_aux_usage =
486
iris_resource_render_aux_usage(ice, dst_res, info->dst.level,
487
dst_fmt.fmt, false);
488
489
struct blorp_surf src_surf, dst_surf;
490
iris_blorp_surf_for_resource(&screen->isl_dev, &src_surf,
491
&src_res->base.b, src_aux_usage,
492
info->src.level, false);
493
iris_blorp_surf_for_resource(&screen->isl_dev, &dst_surf,
494
&dst_res->base.b, dst_aux_usage,
495
info->dst.level, true);
496
497
iris_resource_prepare_render(ice, dst_res, info->dst.level,
498
info->dst.box.z, info->dst.box.depth,
499
dst_aux_usage);
500
iris_emit_buffer_barrier_for(batch, dst_res->bo,
501
IRIS_DOMAIN_RENDER_WRITE);
502
503
if (iris_batch_references(batch, src_res->bo))
504
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
505
506
if (dst_res->base.b.target == PIPE_BUFFER) {
507
util_range_add(&dst_res->base.b, &dst_res->valid_buffer_range,
508
dst_x0, dst_x1);
509
}
510
511
for (int slice = 0; slice < info->dst.box.depth; slice++) {
512
unsigned dst_z = info->dst.box.z + slice;
513
float src_z = info->src.box.z + slice * src_z_step +
514
depth_center_offset;
515
516
iris_batch_maybe_flush(batch, 1500);
517
iris_batch_sync_region_start(batch);
518
519
blorp_blit(&blorp_batch,
520
&src_surf, info->src.level, src_z,
521
src_fmt.fmt, src_fmt.swizzle,
522
&dst_surf, info->dst.level, dst_z,
523
dst_fmt.fmt, dst_fmt.swizzle,
524
src_x0, src_y0, src_x1, src_y1,
525
dst_x0, dst_y0, dst_x1, dst_y1,
526
filter, mirror_x, mirror_y);
527
528
iris_batch_sync_region_end(batch);
529
}
530
531
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
532
533
iris_resource_finish_render(ice, dst_res, info->dst.level,
534
info->dst.box.z, info->dst.box.depth,
535
dst_aux_usage);
536
}
537
538
blorp_batch_finish(&blorp_batch);
539
540
iris_flush_and_dirty_for_history(ice, batch, (struct iris_resource *)
541
info->dst.resource,
542
PIPE_CONTROL_RENDER_TARGET_FLUSH,
543
"cache history: post-blit");
544
}
545
546
static void
547
get_copy_region_aux_settings(struct iris_context *ice,
548
struct iris_resource *res,
549
unsigned level,
550
enum isl_aux_usage *out_aux_usage,
551
bool *out_clear_supported,
552
bool is_render_target)
553
{
554
struct iris_screen *screen = (void *) ice->ctx.screen;
555
struct intel_device_info *devinfo = &screen->devinfo;
556
557
switch (res->aux.usage) {
558
case ISL_AUX_USAGE_HIZ:
559
case ISL_AUX_USAGE_HIZ_CCS:
560
case ISL_AUX_USAGE_HIZ_CCS_WT:
561
case ISL_AUX_USAGE_STC_CCS:
562
if (is_render_target) {
563
*out_aux_usage = iris_resource_render_aux_usage(ice, res, level,
564
res->surf.format,
565
false);
566
} else {
567
*out_aux_usage = iris_resource_texture_aux_usage(ice, res,
568
res->surf.format);
569
}
570
*out_clear_supported = isl_aux_usage_has_fast_clears(*out_aux_usage);
571
break;
572
case ISL_AUX_USAGE_MCS:
573
case ISL_AUX_USAGE_MCS_CCS:
574
if (!is_render_target &&
575
!iris_can_sample_mcs_with_clear(devinfo, res)) {
576
*out_aux_usage = res->aux.usage;
577
*out_clear_supported = false;
578
break;
579
}
580
FALLTHROUGH;
581
case ISL_AUX_USAGE_CCS_E:
582
case ISL_AUX_USAGE_GFX12_CCS_E:
583
*out_aux_usage = res->aux.usage;
584
585
/* blorp_copy may reinterpret the surface format and has limited support
586
* for adjusting the clear color, so clear support may only be enabled
587
* in some cases:
588
*
589
* - On gfx11+, the clear color is indirect and comes in two forms: a
590
* 32bpc representation used for rendering and a pixel representation
591
* used for sampling. blorp_copy doesn't change indirect clear colors,
592
* so clears are only supported in the sampling case.
593
*
594
* - A clear color of zeroes holds the same meaning regardless of the
595
* format. Although it could avoid more resolves, we don't use
596
* isl_color_value_is_zero because the surface format used by
597
* blorp_copy isn't guaranteed to access the same components as the
598
* original format (e.g. A8_UNORM/R8_UINT).
599
*/
600
*out_clear_supported = (devinfo->ver >= 11 && !is_render_target) ||
601
(res->aux.clear_color.u32[0] == 0 &&
602
res->aux.clear_color.u32[1] == 0 &&
603
res->aux.clear_color.u32[2] == 0 &&
604
res->aux.clear_color.u32[3] == 0);
605
break;
606
default:
607
*out_aux_usage = ISL_AUX_USAGE_NONE;
608
*out_clear_supported = false;
609
break;
610
}
611
}
612
613
/**
614
* Perform a GPU-based raw memory copy between compatible view classes.
615
*
616
* Does not perform any flushing - the new data may still be left in the
617
* render cache, and old data may remain in other caches.
618
*
619
* Wraps blorp_copy() and blorp_buffer_copy().
620
*/
621
void
622
iris_copy_region(struct blorp_context *blorp,
623
struct iris_batch *batch,
624
struct pipe_resource *dst,
625
unsigned dst_level,
626
unsigned dstx, unsigned dsty, unsigned dstz,
627
struct pipe_resource *src,
628
unsigned src_level,
629
const struct pipe_box *src_box)
630
{
631
struct blorp_batch blorp_batch;
632
struct iris_context *ice = blorp->driver_ctx;
633
struct iris_screen *screen = (void *) ice->ctx.screen;
634
struct iris_resource *src_res = (void *) src;
635
struct iris_resource *dst_res = (void *) dst;
636
637
enum isl_aux_usage src_aux_usage, dst_aux_usage;
638
bool src_clear_supported, dst_clear_supported;
639
get_copy_region_aux_settings(ice, src_res, src_level, &src_aux_usage,
640
&src_clear_supported, false);
641
get_copy_region_aux_settings(ice, dst_res, dst_level, &dst_aux_usage,
642
&dst_clear_supported, true);
643
644
if (iris_batch_references(batch, src_res->bo))
645
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
646
647
if (dst->target == PIPE_BUFFER)
648
util_range_add(&dst_res->base.b, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);
649
650
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
651
struct blorp_address src_addr = {
652
.buffer = iris_resource_bo(src), .offset = src_box->x,
653
};
654
struct blorp_address dst_addr = {
655
.buffer = iris_resource_bo(dst), .offset = dstx,
656
.reloc_flags = EXEC_OBJECT_WRITE,
657
};
658
659
iris_emit_buffer_barrier_for(batch, iris_resource_bo(src),
660
IRIS_DOMAIN_OTHER_READ);
661
iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst),
662
IRIS_DOMAIN_RENDER_WRITE);
663
664
iris_batch_maybe_flush(batch, 1500);
665
666
iris_batch_sync_region_start(batch);
667
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
668
blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);
669
blorp_batch_finish(&blorp_batch);
670
iris_batch_sync_region_end(batch);
671
} else {
672
// XXX: what about one surface being a buffer and not the other?
673
674
struct blorp_surf src_surf, dst_surf;
675
iris_blorp_surf_for_resource(&screen->isl_dev, &src_surf,
676
src, src_aux_usage, src_level, false);
677
iris_blorp_surf_for_resource(&screen->isl_dev, &dst_surf,
678
dst, dst_aux_usage, dst_level, true);
679
680
iris_resource_prepare_access(ice, src_res, src_level, 1,
681
src_box->z, src_box->depth,
682
src_aux_usage, src_clear_supported);
683
iris_resource_prepare_access(ice, dst_res, dst_level, 1,
684
dstz, src_box->depth,
685
dst_aux_usage, dst_clear_supported);
686
687
iris_emit_buffer_barrier_for(batch, iris_resource_bo(src),
688
IRIS_DOMAIN_OTHER_READ);
689
iris_emit_buffer_barrier_for(batch, iris_resource_bo(dst),
690
IRIS_DOMAIN_RENDER_WRITE);
691
692
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
693
694
for (int slice = 0; slice < src_box->depth; slice++) {
695
iris_batch_maybe_flush(batch, 1500);
696
697
iris_batch_sync_region_start(batch);
698
blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,
699
&dst_surf, dst_level, dstz + slice,
700
src_box->x, src_box->y, dstx, dsty,
701
src_box->width, src_box->height);
702
iris_batch_sync_region_end(batch);
703
}
704
blorp_batch_finish(&blorp_batch);
705
706
iris_resource_finish_write(ice, dst_res, dst_level, dstz,
707
src_box->depth, dst_aux_usage);
708
}
709
710
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
711
}
712
713
static struct iris_batch *
714
get_preferred_batch(struct iris_context *ice, struct iris_bo *bo)
715
{
716
/* If the compute batch is already using this buffer, we'd prefer to
717
* continue queueing in the compute batch.
718
*/
719
if (iris_batch_references(&ice->batches[IRIS_BATCH_COMPUTE], bo))
720
return &ice->batches[IRIS_BATCH_COMPUTE];
721
722
/* Otherwise default to the render batch. */
723
return &ice->batches[IRIS_BATCH_RENDER];
724
}
725
726
727
/**
728
* The pipe->resource_copy_region() driver hook.
729
*
730
* This implements ARB_copy_image semantics - a raw memory copy between
731
* compatible view classes.
732
*/
733
static void
734
iris_resource_copy_region(struct pipe_context *ctx,
735
struct pipe_resource *p_dst,
736
unsigned dst_level,
737
unsigned dstx, unsigned dsty, unsigned dstz,
738
struct pipe_resource *p_src,
739
unsigned src_level,
740
const struct pipe_box *src_box)
741
{
742
struct iris_context *ice = (void *) ctx;
743
struct iris_screen *screen = (void *) ctx->screen;
744
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
745
struct iris_resource *src = (void *) p_src;
746
struct iris_resource *dst = (void *) p_dst;
747
748
if (iris_resource_unfinished_aux_import(src))
749
iris_resource_finish_aux_import(ctx->screen, src);
750
if (iris_resource_unfinished_aux_import(dst))
751
iris_resource_finish_aux_import(ctx->screen, dst);
752
753
/* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */
754
if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER &&
755
dstx % 4 == 0 && src_box->x % 4 == 0 &&
756
src_box->width % 4 == 0 && src_box->width <= 16) {
757
struct iris_bo *dst_bo = iris_resource_bo(p_dst);
758
batch = get_preferred_batch(ice, dst_bo);
759
iris_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));
760
iris_emit_pipe_control_flush(batch,
761
"stall for MI_COPY_MEM_MEM copy_region",
762
PIPE_CONTROL_CS_STALL);
763
screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, iris_resource_bo(p_src),
764
src_box->x, src_box->width);
765
return;
766
}
767
768
iris_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz,
769
p_src, src_level, src_box);
770
771
if (util_format_is_depth_and_stencil(p_dst->format) &&
772
util_format_has_stencil(util_format_description(p_src->format))) {
773
struct iris_resource *junk, *s_src_res, *s_dst_res;
774
iris_get_depth_stencil_resources(p_src, &junk, &s_src_res);
775
iris_get_depth_stencil_resources(p_dst, &junk, &s_dst_res);
776
777
iris_copy_region(&ice->blorp, batch, &s_dst_res->base.b, dst_level, dstx,
778
dsty, dstz, &s_src_res->base.b, src_level, src_box);
779
}
780
781
iris_flush_and_dirty_for_history(ice, batch, dst,
782
PIPE_CONTROL_RENDER_TARGET_FLUSH,
783
"cache history: post copy_region");
784
}
785
786
void
787
iris_init_blit_functions(struct pipe_context *ctx)
788
{
789
ctx->blit = iris_blit;
790
ctx->resource_copy_region = iris_resource_copy_region;
791
}
792
793