Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_blt.c
4570 views
1
/*
2
* Copyright © 2018 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included
12
* in all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
* DEALINGS IN THE SOFTWARE.
21
*/
22
23
/* blt command encoding for gen4/5 */
24
#include "crocus_context.h"
25
26
#include "crocus_genx_macros.h"
27
#include "crocus_genx_protos.h"
28
#include "crocus_resource.h"
29
30
#define FILE_DEBUG_FLAG DEBUG_BLIT
31
32
#if GFX_VER <= 5
33
34
static uint32_t
35
color_depth_for_cpp(int cpp)
36
{
37
switch (cpp) {
38
case 4: return COLOR_DEPTH__32bit;
39
case 2: return COLOR_DEPTH__565;
40
case 1: return COLOR_DEPTH__8bit;
41
default:
42
unreachable("not reached");
43
}
44
}
45
46
static void
47
blt_set_alpha_to_one(struct crocus_batch *batch,
48
struct crocus_resource *dst,
49
int x, int y, int width, int height)
50
{
51
const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format);
52
unsigned cpp = fmtl->bpb / 8;
53
uint32_t pitch = dst->surf.row_pitch_B;
54
55
if (dst->surf.tiling != ISL_TILING_LINEAR)
56
pitch /= 4;
57
/* We need to split the blit into chunks that each fit within the blitter's
58
* restrictions. We can't use a chunk size of 32768 because we need to
59
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
60
* a nice round power of two, big enough that performance won't suffer, and
61
* small enough to guarantee everything fits.
62
*/
63
const uint32_t max_chunk_size = 16384;
64
65
for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
66
for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
67
const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
68
const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
69
uint32_t tile_x, tile_y, offset;
70
ASSERTED uint32_t z_offset_el, array_offset;
71
isl_tiling_get_intratile_offset_el(dst->surf.tiling,
72
cpp * 8, dst->surf.row_pitch_B,
73
dst->surf.array_pitch_el_rows,
74
chunk_x, chunk_y, 0, 0,
75
&offset,
76
&tile_x, &tile_y,
77
&z_offset_el, &array_offset);
78
assert(z_offset_el == 0);
79
assert(array_offset == 0);
80
crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) {
81
xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
82
xyblt.ColorDepth = color_depth_for_cpp(cpp);
83
xyblt.RasterOperation = 0xF0;
84
xyblt.DestinationPitch = pitch;
85
xyblt._32bppByteMask = 2;
86
xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset);
87
xyblt.DestinationX1Coordinate = tile_x;
88
xyblt.DestinationY1Coordinate = tile_y;
89
xyblt.DestinationX2Coordinate = tile_x + chunk_w;
90
xyblt.DestinationY2Coordinate = tile_y + chunk_h;
91
xyblt.SolidPatternColor = 0xffffffff;
92
}
93
}
94
}
95
}
96
97
static bool validate_blit_for_blt(struct crocus_batch *batch,
98
const struct pipe_blit_info *info)
99
{
100
/* If the source and destination are the same size with no mirroring,
101
* the rectangles are within the size of the texture and there is no
102
* scissor, then we can probably use the blit engine.
103
*/
104
if (info->dst.box.width != info->src.box.width ||
105
info->dst.box.height != info->src.box.height)
106
return false;
107
108
if (info->scissor_enable)
109
return false;
110
111
if (info->dst.box.height < 0 || info->src.box.height < 0)
112
return false;
113
114
if (info->dst.box.depth > 1 || info->src.box.depth > 1)
115
return false;
116
117
const struct util_format_description *desc =
118
util_format_description(info->src.format);
119
int i = util_format_get_first_non_void_channel(info->src.format);
120
if (i == -1)
121
return false;
122
123
/* can't do the alpha to 1 setting for these. */
124
if ((util_format_has_alpha1(info->src.format) &&
125
util_format_has_alpha(info->dst.format) &&
126
desc->channel[i].size > 8))
127
return false;
128
return true;
129
}
130
131
static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
132
{
133
int pitch = res->surf.row_pitch_B;
134
if (res->surf.tiling != ISL_TILING_LINEAR)
135
pitch /= 4;
136
return pitch;
137
}
138
139
140
static bool emit_copy_blt(struct crocus_batch *batch,
141
struct crocus_resource *src,
142
struct crocus_resource *dst,
143
unsigned cpp,
144
int32_t src_pitch,
145
unsigned src_offset,
146
int32_t dst_pitch,
147
unsigned dst_offset,
148
uint16_t src_x, uint16_t src_y,
149
uint16_t dst_x, uint16_t dst_y,
150
uint16_t w, uint16_t h)
151
152
{
153
uint32_t src_tile_w, src_tile_h;
154
uint32_t dst_tile_w, dst_tile_h;
155
int dst_y2 = dst_y + h;
156
int dst_x2 = dst_x + w;
157
158
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
159
__func__,
160
src, src_pitch, src_offset, src_x, src_y,
161
dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
162
163
isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
164
isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
165
166
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
167
* (X direction width of the Tile). This is ensured while allocating the
168
* buffer object.
169
*/
170
assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
171
assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
172
173
/* For big formats (such as floating point), do the copy using 16 or
174
* 32bpp and multiply the coordinates.
175
*/
176
if (cpp > 4) {
177
if (cpp % 4 == 2) {
178
dst_x *= cpp / 2;
179
dst_x2 *= cpp / 2;
180
src_x *= cpp / 2;
181
cpp = 2;
182
} else {
183
assert(cpp % 4 == 0);
184
dst_x *= cpp / 4;
185
dst_x2 *= cpp / 4;
186
src_x *= cpp / 4;
187
cpp = 4;
188
}
189
}
190
191
/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
192
* the low bits. Offsets must be naturally aligned.
193
*/
194
if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
195
dst_pitch % 4 != 0 || dst_offset % cpp != 0)
196
return false;
197
198
/* For tiled source and destination, pitch value should be specified
199
* as a number of Dwords.
200
*/
201
if (dst->surf.tiling != ISL_TILING_LINEAR)
202
dst_pitch /= 4;
203
204
if (src->surf.tiling != ISL_TILING_LINEAR)
205
src_pitch /= 4;
206
207
assert(cpp <= 4);
208
crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
209
xyblt.RasterOperation = 0xCC;
210
xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
211
xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
212
xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
213
xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
214
xyblt.ColorDepth = color_depth_for_cpp(cpp);
215
xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
216
xyblt.DestinationX1Coordinate = dst_x;
217
xyblt.DestinationY1Coordinate = dst_y;
218
xyblt.DestinationX2Coordinate = dst_x2;
219
xyblt.DestinationY2Coordinate = dst_y2;
220
xyblt.DestinationPitch = dst_pitch;
221
xyblt.SourceX1Coordinate = src_x;
222
xyblt.SourceY1Coordinate = src_y;
223
xyblt.SourcePitch = src_pitch;
224
};
225
226
crocus_emit_mi_flush(batch);
227
return true;
228
}
229
230
static bool crocus_emit_blt(struct crocus_batch *batch,
231
struct crocus_resource *src,
232
struct crocus_resource *dst,
233
unsigned dst_level,
234
unsigned dst_x, unsigned dst_y,
235
unsigned dst_z,
236
unsigned src_level,
237
const struct pipe_box *src_box)
238
{
239
const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
240
unsigned src_cpp = src_fmtl->bpb / 8;
241
const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
242
const unsigned dst_cpp = dst_fmtl->bpb / 8;
243
uint16_t src_x, src_y;
244
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
245
uint32_t src_width = src_box->width, src_height = src_box->height;
246
247
/* gen4/5 can't handle Y tiled blits. */
248
if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
249
return false;
250
251
if (src->surf.format != dst->surf.format)
252
return false;
253
254
if (src_cpp != dst_cpp)
255
return false;
256
257
src_x = src_box->x;
258
src_y = src_box->y;
259
260
assert(src_cpp == dst_cpp);
261
262
crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
263
&src_image_y);
264
if (util_format_is_compressed(src->base.b.format)) {
265
int bw = util_format_get_blockwidth(src->base.b.format);
266
int bh = util_format_get_blockheight(src->base.b.format);
267
assert(src_x % bw == 0);
268
assert(src_y % bh == 0);
269
src_x /= (int)bw;
270
src_y /= (int)bh;
271
src_width = DIV_ROUND_UP(src_width, (int)bw);
272
src_height = DIV_ROUND_UP(src_height, (int)bh);
273
}
274
275
crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
276
&dst_image_y);
277
if (util_format_is_compressed(dst->base.b.format)) {
278
int bw = util_format_get_blockwidth(dst->base.b.format);
279
int bh = util_format_get_blockheight(dst->base.b.format);
280
assert(dst_x % bw == 0);
281
assert(dst_y % bh == 0);
282
dst_x /= (int)bw;
283
dst_y /= (int)bh;
284
}
285
src_x += src_image_x;
286
src_y += src_image_y;
287
dst_x += dst_image_x;
288
dst_y += dst_image_y;
289
290
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
291
* Data Size Limitations):
292
*
293
* The BLT engine is capable of transferring very large quantities of
294
* graphics data. Any graphics data read from and written to the
295
* destination is permitted to represent a number of pixels that
296
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
297
* at the destination. The maximum number of pixels that may be
298
* represented per scan line’s worth of graphics data depends on the
299
* color depth.
300
*
301
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
302
* for linear surfaces and DWords for tiled surfaces. So the maximum
303
* pitch is 32k linear and 128k tiled.
304
*/
305
if (crocus_resource_blt_pitch(src) >= 32768 ||
306
crocus_resource_blt_pitch(dst) >= 32768) {
307
return false;
308
}
309
310
/* We need to split the blit into chunks that each fit within the blitter's
311
* restrictions. We can't use a chunk size of 32768 because we need to
312
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
313
* a nice round power of two, big enough that performance won't suffer, and
314
* small enough to guarantee everything fits.
315
*/
316
const uint32_t max_chunk_size = 16384;
317
318
for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
319
for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
320
const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
321
const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
322
323
ASSERTED uint32_t z_offset_el, array_offset;
324
uint32_t src_offset, src_tile_x, src_tile_y;
325
isl_tiling_get_intratile_offset_el(src->surf.tiling,
326
src_cpp * 8, src->surf.row_pitch_B,
327
src->surf.array_pitch_el_rows,
328
src_x + chunk_x, src_y + chunk_y, 0, 0,
329
&src_offset,
330
&src_tile_x, &src_tile_y,
331
&z_offset_el, &array_offset);
332
assert(z_offset_el == 0);
333
assert(array_offset == 0);
334
335
uint32_t dst_offset, dst_tile_x, dst_tile_y;
336
isl_tiling_get_intratile_offset_el(dst->surf.tiling,
337
dst_cpp * 8, dst->surf.row_pitch_B,
338
dst->surf.array_pitch_el_rows,
339
dst_x + chunk_x, dst_y + chunk_y, 0, 0,
340
&dst_offset,
341
&dst_tile_x, &dst_tile_y,
342
&z_offset_el, &array_offset);
343
assert(z_offset_el == 0);
344
assert(array_offset == 0);
345
if (!emit_copy_blt(batch, src, dst,
346
src_cpp, src->surf.row_pitch_B,
347
src_offset,
348
dst->surf.row_pitch_B, dst_offset,
349
src_tile_x, src_tile_y,
350
dst_tile_x, dst_tile_y,
351
chunk_w, chunk_h)) {
352
return false;
353
}
354
}
355
}
356
357
if (util_format_has_alpha1(src->base.b.format) &&
358
util_format_has_alpha(dst->base.b.format))
359
blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height);
360
return true;
361
}
362
363
static bool crocus_blit_blt(struct crocus_batch *batch,
364
const struct pipe_blit_info *info)
365
{
366
if (!validate_blit_for_blt(batch, info))
367
return false;
368
369
return crocus_emit_blt(batch,
370
(struct crocus_resource *)info->src.resource,
371
(struct crocus_resource *)info->dst.resource,
372
info->dst.level,
373
info->dst.box.x,
374
info->dst.box.y,
375
info->dst.box.z,
376
info->src.level,
377
&info->src.box);
378
}
379
380
381
static bool crocus_copy_region_blt(struct crocus_batch *batch,
382
struct crocus_resource *dst,
383
unsigned dst_level,
384
unsigned dstx, unsigned dsty, unsigned dstz,
385
struct crocus_resource *src,
386
unsigned src_level,
387
const struct pipe_box *src_box)
388
{
389
if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER)
390
return false;
391
return crocus_emit_blt(batch,
392
src,
393
dst,
394
dst_level,
395
dstx, dsty, dstz,
396
src_level,
397
src_box);
398
}
399
#endif
400
401
void
402
genX(crocus_init_blt)(struct crocus_screen *screen)
403
{
404
#if GFX_VER <= 5
405
screen->vtbl.blit_blt = crocus_blit_blt;
406
screen->vtbl.copy_region_blt = crocus_copy_region_blt;
407
#else
408
screen->vtbl.blit_blt = NULL;
409
screen->vtbl.copy_region_blt = NULL;
410
#endif
411
}
412
413