GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pass.c
/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

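/* Total number of attachment references used by a subpass: inputs, colors,
 * one resolve per color if any resolve attachments are present, and one
 * depth/stencil slot if present. For example, a subpass with 1 input
 * attachment, 2 color attachments that are both resolved, and a
 * depth/stencil attachment uses 1 + 2 + 2 + 1 = 6 references.
 */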
static uint32_t
num_subpass_attachments(const VkSubpassDescription *desc)
{
   return desc->inputAttachmentCount +
          desc->colorAttachmentCount +
          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
          (desc->pDepthStencilAttachment != NULL);
}

static void
set_use_tlb_resolve(struct v3dv_device *device,
                    struct v3dv_render_pass_attachment *att)
{
   const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
   att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
}

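/* Records, for every attachment in the pass, the range of subpasses
 * [first_subpass, last_subpass] in which it is used as a color,
 * depth/stencil, input or resolve attachment, and flags color attachments
 * whose resolves can be done from the TLB.
 */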
static void
pass_find_subpass_range_for_attachments(struct v3dv_device *device,
                                        struct v3dv_render_pass *pass)
{
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      pass->attachments[i].first_subpass = pass->subpass_count - 1;
      pass->attachments[i].last_subpass = 0;
   }

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const struct v3dv_subpass *subpass = &pass->subpasses[i];

      for (uint32_t j = 0; j < subpass->color_count; j++) {
         uint32_t attachment_idx = subpass->color_attachments[j].attachment;
         if (attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         if (i < pass->attachments[attachment_idx].first_subpass)
            pass->attachments[attachment_idx].first_subpass = i;
         if (i > pass->attachments[attachment_idx].last_subpass)
            pass->attachments[attachment_idx].last_subpass = i;

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
            set_use_tlb_resolve(device, &pass->attachments[attachment_idx]);
         }
      }

      uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         if (i < pass->attachments[ds_attachment_idx].first_subpass)
            pass->attachments[ds_attachment_idx].first_subpass = i;
         if (i > pass->attachments[ds_attachment_idx].last_subpass)
            pass->attachments[ds_attachment_idx].last_subpass = i;
      }

      for (uint32_t j = 0; j < subpass->input_count; j++) {
         uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
         if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;
         if (i < pass->attachments[input_attachment_idx].first_subpass)
            pass->attachments[input_attachment_idx].first_subpass = i;
         if (i > pass->attachments[input_attachment_idx].last_subpass)
            pass->attachments[input_attachment_idx].last_subpass = i;
      }

      if (subpass->resolve_attachments) {
         for (uint32_t j = 0; j < subpass->color_count; j++) {
            uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
            if (attachment_idx == VK_ATTACHMENT_UNUSED)
               continue;
            if (i < pass->attachments[attachment_idx].first_subpass)
               pass->attachments[attachment_idx].first_subpass = i;
            if (i > pass->attachments[attachment_idx].last_subpass)
               pass->attachments[attachment_idx].last_subpass = i;
         }
      }
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass(VkDevice _device,
                      const VkRenderPassCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkRenderPass *pRenderPass)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_render_pass *pass;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);

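   /* The pass object, its subpass array and its attachment description array
    * are packed into a single host allocation; the per-subpass attachment
    * reference arrays are carved out of a second allocation further below.
    */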
   size_t size = sizeof(*pass);
   size_t subpasses_offset = size;
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   size_t attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = (void *) pass + attachments_offset;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = (void *) pass + subpasses_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
      pass->attachments[i].desc = pCreateInfo->pAttachments[i];

   uint32_t subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count += num_subpass_attachments(desc);
   }

   if (subpass_attachment_count) {
      const size_t subpass_attachment_bytes =
         subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
      pass->subpass_attachments =
         vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      pass->subpass_attachments = NULL;
   }

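   /* Walk the shared subpass_attachments allocation, handing each subpass its
    * input, color and resolve reference arrays.
    */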
   struct v3dv_subpass_attachment *p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      struct v3dv_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pInputAttachments[j].attachment,
               .layout = desc->pInputAttachments[j].layout,
            };
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pColorAttachments[j].attachment,
               .layout = desc->pColorAttachments[j].layout,
            };
         }
      }

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pResolveAttachments[j].attachment,
               .layout = desc->pResolveAttachments[j].layout,
            };
         }
      }

      if (desc->pDepthStencilAttachment) {
         subpass->ds_attachment = (struct v3dv_subpass_attachment) {
            .attachment = desc->pDepthStencilAttachment->attachment,
            .layout = desc->pDepthStencilAttachment->layout,
         };

         /* GFXH-1461: if depth is cleared but stencil is loaded (or vice
          * versa), the clear might get lost. If a subpass has this then we
          * can't emit the clear using the TLB and we have to do it as a draw
          * call.
          *
          * FIXME: separate stencil.
          */
         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
            struct v3dv_render_pass_attachment *att =
               &pass->attachments[subpass->ds_attachment.attachment];
            if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
               if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
                   att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                  subpass->do_depth_clear_with_draw = true;
               } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
                          att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                  subpass->do_stencil_clear_with_draw = true;
               }
            }
         }
      } else {
         subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
      }
   }

   pass_find_subpass_range_for_attachments(device, pass);

   /* FIXME: handle subpass dependencies */

   *pRenderPass = v3dv_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,
                       VkRenderPass _pass,
                       const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

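/* Picks the tile size used by a subpass. The tile_sizes table below runs
 * from 64x64 down to 8x8: having more than 1 (or more than 2) color
 * attachments and using wider internal formats both shrink the tile.
 *
 * Illustrative example, assuming internal_bpp is encoded as 0 for 32-bit,
 * 1 for 64-bit and 2 for 128-bit internal formats: a single 32-bit color
 * attachment gives idx 0 (64x64 tiles), while two color attachments with a
 * 64-bit maximum internal bpp give idx 1 + 1 = 2 (32x32 tiles).
 */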
static void
subpass_get_granularity(struct v3dv_device *device,
                        struct v3dv_render_pass *pass,
                        uint32_t subpass_idx,
                        VkExtent2D *granularity)
{
   static const uint8_t tile_sizes[] = {
      64, 64,
      64, 32,
      32, 32,
      32, 16,
      16, 16,
      16,  8,
       8,  8
   };

   /* Our tile size depends on the number of color attachments and the maximum
    * bpp across them.
    */
   assert(subpass_idx < pass->subpass_count);
   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
   const uint32_t color_attachment_count = subpass->color_count;

   uint32_t max_internal_bpp = 0;
   for (uint32_t i = 0; i < color_attachment_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;
      const VkAttachmentDescription *desc =
         &pass->attachments[attachment_idx].desc;
      const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
      uint32_t internal_type, internal_bpp;
      v3dv_X(device, get_internal_type_bpp_for_output_format)
         (format->rt_type, &internal_type, &internal_bpp);

      max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
   }

   uint32_t idx = 0;
   if (color_attachment_count > 2)
      idx += 2;
   else if (color_attachment_count > 1)
      idx += 1;

   idx += max_internal_bpp;

   assert(idx < ARRAY_SIZE(tile_sizes));
   *granularity = (VkExtent2D) {
      .width = tile_sizes[idx * 2],
      .height = tile_sizes[idx * 2 + 1]
   };
}

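/* vkGetRenderAreaGranularity: report a granularity that is valid for every
 * subpass, starting from the 64x64 maximum tile size and taking the
 * per-dimension minimum across all subpasses.
 *
 * Illustrative application-side usage (not part of the driver):
 *
 *    VkExtent2D granularity;
 *    vkGetRenderAreaGranularity(device, render_pass, &granularity);
 *    // Aligning renderArea.offset and renderArea.extent to this granularity
 *    // lets the driver apply tile-level load/store optimizations.
 */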
VKAPI_ATTR void VKAPI_CALL
v3dv_GetRenderAreaGranularity(VkDevice _device,
                              VkRenderPass renderPass,
                              VkExtent2D *pGranularity)
{
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   *pGranularity = (VkExtent2D) {
      .width = 64,
      .height = 64,
   };

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      VkExtent2D sg;
      subpass_get_granularity(device, pass, i, &sg);
      pGranularity->width = MIN2(pGranularity->width, sg.width);
      pGranularity->height = MIN2(pGranularity->height, sg.height);
   }
}

/* Checks whether the render area rectangle covers a region that is aligned to
 * tile boundaries. This means that we are writing to all pixels covered by
 * all tiles in that area (except for pixels on edge tiles that are outside
 * the framebuffer dimensions).
 *
 * When our framebuffer is aligned to tile boundaries we know we are writing
 * valid data to all pixels in each tile and we can apply certain
 * optimizations, like avoiding tile loads, since we know that none of the
 * original pixel values in each tile for that area need to be preserved.
 * We also use this to decide if we can use TLB clears, as these clear whole
 * tiles, so we can't use them if the render area is not aligned.
 *
 * Note that when an image is created it will possibly include padding blocks
 * depending on its tiling layout. When the framebuffer dimensions are not
 * aligned to tile boundaries then edge tiles are only partially covered by
 * the framebuffer pixels, but tile stores still seem to store full tiles,
 * writing to the padded sections. This is important when the framebuffer
 * is aliasing a smaller section of a larger image, as in that case the edge
 * tiles of the framebuffer would overwrite valid pixels in the larger image.
 * In that case, we can't flag the area as being aligned.
 */
bool
v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                  const VkRect2D *area,
                                  struct v3dv_framebuffer *fb,
                                  struct v3dv_render_pass *pass,
                                  uint32_t subpass_idx)
{
   assert(subpass_idx < pass->subpass_count);

   VkExtent2D granularity;
   subpass_get_granularity(device, pass, subpass_idx, &granularity);

   return area->offset.x % granularity.width == 0 &&
          area->offset.y % granularity.height == 0 &&
          (area->extent.width % granularity.width == 0 ||
           (fb->has_edge_padding &&
            area->offset.x + area->extent.width >= fb->width)) &&
          (area->extent.height % granularity.height == 0 ||
           (fb->has_edge_padding &&
            area->offset.y + area->extent.height >= fb->height));
}
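
/* Illustrative example of the edge-padding case handled above (hypothetical
 * numbers): with a 64x64 granularity and a 1920x1080 framebuffer, a render
 * area at offset (0, 0) with extent 1920x1080 is still considered tile
 * aligned even though 1080 is not a multiple of 64, provided
 * fb->has_edge_padding is set, because the area reaches the bottom edge of
 * the framebuffer.
 */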