Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/nir/nir_gather_xfb_info.c
4545 views
1
/*
2
* Copyright © 2018 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "nir_xfb_info.h"
25
26
#include <util/u_math.h>
27
28
static void
29
add_var_xfb_varying(nir_xfb_info *xfb,
30
nir_xfb_varyings_info *varyings,
31
unsigned buffer,
32
unsigned offset,
33
const struct glsl_type *type)
34
{
35
if (varyings == NULL)
36
return;
37
38
nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++];
39
40
varying->type = type;
41
varying->buffer = buffer;
42
varying->offset = offset;
43
xfb->buffers[buffer].varying_count++;
44
}
45
46
47
/*
 * Allocate a nir_xfb_info sized for output_count outputs.
 *
 * The allocation is made with rzalloc_size() using mem_ctx as the context
 * and the byte size reported by nir_xfb_info_size().
 */
static nir_xfb_info *
nir_xfb_info_create(void *mem_ctx, uint16_t output_count)
{
   const size_t bytes = nir_xfb_info_size(output_count);
   nir_xfb_info *info = rzalloc_size(mem_ctx, bytes);

   return info;
}
52
53
static size_t
54
nir_xfb_varyings_info_size(uint16_t varying_count)
55
{
56
return sizeof(nir_xfb_info) + sizeof(nir_xfb_varying_info) * varying_count;
57
}
58
59
/*
 * Allocate a nir_xfb_varyings_info with room for varying_count varyings.
 *
 * The allocation is made with rzalloc_size() using mem_ctx as the context
 * and the byte size reported by nir_xfb_varyings_info_size().
 */
static nir_xfb_varyings_info *
nir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count)
{
   const size_t bytes = nir_xfb_varyings_info_size(varying_count);
   nir_xfb_varyings_info *info = rzalloc_size(mem_ctx, bytes);

   return info;
}
64
65
static void
66
add_var_xfb_outputs(nir_xfb_info *xfb,
67
nir_xfb_varyings_info *varyings,
68
nir_variable *var,
69
unsigned buffer,
70
unsigned *location,
71
unsigned *offset,
72
const struct glsl_type *type,
73
bool varying_added)
74
{
75
/* If this type contains a 64-bit value, align to 8 bytes */
76
if (glsl_type_contains_64bit(type))
77
*offset = ALIGN_POT(*offset, 8);
78
79
if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
80
unsigned length = glsl_get_length(type);
81
82
const struct glsl_type *child_type = glsl_get_array_element(type);
83
if (!glsl_type_is_array(child_type) &&
84
!glsl_type_is_struct(child_type)) {
85
86
add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
87
varying_added = true;
88
}
89
90
for (unsigned i = 0; i < length; i++)
91
add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
92
child_type, varying_added);
93
} else if (glsl_type_is_struct_or_ifc(type)) {
94
unsigned length = glsl_get_length(type);
95
for (unsigned i = 0; i < length; i++) {
96
const struct glsl_type *child_type = glsl_get_struct_field(type, i);
97
add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
98
child_type, varying_added);
99
}
100
} else {
101
assert(buffer < NIR_MAX_XFB_BUFFERS);
102
if (xfb->buffers_written & (1 << buffer)) {
103
assert(xfb->buffers[buffer].stride == var->data.xfb.stride);
104
assert(xfb->buffer_to_stream[buffer] == var->data.stream);
105
} else {
106
xfb->buffers_written |= (1 << buffer);
107
xfb->buffers[buffer].stride = var->data.xfb.stride;
108
xfb->buffer_to_stream[buffer] = var->data.stream;
109
}
110
111
assert(var->data.stream < NIR_MAX_XFB_STREAMS);
112
xfb->streams_written |= (1 << var->data.stream);
113
114
unsigned comp_slots;
115
if (var->data.compact) {
116
/* This only happens for clip/cull which are float arrays */
117
assert(glsl_without_array(type) == glsl_float_type());
118
assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
119
var->data.location == VARYING_SLOT_CLIP_DIST1);
120
comp_slots = glsl_get_length(type);
121
} else {
122
comp_slots = glsl_get_component_slots(type);
123
124
UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
125
assert(attrib_slots == glsl_count_attribute_slots(type, false));
126
127
/* Ensure that we don't have, for instance, a dvec2 with a
128
* location_frac of 2 which would make it crass a location boundary
129
* even though it fits in a single slot. However, you can have a
130
* dvec3 which crosses the slot boundary with a location_frac of 2.
131
*/
132
assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
133
attrib_slots);
134
}
135
136
assert(var->data.location_frac + comp_slots <= 8);
137
uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
138
unsigned comp_offset = var->data.location_frac;
139
140
if (!varying_added) {
141
add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
142
}
143
144
while (comp_mask) {
145
nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
146
147
output->buffer = buffer;
148
output->offset = *offset;
149
output->location = *location;
150
output->component_mask = comp_mask & 0xf;
151
output->component_offset = comp_offset;
152
153
*offset += util_bitcount(output->component_mask) * 4;
154
(*location)++;
155
comp_mask >>= 4;
156
comp_offset = 0;
157
}
158
}
159
}
160
161
static int
162
compare_xfb_varying_offsets(const void *_a, const void *_b)
163
{
164
const nir_xfb_varying_info *a = _a, *b = _b;
165
166
if (a->buffer != b->buffer)
167
return a->buffer - b->buffer;
168
169
return a->offset - b->offset;
170
}
171
172
static int
173
compare_xfb_output_offsets(const void *_a, const void *_b)
174
{
175
const nir_xfb_output_info *a = _a, *b = _b;
176
177
return a->offset - b->offset;
178
}
179
180
nir_xfb_info *
181
nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
182
{
183
return nir_gather_xfb_info_with_varyings(shader, mem_ctx, NULL);
184
}
185
186
nir_xfb_info *
187
nir_gather_xfb_info_with_varyings(const nir_shader *shader,
188
void *mem_ctx,
189
nir_xfb_varyings_info **varyings_info_out)
190
{
191
assert(shader->info.stage == MESA_SHADER_VERTEX ||
192
shader->info.stage == MESA_SHADER_TESS_EVAL ||
193
shader->info.stage == MESA_SHADER_GEOMETRY);
194
195
/* Compute the number of outputs we have. This is simply the number of
196
* cumulative locations consumed by all the variables. If a location is
197
* represented by multiple variables, then they each count separately in
198
* number of outputs. This is only an estimate as some variables may have
199
* an xfb_buffer but not an output so it may end up larger than we need but
200
* it should be good enough for allocation.
201
*/
202
unsigned num_outputs = 0;
203
unsigned num_varyings = 0;
204
nir_xfb_varyings_info *varyings_info = NULL;
205
nir_foreach_shader_out_variable(var, shader) {
206
if (var->data.explicit_xfb_buffer) {
207
num_outputs += glsl_count_attribute_slots(var->type, false);
208
num_varyings += glsl_varying_count(var->type);
209
}
210
}
211
if (num_outputs == 0 || num_varyings == 0)
212
return NULL;
213
214
nir_xfb_info *xfb = nir_xfb_info_create(mem_ctx, num_outputs);
215
if (varyings_info_out != NULL) {
216
*varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings);
217
varyings_info = *varyings_info_out;
218
}
219
220
/* Walk the list of outputs and add them to the array */
221
nir_foreach_shader_out_variable(var, shader) {
222
if (!var->data.explicit_xfb_buffer)
223
continue;
224
225
unsigned location = var->data.location;
226
227
/* In order to know if we have a array of blocks can't be done just by
228
* checking if we have an interface type and is an array, because due
229
* splitting we could end on a case were we received a split struct
230
* that contains an array.
231
*/
232
bool is_array_block = var->interface_type != NULL &&
233
glsl_type_is_array(var->type) &&
234
glsl_without_array(var->type) == var->interface_type;
235
236
if (var->data.explicit_offset && !is_array_block) {
237
unsigned offset = var->data.offset;
238
add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer,
239
&location, &offset, var->type, false);
240
} else if (is_array_block) {
241
assert(glsl_type_is_struct_or_ifc(var->interface_type));
242
243
unsigned aoa_size = glsl_get_aoa_size(var->type);
244
const struct glsl_type *itype = var->interface_type;
245
unsigned nfields = glsl_get_length(itype);
246
for (unsigned b = 0; b < aoa_size; b++) {
247
for (unsigned f = 0; f < nfields; f++) {
248
int foffset = glsl_get_struct_field_offset(itype, f);
249
const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
250
if (foffset < 0) {
251
location += glsl_count_attribute_slots(ftype, false);
252
continue;
253
}
254
255
unsigned offset = foffset;
256
add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b,
257
&location, &offset, ftype, false);
258
}
259
}
260
}
261
}
262
263
/* Everything is easier in the state setup code if outputs and varyings are
264
* sorted in order of output offset (and buffer for varyings).
265
*/
266
qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
267
compare_xfb_output_offsets);
268
269
if (varyings_info != NULL) {
270
qsort(varyings_info->varyings, varyings_info->varying_count,
271
sizeof(varyings_info->varyings[0]),
272
compare_xfb_varying_offsets);
273
}
274
275
#ifndef NDEBUG
276
/* Finally, do a sanity check */
277
unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
278
for (unsigned i = 0; i < xfb->output_count; i++) {
279
assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
280
assert(xfb->outputs[i].component_mask != 0);
281
unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
282
max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
283
}
284
#endif
285
286
return xfb;
287
}
288
289