GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_compiler.c
/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_compiler.h"
#include "brw_shader.h"
#include "brw_eu.h"
#include "dev/intel_debug.h"
#include "compiler/nir/nir.h"
#include "main/errors.h"
#include "util/debug.h"
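
/* NIR compiler options applied to every shader stage, in both the scalar
 * (fs) and vec4 backends.  Each .lower_* flag asks NIR to rewrite that
 * operation in terms of simpler ones before the backend sees it.
 */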
#define COMMON_OPTIONS \
   .lower_fdiv = true, \
   .lower_scmp = true, \
   .lower_flrp16 = true, \
   .lower_fmod = true, \
   .lower_bitfield_extract = true, \
   .lower_bitfield_insert = true, \
   .lower_uadd_carry = true, \
   .lower_usub_borrow = true, \
   .lower_flrp64 = true, \
   .lower_isign = true, \
   .lower_ldexp = true, \
   .lower_device_index_to_zero = true, \
   .vectorize_io = true, \
   .use_interpolated_input_intrinsics = true, \
   .lower_insert_byte = true, \
   .lower_insert_word = true, \
   .vertex_id_zero_based = true, \
   .lower_base_vertex = true, \
   .use_scoped_barrier = true, \
   .support_16bit_alu = true, \
   .lower_uniforms_to_ubo = true, \
   .has_txs = true
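
/* Extra lowering that only the scalar backend wants: scalarize vector ALU
 * operations and lower the 2x16/4x8 pack/unpack helpers to ordinary bit
 * manipulation.
 */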
#define COMMON_SCALAR_OPTIONS \
   .lower_to_scalar = true, \
   .lower_pack_half_2x16 = true, \
   .lower_pack_snorm_2x16 = true, \
   .lower_pack_snorm_4x8 = true, \
   .lower_pack_unorm_2x16 = true, \
   .lower_pack_unorm_4x8 = true, \
   .lower_unpack_half_2x16 = true, \
   .lower_unpack_snorm_2x16 = true, \
   .lower_unpack_snorm_4x8 = true, \
   .lower_unpack_unorm_2x16 = true, \
   .lower_unpack_unorm_4x8 = true, \
   .lower_usub_sat64 = true, \
   .lower_hadd64 = true, \
   .lower_bfe_with_two_constants = true, \
   .max_unroll_iterations = 32

static const struct nir_shader_compiler_options scalar_nir_options = {
   COMMON_OPTIONS,
   COMMON_SCALAR_OPTIONS,
};

static const struct nir_shader_compiler_options vector_nir_options = {
   COMMON_OPTIONS,

   /* In the vec4 backend, our dpN instruction replicates its result to all
    * the components of a vec4.  We would like NIR to give us replicated fdot
    * instructions because it can optimize better for us.
    */
   .fdot_replicates = true,

   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .intel_vec4 = true,
   .max_unroll_iterations = 32,
};
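
/* Usage sketch (hypothetical caller; `screen` and its fields are
 * illustrative, not part of this file):
 *
 *    struct brw_compiler *c = brw_compiler_create(screen, &screen->devinfo);
 *
 * The compiler is allocated with rzalloc out of mem_ctx, so freeing that
 * ralloc context also frees the compiler.
 */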
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

   compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);

   compiler->use_tcs_8_patch =
      devinfo->ver >= 12 ||
      (devinfo->ver >= 9 && (INTEL_DEBUG & DEBUG_TCS_EIGHT_PATCH));

   /* Default to the sampler since that's what we've done since forever */
   compiler->indirect_ubos_use_sampler = true;

   /* There is no vec4 mode on Gfx10+, and we don't use it at all on Gfx8+. */
   for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) {
      compiler->scalar_stage[i] = devinfo->ver >= 8 ||
         i == MESA_SHADER_FRAGMENT || i == MESA_SHADER_COMPUTE;
   }

   for (int i = MESA_SHADER_TASK; i < MESA_VULKAN_SHADER_STAGES; i++)
      compiler->scalar_stage[i] = true;
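
   /* Build the 64-bit integer and double-precision lowering masks that are
    * stored into every stage's NIR options below.
    */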
   nir_lower_int64_options int64_options =
      nir_lower_imul64 |
      nir_lower_isign64 |
      nir_lower_divmod64 |
      nir_lower_imul_high64;
   nir_lower_doubles_options fp64_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dfract |
      nir_lower_dround_even |
      nir_lower_dmod |
      nir_lower_dsub |
      nir_lower_ddiv;

   if (!devinfo->has_64bit_float || (INTEL_DEBUG & DEBUG_SOFT64)) {
      int64_options |= (nir_lower_int64_options)~0;
      fp64_options |= nir_lower_fp64_full_software;
   }

   /* The Bspec's section titled "Instruction_multiply[DevBDW+]" claims that
    * the destination type can be Quadword and the source type Doubleword for
    * Gfx8 and Gfx9.  So, lower the 64-bit multiply instruction on the rest
    * of the platforms.
    */
   if (devinfo->ver < 8 || devinfo->ver > 9)
      int64_options |= nir_lower_imul_2x32_64;

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_ALL_SHADER_STAGES; i++) {
      compiler->glsl_compiler_options[i].MaxUnrollIterations = 0;
      compiler->glsl_compiler_options[i].MaxIfDepth =
         devinfo->ver < 6 ? 16 : UINT_MAX;

      /* We handle this in NIR */
      compiler->glsl_compiler_options[i].EmitNoIndirectInput = false;
      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = false;
      compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = false;

      bool is_scalar = compiler->scalar_stage[i];
      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;

      struct nir_shader_compiler_options *nir_options =
         rzalloc(compiler, struct nir_shader_compiler_options);
      if (is_scalar) {
         *nir_options = scalar_nir_options;
      } else {
         *nir_options = vector_nir_options;
      }

      /* Prior to Gfx6, there are no three source operations, and Gfx11 loses
       * LRP.
       */
      nir_options->lower_ffma16 = devinfo->ver < 6;
      nir_options->lower_ffma32 = devinfo->ver < 6;
      nir_options->lower_ffma64 = devinfo->ver < 6;
      nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;
      nir_options->lower_fpow = devinfo->ver >= 12;

      nir_options->lower_rotate = devinfo->ver < 11;
      nir_options->lower_bitfield_reverse = devinfo->ver < 7;

      nir_options->lower_int64_options = int64_options;
      nir_options->lower_doubles_options = fp64_options;

      /* Starting with Gfx11, we lower away 8-bit arithmetic */
      nir_options->support_8bit_alu = devinfo->ver < 11;

      nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;

      compiler->glsl_compiler_options[i].NirOptions = nir_options;

      compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
   }

   return compiler;
}
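
/* Shift one bit into the low end of *val.  The first bit inserted ends up
 * as the most significant of the bits accumulated so far.
 */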
static void
insert_u64_bit(uint64_t *val, bool add)
{
   *val = (*val << 1) | !!add;
}
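
/* Fold the compiler settings that can change generated code into a single
 * uint64_t, e.g. so a driver can mix it into an on-disk shader cache key.
 * DEBUG_DISK_CACHE_MASK selects the INTEL_DEBUG flags that matter; the mask
 * is walked lowest bit first via ffsll, each flag landing one position to
 * the left of the previous one.  For example, inserting the bits 1, 0, 1
 * into an empty config yields 0b101.
 */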
uint64_t
brw_get_compiler_config_value(const struct brw_compiler *compiler)
{
   uint64_t config = 0;
   insert_u64_bit(&config, compiler->precise_trig);
   if (compiler->devinfo->ver >= 8 && compiler->devinfo->ver < 10) {
      insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_VERTEX]);
      insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
      insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
      insert_u64_bit(&config, compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
   }
   uint64_t debug_bits = INTEL_DEBUG;
   uint64_t mask = DEBUG_DISK_CACHE_MASK;
   while (mask != 0) {
      const uint64_t bit = 1ULL << (ffsll(mask) - 1);
      insert_u64_bit(&config, (debug_bits & bit) != 0);
      mask &= ~bit;
   }
   return config;
}
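
/* Map a shader stage to the size of its stage-specific prog_data struct.
 * All ray-tracing stages share brw_bs_prog_data, and OpenCL kernels reuse
 * brw_cs_prog_data.
 */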
unsigned
brw_prog_data_size(gl_shader_stage stage)
{
   static const size_t stage_sizes[] = {
      [MESA_SHADER_VERTEX]       = sizeof(struct brw_vs_prog_data),
      [MESA_SHADER_TESS_CTRL]    = sizeof(struct brw_tcs_prog_data),
      [MESA_SHADER_TESS_EVAL]    = sizeof(struct brw_tes_prog_data),
      [MESA_SHADER_GEOMETRY]     = sizeof(struct brw_gs_prog_data),
      [MESA_SHADER_FRAGMENT]     = sizeof(struct brw_wm_prog_data),
      [MESA_SHADER_COMPUTE]      = sizeof(struct brw_cs_prog_data),
      [MESA_SHADER_RAYGEN]       = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_ANY_HIT]      = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_CLOSEST_HIT]  = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_MISS]         = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_CALLABLE]     = sizeof(struct brw_bs_prog_data),
      [MESA_SHADER_KERNEL]       = sizeof(struct brw_cs_prog_data),
   };
   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
   return stage_sizes[stage];
}
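
/* Same mapping as brw_prog_data_size(), but for the brw_*_prog_key
 * structures.
 */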
unsigned
brw_prog_key_size(gl_shader_stage stage)
{
   static const size_t stage_sizes[] = {
      [MESA_SHADER_VERTEX]       = sizeof(struct brw_vs_prog_key),
      [MESA_SHADER_TESS_CTRL]    = sizeof(struct brw_tcs_prog_key),
      [MESA_SHADER_TESS_EVAL]    = sizeof(struct brw_tes_prog_key),
      [MESA_SHADER_GEOMETRY]     = sizeof(struct brw_gs_prog_key),
      [MESA_SHADER_FRAGMENT]     = sizeof(struct brw_wm_prog_key),
      [MESA_SHADER_COMPUTE]      = sizeof(struct brw_cs_prog_key),
      [MESA_SHADER_RAYGEN]       = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_ANY_HIT]      = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_CLOSEST_HIT]  = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_MISS]         = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_INTERSECTION] = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_CALLABLE]     = sizeof(struct brw_bs_prog_key),
      [MESA_SHADER_KERNEL]       = sizeof(struct brw_cs_prog_key),
   };
   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes));
   return stage_sizes[stage];
}
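
/* Patch relocations recorded in prog_data into the assembled program.  Each
 * relocation carries an id, an offset into the program, and a delta; for a
 * caller-supplied value with a matching id, value + delta is written either
 * as a raw 32-bit word or into the immediate of a MOV instruction.
 */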
void
brw_write_shader_relocs(const struct intel_device_info *devinfo,
                        void *program,
                        const struct brw_stage_prog_data *prog_data,
                        struct brw_shader_reloc_value *values,
                        unsigned num_values)
{
   for (unsigned i = 0; i < prog_data->num_relocs; i++) {
      assert(prog_data->relocs[i].offset % 8 == 0);
      void *dst = program + prog_data->relocs[i].offset;
      for (unsigned j = 0; j < num_values; j++) {
         if (prog_data->relocs[i].id == values[j].id) {
            uint32_t value = values[j].value + prog_data->relocs[i].delta;
            switch (prog_data->relocs[i].type) {
            case BRW_SHADER_RELOC_TYPE_U32:
               *(uint32_t *)dst = value;
               break;
            case BRW_SHADER_RELOC_TYPE_MOV_IMM:
               brw_update_reloc_imm(devinfo, dst, value);
               break;
            default:
               unreachable("Invalid relocation type");
            }
            break;
         }
      }
   }
}
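
/* Usage sketch (hypothetical id and value names, for illustration only):
 *
 *    struct brw_shader_reloc_value vals[] = {
 *       { .id = some_reloc_id, .value = resolved_address },
 *    };
 *    brw_write_shader_relocs(devinfo, shader_code, prog_data,
 *                            vals, ARRAY_SIZE(vals));
 *
 * where shader_code points at a writable copy of the assembled shader.
 */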