Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/ir3/ir3_compiler.c
4565 views
1
/*
2
 * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*
23
* Authors:
24
 *    Rob Clark <robclark@freedesktop.org>
25
*/
26
27
#include "util/ralloc.h"
28
29
#include "freedreno_dev_info.h"
30
31
#include "ir3_compiler.h"
32
33
static const struct debug_named_value shader_debug_options[] = {
34
/* clang-format off */
35
{"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"},
36
{"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},
37
{"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},
38
{"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"},
39
{"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"},
40
{"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"},
41
{"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"},
42
{"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"},
43
{"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"},
44
{"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
45
{"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
46
{"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
47
#ifdef DEBUG
48
/* DEBUG-only options: */
49
{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
50
{"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"},
51
#endif
52
DEBUG_NAMED_VALUE_END
53
/* clang-format on */
54
};
55
56
DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG",
57
shader_debug_options, 0)
58
DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH",
59
NULL)
60
61
enum ir3_shader_debug ir3_shader_debug = 0;
62
const char *ir3_shader_override_path = NULL;
63
64
void
65
ir3_compiler_destroy(struct ir3_compiler *compiler)
66
{
67
disk_cache_destroy(compiler->disk_cache);
68
ralloc_free(compiler);
69
}
70
71
struct ir3_compiler *
72
ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
73
bool robust_ubo_access)
74
{
75
struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
76
77
ir3_shader_debug = debug_get_option_ir3_shader_debug();
78
ir3_shader_override_path =
79
!__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
80
81
if (ir3_shader_override_path) {
82
ir3_shader_debug |= IR3_DBG_NOCACHE;
83
}
84
85
compiler->dev = dev;
86
compiler->gpu_id = gpu_id;
87
compiler->robust_ubo_access = robust_ubo_access;
88
89
/* All known GPU's have 32k local memory (aka shared) */
90
compiler->local_mem_size = 32 * 1024;
91
/* TODO see if older GPU's were different here */
92
compiler->branchstack_size = 64;
93
compiler->wave_granularity = 2;
94
compiler->max_waves = 16;
95
96
if (compiler->gpu_id >= 600) {
97
compiler->samgq_workaround = true;
98
/* a6xx split the pipeline state into geometry and fragment state, in
99
* order to let the VS run ahead of the FS. As a result there are now
100
* separate const files for the the fragment shader and everything
101
* else, and separate limits. There seems to be a shared limit, but
102
* it's higher than the vert or frag limits.
103
*
104
* TODO: The shared limit seems to be different on different on
105
* different models.
106
*/
107
compiler->max_const_pipeline = 640;
108
compiler->max_const_frag = 512;
109
compiler->max_const_geom = 512;
110
compiler->max_const_safe = 128;
111
112
/* Compute shaders don't share a const file with the FS. Instead they
113
* have their own file, which is smaller than the FS one.
114
*
115
* TODO: is this true on earlier gen's?
116
*/
117
compiler->max_const_compute = 256;
118
119
/* TODO: implement clip+cull distances on earlier gen's */
120
compiler->has_clip_cull = true;
121
122
/* TODO: implement private memory on earlier gen's */
123
compiler->has_pvtmem = true;
124
125
compiler->tess_use_shared =
126
fd_dev_info(compiler->gpu_id)->a6xx.tess_use_shared;
127
} else {
128
compiler->max_const_pipeline = 512;
129
compiler->max_const_geom = 512;
130
compiler->max_const_frag = 512;
131
compiler->max_const_compute = 512;
132
133
/* Note: this will have to change if/when we support tess+GS on
134
* earlier gen's.
135
*/
136
compiler->max_const_safe = 256;
137
}
138
139
if (compiler->gpu_id >= 600) {
140
compiler->reg_size_vec4 =
141
fd_dev_info(compiler->gpu_id)->a6xx.reg_size_vec4;
142
} else if (compiler->gpu_id >= 400) {
143
/* On a4xx-a5xx, using r24.x and above requires using the smallest
144
* threadsize.
145
*/
146
compiler->reg_size_vec4 = 48;
147
} else {
148
/* TODO: confirm this */
149
compiler->reg_size_vec4 = 96;
150
}
151
152
if (compiler->gpu_id >= 600) {
153
compiler->threadsize_base = 64;
154
} else if (compiler->gpu_id >= 400) {
155
/* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
156
* 1.1 subgroupSize which is 32.
157
*/
158
compiler->threadsize_base = 32;
159
} else {
160
compiler->threadsize_base = 8;
161
}
162
163
if (compiler->gpu_id >= 400) {
164
/* need special handling for "flat" */
165
compiler->flat_bypass = true;
166
compiler->levels_add_one = false;
167
compiler->unminify_coords = false;
168
compiler->txf_ms_with_isaml = false;
169
compiler->array_index_add_half = true;
170
compiler->instr_align = 16;
171
compiler->const_upload_unit = 4;
172
} else {
173
/* no special handling for "flat" */
174
compiler->flat_bypass = false;
175
compiler->levels_add_one = true;
176
compiler->unminify_coords = true;
177
compiler->txf_ms_with_isaml = true;
178
compiler->array_index_add_half = false;
179
compiler->instr_align = 4;
180
compiler->const_upload_unit = 8;
181
}
182
183
ir3_disk_cache_init(compiler);
184
185
return compiler;
186
}
187
188