Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/bifrost/bi_opt_mod_props.c
4564 views
1
/*
2
* Copyright (C) 2021 Collabora, Ltd.
3
* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*/
24
25
#include "compiler.h"
26
#include "bi_builder.h"
27
28
static bool
29
bi_takes_fabs(bi_instr *I, bi_index repl, unsigned s)
30
{
31
switch (I->op) {
32
case BI_OPCODE_FCMP_V2F16:
33
case BI_OPCODE_FMAX_V2F16:
34
case BI_OPCODE_FMIN_V2F16:
35
/* Encoding restriction: can't have both abs if equal sources */
36
return !(I->src[1 - s].abs && bi_is_word_equiv(I->src[1 - s], repl));
37
case BI_OPCODE_V2F32_TO_V2F16:
38
/* TODO: Needs both match or lower */
39
return false;
40
case BI_OPCODE_FLOG_TABLE_F32:
41
/* TODO: Need to check mode */
42
return false;
43
default:
44
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
45
}
46
}
47
48
static bool
49
bi_takes_fneg(bi_instr *I, unsigned s)
50
{
51
switch (I->op) {
52
case BI_OPCODE_CUBE_SSEL:
53
case BI_OPCODE_CUBE_TSEL:
54
case BI_OPCODE_CUBEFACE:
55
/* TODO: Needs match or lower */
56
return false;
57
case BI_OPCODE_FREXPE_F32:
58
case BI_OPCODE_FREXPE_V2F16:
59
case BI_OPCODE_FLOG_TABLE_F32:
60
/* TODO: Need to check mode */
61
return false;
62
default:
63
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
64
}
65
}
66
67
static bool
68
bi_is_fabsneg(bi_instr *I)
69
{
70
return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
71
(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
72
(I->clamp == BI_CLAMP_NONE);
73
}
74
75
static enum bi_swizzle
76
bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
77
{
78
assert(a <= BI_SWIZZLE_H11);
79
assert(b <= BI_SWIZZLE_H11);
80
81
bool al = (a & BI_SWIZZLE_H10);
82
bool ar = (a & BI_SWIZZLE_H01);
83
bool bl = (b & BI_SWIZZLE_H10);
84
bool br = (b & BI_SWIZZLE_H01);
85
86
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
87
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
88
}
89
90
/* Like bi_replace_index, but composes instead of overwrites */
91
92
static inline bi_index
93
bi_compose_float_index(bi_index old, bi_index repl)
94
{
95
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
96
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
97
repl.neg = old.neg ^ (repl.neg && !old.abs);
98
99
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
100
repl.abs |= old.abs;
101
102
/* Use the old swizzle to select from the replacement swizzle */
103
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
104
105
return repl;
106
}
107
108
void
109
bi_opt_mod_prop_forward(bi_context *ctx)
110
{
111
bi_instr **lut = calloc(sizeof(bi_instr *), ((ctx->ssa_alloc + 1) << 2));
112
113
bi_foreach_instr_global_safe(ctx, I) {
114
if (bi_is_ssa(I->dest[0]))
115
lut[bi_word_node(I->dest[0])] = I;
116
117
bi_foreach_src(I, s) {
118
if (!bi_is_ssa(I->src[s]))
119
continue;
120
121
bi_instr *mod = lut[bi_word_node(I->src[s])];
122
123
if (!mod)
124
continue;
125
126
if (bi_opcode_props[mod->op].size != bi_opcode_props[I->op].size)
127
continue;
128
129
if (bi_is_fabsneg(mod)) {
130
if (mod->src[0].abs && !bi_takes_fabs(I, mod->src[0], s))
131
continue;
132
133
if (mod->src[0].neg && !bi_takes_fneg(I, s))
134
continue;
135
136
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
137
}
138
}
139
}
140
141
free(lut);
142
}
143
144
/* RSCALE has restrictions on how the clamp may be used, only used for
145
* specialized transcendental sequences that set the clamp explicitly anyway */
146
147
static bool
148
bi_takes_clamp(bi_instr *I)
149
{
150
switch (I->op) {
151
case BI_OPCODE_FMA_RSCALE_F32:
152
case BI_OPCODE_FMA_RSCALE_V2F16:
153
case BI_OPCODE_FADD_RSCALE_F32:
154
return false;
155
default:
156
return bi_opcode_props[I->op].clamp;
157
}
158
}
159
160
/* Treating clamps as functions, compute the composition f circ g. For {NONE,
161
* SAT, SAT_SIGNED, CLAMP_POS}, anything left- or right-composed with NONE is
162
* unchanged, anything composed with itself is unchanged, and any two
163
* nontrivial distinct clamps compose to SAT (left as an exercise) */
164
165
static enum bi_clamp
166
bi_compose_clamp(enum bi_clamp f, enum bi_clamp g)
167
{
168
return (f == BI_CLAMP_NONE) ? g :
169
(g == BI_CLAMP_NONE) ? f :
170
(f == g) ? f :
171
BI_CLAMP_CLAMP_0_1;
172
}
173
174
static bool
175
bi_is_fclamp(bi_instr *I)
176
{
177
return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
178
(!I->src[0].abs && !I->src[0].neg) &&
179
(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
180
(I->clamp != BI_CLAMP_NONE);
181
}
182
183
static bool
184
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
185
{
186
if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size) return false;
187
if (!bi_is_fclamp(use)) return false;
188
if (!bi_takes_clamp(I)) return false;
189
if (use->src[0].neg || use->src[0].abs) return false;
190
191
I->clamp = bi_compose_clamp(I->clamp, use->clamp);
192
I->dest[0] = use->dest[0];
193
return true;
194
}
195
196
static bool
197
bi_is_var_tex(bi_instr *var, bi_instr *tex)
198
{
199
return (var->op == BI_OPCODE_LD_VAR_IMM) &&
200
(tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&
201
(var->register_format == BI_REGISTER_FORMAT_F32) &&
202
((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||
203
(var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&
204
(tex->texture_index == tex->sampler_index) &&
205
(tex->texture_index < 4) &&
206
(var->index < 8);
207
}
208
209
static bool
210
bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
211
{
212
if (!bi_is_var_tex(var, tex)) return false;
213
214
/* Construct the corresponding VAR_TEX intruction */
215
bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
216
217
bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,
218
var->sample, var->update, tex->texture_index, var->index);
219
I->skip = tex->skip;
220
221
if (tex->op == BI_OPCODE_TEXS_2D_F16)
222
I->op = BI_OPCODE_VAR_TEX_F16;
223
224
/* Dead code elimination will clean up for us */
225
return true;
226
}
227
228
void
229
bi_opt_mod_prop_backward(bi_context *ctx)
230
{
231
unsigned count = ((ctx->ssa_alloc + 1) << 2);
232
bi_instr **uses = calloc(count, sizeof(*uses));
233
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
234
235
bi_foreach_instr_global_rev(ctx, I) {
236
bi_foreach_src(I, s) {
237
if (bi_is_ssa(I->src[s])) {
238
unsigned v = bi_word_node(I->src[s]);
239
240
if (uses[v] && uses[v] != I)
241
BITSET_SET(multiple, v);
242
else
243
uses[v] = I;
244
}
245
}
246
247
if (!bi_is_ssa(I->dest[0]))
248
continue;
249
250
bi_instr *use = uses[bi_word_node(I->dest[0])];
251
252
if (!use || BITSET_TEST(multiple, bi_word_node(I->dest[0])))
253
continue;
254
255
/* Destination has a single use, try to propagate */
256
bool propagated =
257
bi_optimizer_clamp(I, use) ||
258
bi_optimizer_var_tex(ctx, I, use);
259
260
if (propagated) {
261
bi_remove_instruction(use);
262
continue;
263
}
264
}
265
266
free(uses);
267
free(multiple);
268
}
269
270