GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/asahi/compiler/agx_optimizer.c

/*
 * Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_minifloat.h"

/* AGX peephole optimizer responsible for instruction combining. It operates in
 * a forward direction and a backward direction, in each case traversing in
 * source order. SSA means the forward pass satisfies the invariant:
 *
 *    Every def is visited before any of its uses.
 *
 * Dually, the backward pass satisfies the invariant:
 *
 *    Every use of a def is visited before the def.
 *
 * This means the forward pass can propagate modifiers forward, whereas the
 * backwards pass propagates modifiers backward. Consider an example:
 *
 *    1 = fabs 0
 *    2 = fround 1
 *    3 = fsat 2
 *
 * The forwards pass would propagate the fabs to the fround (since we can
 * look up the fabs from the fround source and do the replacement). By contrast
 * the backwards pass would propagate the fsat back to the fround (since when
 * we see the fround we know it has only a single user, fsat). Propagatable
 * instructions have natural directions (like pushforwards and pullbacks).
 *
 * We are careful to update the tracked state whenever we modify an instruction
 * to ensure the passes are linear-time and converge in a single iteration.
 *
 * Size conversions are worth special discussion. Consider the snippet:
 *
 *    2 = fadd 0, 1
 *    3 = f2f16 2
 *    4 = fround 3
 *
 * A priori, we can move the f2f16 in either direction. But it's not equal --
 * if we move it up to the fadd, we get FP16 for two instructions, whereas if
 * we push it into the fround, we effectively get FP32 for two instructions. So
 * f2f16 is backwards. Likewise, consider
 *
 *    2 = fadd 0, 1
 *    3 = f2f32 2
 *    4 = fround 3
 *
 * This time if we move f2f32 up to the fadd, we get FP32 for two instructions,
 * but if we move it down to the fround, we get FP16 for two. So f2f32 is
 * forwards.
 */
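
/* As an illustration (a sketch of the intended end state, in the same ad-hoc
 * notation, not literal pass output): running both passes over the first
 * example above folds the modifiers into the fround, roughly
 *
 *    3 = fround.sat abs(0)
 *
 * The fabs becomes a source modifier picked up by the forward pass, and the
 * fsat becomes a saturate flag and destination pulled in by the backward pass;
 * the leftover fabs is then dead and is presumably cleaned up by a later DCE
 * pass.
 */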

static bool
agx_is_fmov(agx_instr *def)
{
   return (def->op == AGX_OPCODE_FADD)
      && agx_is_equiv(def->src[1], agx_negzero());
}
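
/* An "fmov" is thus recognized as an fadd whose second source is -0.0:
 * x + (-0.0) == x for all x, including signed zeros, whereas adding +0.0
 * would canonicalize -0.0 to +0.0. Presumably fabs/fneg/fsat take the same
 * form, an fadd by -0.0 carrying abs/neg source modifiers or the saturate
 * flag, which is what the propagation below relies on.
 */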

/* Compose floating-point modifiers with floating-point sources */

static agx_index
agx_compose_float_src(agx_index to, agx_index from)
{
   if (to.abs)
      from.neg = false;

   from.abs |= to.abs;
   from.neg ^= to.neg;

   return from;
}
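
/* For example, composing an outer negate with a source that is already
 * abs(y) yields -abs(y); composing an outer negate with a source that is
 * already -y yields plain y, since negations cancel in pairs; and an outer
 * abs simply discards any negation on the inner source, as abs(-y) == abs(y).
 */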

static void
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = ins->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (!agx_is_fmov(def)) continue;
      if (def->saturate) continue;

      ins->src[s] = agx_compose_float_src(src, def->src[0]);
   }
}
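
/* Sketch of the forward direction, in the header comment's notation: given
 *
 *    1 = fadd abs(0), -0.0      (i.e. fabs)
 *    2 = fround 1
 *
 * visiting the fround looks up the fmov-like def through defs[] and rewrites
 * the source in place, yielding
 *
 *    2 = fround abs(0)
 *
 * The original fadd is left behind and becomes dead if this was its only use.
 */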

static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
      unsigned srcs, bool is_float)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (def->op != AGX_OPCODE_MOV_IMM) continue;

      uint8_t value = def->imm;
      bool float_src = is_float;

      /* cmpselsrc takes integer immediates only */
      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;

      if (float_src) {
         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));

         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
         if (!agx_minifloat_exact(f)) continue;

         value = agx_minifloat_encode(f);
      } else if (value != def->imm) {
         continue;
      }

      I->src[s].type = AGX_INDEX_IMMEDIATE;
      I->src[s].value = value;
   }
}
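
/* In other words, a mov_imm feeding a source is folded only when the value
 * survives the narrow immediate encoding: integer immediates must fit the
 * 8-bit field unchanged, and float immediates must be exactly representable
 * in the minifloat encoding (agx_minifloat_exact / agx_minifloat_encode), so
 * presumably small constants like 0.5 or 1.0 qualify while arbitrary values
 * keep the mov.
 */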

static bool
agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
{
   if (!agx_is_fmov(use)) return false;
   if (use->src[0].neg || use->src[0].abs) return false;

   /* saturate(saturate(x)) = saturate(x) */
   I->saturate |= use->saturate;
   I->dest[0] = use->dest[0];
   return true;
}
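
/* Sketch of the backward direction: given
 *
 *    2 = fadd 0, 1
 *    3 = fsat 2        (the only use of 2)
 *
 * visiting the fadd with its single user known pulls the saturate flag and
 * the destination back into it, roughly
 *
 *    3 = fadd.sat 0, 1
 *
 * after which the caller removes the now-redundant fsat.
 */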

static void
agx_optimizer_forward(agx_context *ctx)
{
   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));

   agx_foreach_instr_global(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      for (unsigned d = 0; d < info.nr_dests; ++d) {
         if (I->dest[d].type == AGX_INDEX_NORMAL)
            defs[I->dest[d].value] = I;
      }

      /* Propagate fmov down */
      if (info.is_float)
         agx_optimizer_fmov(defs, I, info.nr_srcs);

      /* Inline immediates if we can. TODO: systematic */
      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
          I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
   }

   free(defs);
}

static void
agx_optimizer_backward(agx_context *ctx)
{
   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));

   agx_foreach_instr_global_rev(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      for (unsigned s = 0; s < info.nr_srcs; ++s) {
         if (I->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = I->src[s].value;

            if (uses[v])
               BITSET_SET(multiple, v);
            else
               uses[v] = I;
         }
      }

      if (info.nr_dests != 1)
         continue;

      if (I->dest[0].type != AGX_INDEX_NORMAL)
         continue;

      agx_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
         agx_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}
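
/* Note that uses[] remembers only the first user of each value and multiple
 * marks values with more than one user, so a def is rewritten only when it
 * has exactly one use; that is what makes stealing the use's destination in
 * agx_optimizer_fmov_rev safe.
 */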

void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
}
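
/* A minimal usage sketch, assuming the usual driver flow (the call site and
 * agx_dce are assumptions, outside this file):
 *
 *    agx_optimizer(ctx);   // backward, then forward pass
 *    agx_dce(ctx);         // remove instructions left dead by propagation
 */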