Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/compiler/vir_opt_constant_alu.c
4564 views
1
/*
2
* Copyright © 2021 Raspberry Pi
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
/**
25
* @file vir_opt_constant_alu.c
26
*
27
* Identifies sequences of ALU instructions that operate on constant operands
28
* and reduces them to a uniform load.
29
*
30
* This is useful, for example, to optimize the result of removing leading
31
* ldunifa instructions in the DCE pass, which can leave a series of constant
32
* additions that increment the unifa address by 4 for each leading ldunif
33
* removed. It helps turn this:
34
*
35
* nop t1; ldunif (0x00000004 / 0.000000)
36
* nop t2; ldunif (0x00000004 / 0.000000)
37
* add t3, t1, t2
38
*
39
* into:
40
*
41
* nop t1; ldunif (0x00000004 / 0.000000)
42
* nop t2; ldunif (0x00000004 / 0.000000)
43
* nop t4; ldunif (0x00000008 / 0.000000)
44
* mov t3, t4
45
*
46
* For best results we want to run copy propagation in between this and
47
* the combine constants pass: every time we manage to convert an alu to
48
* a uniform load, we move the uniform to the original alu destination. By
49
* running copy propagation immediately after we can reuse the uniform as
50
* source in more follow-up alu instructions, making them constant and allowing
51
* this pass to continue making progress. However, if we run the small
52
* immediates optimization before that, that pass can convert some of the movs
53
* to use small immediates instead of the uniforms and prevent us from making
54
* the best of this pass, as small immediates don't get copy propagated.
55
*/
56
57
#include "v3d_compiler.h"
58
59
#include "util/half_float.h"
60
#include "util/u_math.h"
61
62
static bool
63
opt_constant_add(struct v3d_compile *c, struct qinst *inst, union fi *values)
64
{
65
/* FIXME: handle more add operations */
66
struct qreg unif = { };
67
switch (inst->qpu.alu.add.op) {
68
case V3D_QPU_A_ADD:
69
c->cursor = vir_after_inst(inst);
70
unif = vir_uniform_ui(c, values[0].ui + values[1].ui);
71
break;
72
73
case V3D_QPU_A_VFPACK: {
74
assert(inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE);
75
76
const uint32_t packed =
77
(((uint32_t)_mesa_float_to_half(values[1].f)) << 16) |
78
_mesa_float_to_half(values[0].f);
79
80
c->cursor = vir_after_inst(inst);
81
unif = vir_uniform_ui(c, packed);
82
break;
83
}
84
85
default:
86
return false;
87
}
88
89
/* Remove the original ALU instruction and replace it with a uniform
90
* load. If the original instruction loaded an implicit uniform we
91
* need to replicate that in the new instruction.
92
*/
93
struct qreg dst = inst->dst;
94
struct qinst *mov = vir_MOV_dest(c, dst, unif);
95
mov->uniform = inst->uniform;
96
vir_remove_instruction(c, inst);
97
if (dst.file == QFILE_TEMP)
98
c->defs[dst.index] = mov;
99
return true;
100
}
101
102
static bool
103
try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst)
104
{
105
if(inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
106
return false;
107
108
/* If the instruction does anything other than writing the result
109
* directly to the destination, skip.
110
*/
111
if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
112
inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
113
return false;
114
}
115
116
if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
117
inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
118
return false;
119
}
120
121
assert(vir_get_nsrc(inst) <= 2);
122
union fi values[2];
123
for (int i = 0; i < vir_get_nsrc(inst); i++) {
124
if (inst->src[i].file == QFILE_SMALL_IMM &&
125
v3d_qpu_small_imm_unpack(c->devinfo,
126
inst->qpu.raddr_b,
127
&values[i].ui)) {
128
continue;
129
}
130
131
if (inst->src[i].file == QFILE_TEMP) {
132
struct qinst *def = c->defs[inst->src[i].index];
133
if (!def)
134
return false;
135
136
if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) &&
137
c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) {
138
values[i].ui = c->uniform_data[def->uniform];
139
continue;
140
}
141
}
142
143
return false;
144
}
145
146
/* FIXME: handle mul operations */
147
if (vir_is_add(inst))
148
return opt_constant_add(c, inst, values);
149
150
return false;
151
}
152
153
bool
154
vir_opt_constant_alu(struct v3d_compile *c)
155
{
156
bool progress = false;
157
vir_for_each_block(block, c) {
158
vir_for_each_inst_safe(inst, block) {
159
progress = try_opt_constant_alu(c, inst) || progress;
160
}
161
}
162
163
return progress;
164
}
165
166