Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/ir3/ir3_lower_subgroups.c
4565 views
1
/*
2
* Copyright (C) 2021 Valve Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*/
23
24
#include "ir3.h"
25
26
/* Lower several macro-instructions needed for shader subgroup support that
27
* must be turned into if statements. We do this after RA and post-RA
28
* scheduling to give the scheduler a chance to rearrange them, because RA
29
* may need to insert OPC_META_READ_FIRST to handle splitting live ranges, and
30
* also because some (e.g. BALLOT and READ_FIRST) must produce a shared
31
* register that cannot be spilled to a normal register until after the if,
32
* which makes implementing spilling more complicated if they are already
33
* lowered.
34
*/
35
36
static void
37
replace_pred(struct ir3_block *block, struct ir3_block *old_pred,
38
struct ir3_block *new_pred)
39
{
40
for (unsigned i = 0; i < block->predecessors_count; i++) {
41
if (block->predecessors[i] == old_pred) {
42
block->predecessors[i] = new_pred;
43
return;
44
}
45
}
46
}
47
48
static void
49
replace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred,
50
struct ir3_block *new_pred)
51
{
52
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
53
if (block->physical_predecessors[i] == old_pred) {
54
block->physical_predecessors[i] = new_pred;
55
return;
56
}
57
}
58
}
59
60
static void
61
mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
62
{
63
struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
64
struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags);
65
mov_dst->wrmask = dst->wrmask;
66
struct ir3_register *src = ir3_src_create(
67
mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED);
68
src->uim_val = immed;
69
mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
70
mov->cat1.src_type = mov->cat1.dst_type;
71
mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
72
}
73
74
static struct ir3_block *
75
split_block(struct ir3 *ir, struct ir3_block *before_block,
76
struct ir3_instruction *instr, struct ir3_block **then)
77
{
78
struct ir3_block *then_block = ir3_block_create(ir);
79
struct ir3_block *after_block = ir3_block_create(ir);
80
list_add(&then_block->node, &before_block->node);
81
list_add(&after_block->node, &then_block->node);
82
83
for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
84
after_block->successors[i] = before_block->successors[i];
85
if (after_block->successors[i])
86
replace_pred(after_block->successors[i], before_block, after_block);
87
}
88
89
for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
90
i++) {
91
after_block->physical_successors[i] =
92
before_block->physical_successors[i];
93
if (after_block->physical_successors[i]) {
94
replace_physical_pred(after_block->physical_successors[i],
95
before_block, after_block);
96
}
97
}
98
99
before_block->successors[0] = then_block;
100
before_block->successors[1] = after_block;
101
before_block->physical_successors[0] = then_block;
102
before_block->physical_successors[1] = after_block;
103
ir3_block_add_predecessor(then_block, before_block);
104
ir3_block_add_predecessor(after_block, before_block);
105
ir3_block_add_physical_predecessor(then_block, before_block);
106
ir3_block_add_physical_predecessor(after_block, before_block);
107
108
then_block->successors[0] = after_block;
109
then_block->physical_successors[0] = after_block;
110
ir3_block_add_predecessor(after_block, then_block);
111
ir3_block_add_physical_predecessor(after_block, then_block);
112
113
foreach_instr_from_safe (rem_instr, &instr->node,
114
&before_block->instr_list) {
115
list_del(&rem_instr->node);
116
list_addtail(&rem_instr->node, &after_block->instr_list);
117
rem_instr->block = after_block;
118
}
119
120
after_block->brtype = before_block->brtype;
121
after_block->condition = before_block->condition;
122
123
*then = then_block;
124
return after_block;
125
}
126
127
static bool
128
lower_block(struct ir3 *ir, struct ir3_block **block)
129
{
130
bool progress = false;
131
132
foreach_instr_safe (instr, &(*block)->instr_list) {
133
switch (instr->opc) {
134
case OPC_BALLOT_MACRO:
135
case OPC_ANY_MACRO:
136
case OPC_ALL_MACRO:
137
case OPC_ELECT_MACRO:
138
case OPC_READ_COND_MACRO:
139
case OPC_READ_FIRST_MACRO:
140
case OPC_SWZ_SHARED_MACRO:
141
break;
142
default:
143
continue;
144
}
145
146
struct ir3_block *before_block = *block;
147
struct ir3_block *then_block;
148
struct ir3_block *after_block =
149
split_block(ir, before_block, instr, &then_block);
150
151
/* For ballot, the destination must be initialized to 0 before we do
152
* the movmsk because the condition may be 0 and then the movmsk will
153
* be skipped. Because it's a shared register we have to wrap the
154
* initialization in a getone block.
155
*/
156
if (instr->opc == OPC_BALLOT_MACRO) {
157
before_block->brtype = IR3_BRANCH_GETONE;
158
before_block->condition = NULL;
159
mov_immed(instr->dsts[0], then_block, 0);
160
before_block = after_block;
161
after_block = split_block(ir, before_block, instr, &then_block);
162
}
163
164
switch (instr->opc) {
165
case OPC_BALLOT_MACRO:
166
case OPC_READ_COND_MACRO:
167
case OPC_ANY_MACRO:
168
case OPC_ALL_MACRO:
169
before_block->condition = instr->srcs[0]->def->instr;
170
break;
171
default:
172
before_block->condition = NULL;
173
break;
174
}
175
176
switch (instr->opc) {
177
case OPC_BALLOT_MACRO:
178
case OPC_READ_COND_MACRO:
179
before_block->brtype = IR3_BRANCH_COND;
180
break;
181
case OPC_ANY_MACRO:
182
before_block->brtype = IR3_BRANCH_ANY;
183
break;
184
case OPC_ALL_MACRO:
185
before_block->brtype = IR3_BRANCH_ALL;
186
break;
187
case OPC_ELECT_MACRO:
188
case OPC_READ_FIRST_MACRO:
189
case OPC_SWZ_SHARED_MACRO:
190
before_block->brtype = IR3_BRANCH_GETONE;
191
break;
192
default:
193
unreachable("bad opcode");
194
}
195
196
switch (instr->opc) {
197
case OPC_ALL_MACRO:
198
case OPC_ANY_MACRO:
199
case OPC_ELECT_MACRO:
200
mov_immed(instr->dsts[0], then_block, 1);
201
mov_immed(instr->dsts[0], before_block, 0);
202
break;
203
204
case OPC_BALLOT_MACRO: {
205
unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
206
struct ir3_instruction *movmsk =
207
ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
208
ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
209
movmsk->repeat = comp_count - 1;
210
break;
211
}
212
213
case OPC_READ_COND_MACRO:
214
case OPC_READ_FIRST_MACRO: {
215
struct ir3_instruction *mov =
216
ir3_instr_create(then_block, OPC_MOV, 1, 1);
217
unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
218
ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
219
struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
220
*new_src = *instr->srcs[src];
221
mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
222
break;
223
}
224
225
case OPC_SWZ_SHARED_MACRO: {
226
struct ir3_instruction *swz =
227
ir3_instr_create(then_block, OPC_SWZ, 2, 2);
228
ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
229
ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
230
ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
231
ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
232
swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
233
swz->repeat = 1;
234
break;
235
}
236
237
default:
238
unreachable("bad opcode");
239
}
240
241
*block = after_block;
242
list_delinit(&instr->node);
243
progress = true;
244
}
245
246
return progress;
247
}
248
249
bool
250
ir3_lower_subgroups(struct ir3 *ir)
251
{
252
bool progress = false;
253
254
foreach_block (block, &ir->block_list)
255
progress |= lower_block(ir, &block);
256
257
return progress;
258
}
259
260