/* Source: GitHub repository PojavLauncherTeam/mesa,
 * path blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/ir2_private.h
 */
/*
 * Copyright (C) 2018 Jonathan Marek <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <[email protected]>
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "ir2.h"

/* Kind of operand a struct ir2_src refers to. It selects how ir2_src.num is
 * interpreted. The value is stored in a 2-bit bitfield (ir2_src.type), so
 * the enum must not grow beyond four values.
 */
enum ir2_src_type {
   IR2_SRC_SSA,   /* num is the index of the producing instruction */
   IR2_SRC_REG,   /* num is an index in the ctx->reg array */
   IR2_SRC_INPUT, /* num is an index in the ctx->input array */
   IR2_SRC_CONST, /* num is a constant index (C0, C1, etc) */
};

struct ir2_src {
45
/* num can mean different things
46
* ssa: index of instruction
47
* reg: index in ctx->reg array
48
* input: index in ctx->input array
49
* const: constant index (C0, C1, etc)
50
*/
51
uint16_t num;
52
uint8_t swizzle;
53
enum ir2_src_type type : 2;
54
uint8_t abs : 1;
55
uint8_t negate : 1;
56
uint8_t : 4;
57
};
58
59
/* Per-component state of a struct ir2_reg. */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};

struct ir2_reg {
66
uint8_t idx; /* assigned hardware register */
67
uint8_t ncomp;
68
69
uint8_t loop_depth;
70
bool initialized;
71
/* block_idx to free on (-1 = free on ref_count==0) */
72
int block_idx_free;
73
struct ir2_reg_component comp[4];
74
};
75
76
struct ir2_instr {
77
unsigned idx;
78
79
unsigned block_idx;
80
81
enum {
82
IR2_NONE,
83
IR2_FETCH,
84
IR2_ALU,
85
IR2_CF,
86
} type : 2;
87
88
/* instruction needs to be emitted (for scheduling) */
89
bool need_emit : 1;
90
91
/* predicate value - (usually) same for entire block */
92
uint8_t pred : 2;
93
94
/* src */
95
uint8_t src_count;
96
struct ir2_src src[4];
97
98
/* dst */
99
bool is_ssa;
100
union {
101
struct ir2_reg ssa;
102
struct ir2_reg *reg;
103
};
104
105
/* type-specific */
106
union {
107
struct {
108
instr_fetch_opc_t opc : 5;
109
union {
110
struct {
111
uint8_t const_idx;
112
uint8_t const_idx_sel;
113
} vtx;
114
struct {
115
bool is_cube : 1;
116
bool is_rect : 1;
117
uint8_t samp_id;
118
} tex;
119
};
120
} fetch;
121
struct {
122
/* store possible opcs, then we can choose vector/scalar instr */
123
instr_scalar_opc_t scalar_opc : 6;
124
instr_vector_opc_t vector_opc : 5;
125
/* same as nir */
126
uint8_t write_mask : 4;
127
bool saturate : 1;
128
129
/* export idx (-1 no export) */
130
int8_t export;
131
132
/* for scalarized 2 src instruction */
133
uint8_t src1_swizzle;
134
} alu;
135
struct {
136
/* jmp dst block_idx */
137
uint8_t block_idx;
138
} cf;
139
};
140
};
141
142
/* One entry of ctx->instr_sched. */
struct ir2_sched_instr {
   /* Same size as ir2_context.reg_state (256 bits).
    * NOTE(review): presumably a copy of the RA state for this slot - confirm.
    */
   uint32_t reg_state[8];
   /* NOTE(review): presumably the vector and scalar instructions paired in
    * this slot - confirm against the scheduler.
    */
   struct ir2_instr *instr, *instr_s;
};

struct ir2_context {
148
struct fd2_shader_stateobj *so;
149
150
unsigned block_idx, pred_idx;
151
uint8_t pred;
152
bool block_has_jump[64];
153
154
unsigned loop_last_block[64];
155
unsigned loop_depth;
156
157
nir_shader *nir;
158
159
/* ssa index of position output */
160
struct ir2_src position;
161
162
/* to translate SSA ids to instruction ids */
163
int16_t ssa_map[1024];
164
165
struct ir2_shader_info *info;
166
struct ir2_frag_linkage *f;
167
168
int prev_export;
169
170
/* RA state */
171
struct ir2_reg *live_regs[64];
172
uint32_t reg_state[256 / 32]; /* 64*4 bits */
173
174
/* inputs */
175
struct ir2_reg input[16 + 1]; /* 16 + param */
176
177
/* non-ssa regs */
178
struct ir2_reg reg[64];
179
unsigned reg_count;
180
181
struct ir2_instr instr[0x300];
182
unsigned instr_count;
183
184
struct ir2_sched_instr instr_sched[0x180];
185
unsigned instr_sched_count;
186
};
187
188
void assemble(struct ir2_context *ctx, bool binning);
189
190
void ir2_nir_compile(struct ir2_context *ctx, bool binning);
191
bool ir2_nir_lower_scalar(nir_shader *shader);
192
193
void ra_count_refs(struct ir2_context *ctx);
194
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
195
bool export, uint8_t export_writemask);
196
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
197
void ra_block_free(struct ir2_context *ctx, unsigned block);
198
199
void cp_src(struct ir2_context *ctx);
200
void cp_export(struct ir2_context *ctx);
201
202
/* utils */

/* Predefined swizzles in the encoding produced by swiz_set(): the 2-bit
 * field for channel i holds (component - i) & 3, i.e. the selected component
 * relative to the channel itself. A field value of 0 therefore keeps the
 * channel's own component, and the all-zero swizzle is the identity.
 * The name lists the component selected for channels x, y, z, w in order;
 * channels not named keep their own component.
 */
enum {
   IR2_SWIZZLE_Y = 1 << 0,
   IR2_SWIZZLE_Z = 2 << 0,
   IR2_SWIZZLE_W = 3 << 0,

   IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,

   IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,

   IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
   IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
   IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};

/* Report a compile failure: prints the printf-style arguments to stdout and
 * then trips assert(0). The ctx argument is accepted but not used. When
 * NDEBUG is defined the assert is a no-op, so execution continues after the
 * message. Uses the GNU named-variadic and statement-expression extensions.
 */
#define compile_error(ctx, args...)                                            \
   ({                                                                          \
      printf(args);                                                            \
      assert(0);                                                               \
   })

static inline struct ir2_src
229
ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
230
{
231
return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
232
}
233
234
/* ir2_assemble uses it .. */
struct ir2_src ir2_zero(struct ir2_context *ctx);

/* Iterate "it" over the first (ctx)->instr_count entries of (ctx)->instr,
 * skipping entries whose type is IR2_NONE. The skipping happens in the loop
 * condition (a GNU statement expression), so it also applies before the
 * first iteration.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)

/* Iterate "it" over every non-NULL entry of (ctx)->live_regs (64 slots).
 *
 * The cursor is __ptr; "it" is reloaded from *__ptr each time the condition
 * runs. The step must therefore advance __ptr: stepping "it" has no lasting
 * effect and would revisit the same slot forever unless the loop body set
 * that slot to NULL. A body that clears the current slot still works, since
 * the condition skips NULL slots anyway.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)

/* Iterate "it" over the first avail_count entries of the pointer array
 * "avail"; both names must be in scope at the point of use.
 *
 * The bound is checked before the slot is read, so avail[avail_count] (one
 * past the valid entries) is never dereferenced.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && ((it = *__instrp), 1);              \
        __instrp++)

/* Iterate "it" over the first src_count entries of (instr)->src. The instr
 * argument is parenthesized so that any pointer expression can be passed;
 * it is evaluated more than once.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)

/* mask for register allocation
 * 64 registers with 4 components each = 256 bits
 */
/* typedef struct {
   uint64_t data[4];
} regmask_t; */

/* Return whether bit "num" is set in a mask stored as an array of 32-bit
 * words (bit 0 is the least significant bit of mask[0]). The caller must
 * make sure num / 32 is a valid index. The word is shifted down rather than
 * shifting a signed 1 up, which would be undefined for bit 31.
 */
static inline bool
mask_isset(const uint32_t *mask, unsigned num)
{
   return (mask[num / 32] >> (num % 32)) & 1u;
}

/* Set bit "num" in a mask stored as an array of 32-bit words. The caller
 * must make sure num / 32 is a valid index. The shifted constant is
 * unsigned so that setting bit 31 of a word is well defined.
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   mask[num / 32] |= UINT32_C(1) << (num % 32);
}

/* Clear bit "num" in a mask stored as an array of 32-bit words. The caller
 * must make sure num / 32 is a valid index. The shifted constant is
 * unsigned so that clearing bit 31 of a word is well defined.
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}

/* Return the 4-bit group number "num" of the mask: eight groups are packed
 * per 32-bit word, lowest group in the lowest bits. With the layout used for
 * register allocation (4 component bits per register) this is the component
 * mask of register "num".
 */
static inline unsigned
mask_reg(const uint32_t *mask, unsigned num)
{
   unsigned shift = (num % 8) * 4;

   return (mask[num / 8] >> shift) & 0xf;
}

static inline bool
294
is_export(struct ir2_instr *instr)
295
{
296
return instr->type == IR2_ALU && instr->alu.export >= 0;
297
}
298
299
static inline instr_alloc_type_t
300
export_buf(unsigned num)
301
{
302
return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
303
}
304
305
/* component c for channel i
 *
 * Returns the swizzle bits that make channel i read component c: the 2-bit
 * field at bit position i * 2 holds (c - i) & 3, the component relative to
 * the channel. All other bits of the result are zero.
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   return ((c - i) & 3) << (i * 2);
}

/* get swizzle in channel i
 *
 * Returns the component (0..3) that channel i reads under swizzle "swiz":
 * the 2-bit field at bit position i * 2 is relative to the channel, so i is
 * added back modulo 4.
 */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   return ((swiz >> (i * 2)) + i) & 3;
}

/* Compose two swizzles: in the result, channel i reads the component that
 * swiz0 selects for the channel that swiz1 selects for i.
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned swiz = 0;

   for (int i = 0; i < 4; i++) {
      unsigned chan = swiz_get(swiz1, i);

      swiz |= swiz_set(swiz_get(swiz0, chan), i);
   }
   return swiz;
}

/* In-place variant of swiz_merge(): replaces *swiz0 with the composition of
 * *swiz0 and swiz1 (channel i reads the component that *swiz0 selects for
 * the channel that swiz1 selects for i). The result always fits in 8 bits.
 */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   unsigned swiz = 0;

   for (int i = 0; i < 4; i++) {
      unsigned chan = swiz_get(swiz1, i);

      swiz |= swiz_set(swiz_get(*swiz0, chan), i);
   }
   *swiz0 = swiz;
}

static inline struct ir2_reg *
338
get_reg(struct ir2_instr *instr)
339
{
340
return instr->is_ssa ? &instr->ssa : instr->reg;
341
}
342
343
static inline struct ir2_reg *
344
get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
345
{
346
switch (src->type) {
347
case IR2_SRC_INPUT:
348
return &ctx->input[src->num];
349
case IR2_SRC_SSA:
350
return &ctx->instr[src->num].ssa;
351
case IR2_SRC_REG:
352
return &ctx->reg[src->num];
353
default:
354
return NULL;
355
}
356
}
357
358
/* gets a ncomp value for the dst */
359
static inline unsigned
360
dst_ncomp(struct ir2_instr *instr)
361
{
362
if (instr->is_ssa)
363
return instr->ssa.ncomp;
364
365
if (instr->type == IR2_FETCH)
366
return instr->reg->ncomp;
367
368
assert(instr->type == IR2_ALU);
369
370
unsigned ncomp = 0;
371
for (int i = 0; i < instr->reg->ncomp; i++)
372
ncomp += !!(instr->alu.write_mask & 1 << i);
373
return ncomp;
374
}
375
376
/* gets a ncomp value for the src registers */
377
static inline unsigned
378
src_ncomp(struct ir2_instr *instr)
379
{
380
if (instr->type == IR2_FETCH) {
381
switch (instr->fetch.opc) {
382
case VTX_FETCH:
383
return 1;
384
case TEX_FETCH:
385
return instr->fetch.tex.is_cube ? 3 : 2;
386
case TEX_SET_TEX_LOD:
387
return 1;
388
default:
389
assert(0);
390
}
391
}
392
393
switch (instr->alu.scalar_opc) {
394
case PRED_SETEs ... KILLONEs:
395
return 1;
396
default:
397
break;
398
}
399
400
switch (instr->alu.vector_opc) {
401
case DOT2ADDv:
402
return 2;
403
case DOT3v:
404
return 3;
405
case DOT4v:
406
case CUBEv:
407
case PRED_SETE_PUSHv:
408
return 4;
409
default:
410
return dst_ncomp(instr);
411
}
412
}
413
414