Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir.h
4574 views
1
/*
2
* Copyright 2011 Christoph Bumiller
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*/
22
23
#ifndef __NV50_IR_H__
24
#define __NV50_IR_H__
25
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <stdint.h>
29
#include <deque>
30
#include <list>
31
#include <vector>
32
33
#include "codegen/unordered_set.h"
34
#include "codegen/nv50_ir_util.h"
35
#include "codegen/nv50_ir_graph.h"
36
37
#include "codegen/nv50_ir_driver.h"
38
39
namespace nv50_ir {
40
41
// Opcodes of the intermediate representation.
// Enumerators are numbered consecutively starting at OP_NOP = 0; OP_LAST is
// the terminating sentinel. Instruction::isPseudo() treats every opcode that
// is ordered before OP_MOV as a pseudo instruction, so the order matters.
enum operation
{
   OP_NOP = 0,
   OP_PHI,
   OP_UNION, // unify a new definition and several source values
   OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
   OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
   OP_CONSTRAINT, // copy values into consecutive registers
   OP_MOV, // simple copy, no modifiers allowed
   OP_LOAD,
   OP_STORE,
   OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
   OP_SUB,
   OP_MUL,
   OP_DIV,
   OP_MOD,
   OP_MAD,
   OP_FMA,
   OP_SAD, // abs(src0 - src1) + src2
   OP_SHLADD,
   // extended multiply-add (GM107+), does a lot of things.
   // see envytools for detailed documentation
   OP_XMAD,
   OP_ABS,
   OP_NEG,
   OP_NOT,
   OP_AND,
   OP_OR,
   OP_XOR,
   OP_LOP3_LUT,
   OP_SHL,
   OP_SHR,
   OP_SHF,
   OP_MAX,
   OP_MIN,
   OP_SAT, // CLAMP(f32, 0.0, 1.0)
   OP_CEIL,
   OP_FLOOR,
   OP_TRUNC,
   OP_CVT,
   OP_SET_AND, // dst = (src0 CMP src1) & src2
   OP_SET_OR,
   OP_SET_XOR,
   OP_SET,
   OP_SELP, // dst = src2 ? src0 : src1
   OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
   OP_RCP,
   OP_RSQ,
   OP_LG2,
   OP_SIN,
   OP_COS,
   OP_EX2,
   OP_EXP, // exponential (base M_E)
   OP_LOG, // natural logarithm
   OP_PRESIN,
   OP_PREEX2,
   OP_SQRT,
   OP_POW,
   OP_BRA,
   OP_CALL,
   OP_RET,
   OP_CONT,
   OP_BREAK,
   OP_PRERET,
   OP_PRECONT,
   OP_PREBREAK,
   OP_BRKPT, // breakpoint (not related to loops)
   OP_JOINAT, // push control flow convergence point
   OP_JOIN, // converge
   OP_DISCARD,
   OP_EXIT,
   OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
   OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
   OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
   OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
   OP_EXPORT,
   OP_LINTERP,
   OP_PINTERP,
   OP_EMIT, // emit vertex
   OP_RESTART, // restart primitive
   OP_FINAL, // finish emitting primitives
   OP_TEX,
   OP_TXB, // texture bias
   OP_TXL, // texture lod
   OP_TXF, // texel fetch
   OP_TXQ, // texture size query
   OP_TXD, // texture derivatives
   OP_TXG, // texture gather
   OP_TXLQ, // texture query lod
   OP_TEXCSAA, // texture op for coverage sampling
   OP_TEXPREP, // turn cube map array into 2d array coordinates
   OP_SULDB, // surface load (raw)
   OP_SULDP, // surface load (formatted)
   OP_SUSTB, // surface store (raw)
   OP_SUSTP, // surface store (formatted)
   OP_SUREDB,
   OP_SUREDP, // surface reduction (atomic op)
   OP_SULEA, // surface load effective address
   OP_SUBFM, // surface bitfield manipulation
   OP_SUCLAMP, // clamp surface coordinates
   OP_SUEAU, // surface effective address
   OP_SUQ, // surface query
   OP_MADSP, // special integer multiply-add
   OP_TEXBAR, // texture dependency barrier
   OP_DFDX,
   OP_DFDY,
   OP_RDSV, // read system value
   OP_WRSV, // write system value
   OP_PIXLD, // get info about raster object or surfaces
   OP_QUADOP,
   OP_QUADON,
   OP_QUADPOP,
   OP_POPCNT, // bitcount(src0 & src1)
   OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
   OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
   OP_BFIND, // find highest/lowest set bit
   OP_BREV, // bitfield reverse
   OP_BMSK, // bitfield mask
   OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
   OP_SGXT,
   OP_ATOM,
   OP_BAR, // execution barrier, sources = { id, thread count, predicate }
   OP_VADD, // byte/word vector operations
   OP_VAVG,
   OP_VMIN,
   OP_VMAX,
   OP_VSAD,
   OP_VSET,
   OP_VSHR,
   OP_VSHL,
   OP_VSEL,
   OP_CCTL, // cache control
   OP_SHFL, // warp shuffle
   OP_VOTE,
   OP_BUFQ, // buffer query
   OP_WARPSYNC,
   OP_LAST
};
179
180
// various instruction-specific modifier definitions Instruction::subOp
// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
//
// The same numeric value is reused by different opcodes; a sub-op is only
// meaningful together with the opcode named in the macro.
#define NV50_IR_SUBOP_MUL_HIGH     1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL       1
#define NV50_IR_SUBOP_LDC_IS       2
#define NV50_IR_SUBOP_LDC_ISL      3
#define NV50_IR_SUBOP_SHIFT_WRAP   1
#define NV50_IR_SUBOP_SHIFT_HIGH   2
#define NV50_IR_SUBOP_EMU_PRERET   1
// argument is parenthesised so that expression arguments expand safely
#define NV50_IR_SUBOP_TEXBAR(n)    (n)
#define NV50_IR_SUBOP_MOV_FINAL    1
#define NV50_IR_SUBOP_EXTBF_REV    1
#define NV50_IR_SUBOP_BFIND_SAMT   1
#define NV50_IR_SUBOP_RCPRSQ_64H   1
#define NV50_IR_SUBOP_PERMT_F4E    1
#define NV50_IR_SUBOP_PERMT_B4E    2
#define NV50_IR_SUBOP_PERMT_RC8    3
#define NV50_IR_SUBOP_PERMT_ECL    4
#define NV50_IR_SUBOP_PERMT_ECR    5
#define NV50_IR_SUBOP_PERMT_RC16   6
#define NV50_IR_SUBOP_BAR_SYNC     0
#define NV50_IR_SUBOP_BAR_ARRIVE   1
#define NV50_IR_SUBOP_BAR_RED_AND  2
#define NV50_IR_SUBOP_BAR_RED_OR   3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR sub-op layout: bits [1:0] hold the direction (L/S/M), the bits above
// hold the scope (CTA/GL/SYS).
#define NV50_IR_SUBOP_MEMBAR_L     1
#define NV50_IR_SUBOP_MEMBAR_S     2
#define NV50_IR_SUBOP_MEMBAR_M     3
#define NV50_IR_SUBOP_MEMBAR_CTA  (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL   (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS  (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m)   ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
// d and s are the suffixes of the direction / scope macros, e.g. (L, GL)
#define NV50_IR_SUBOP_MEMBAR(d,s) \
   (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
#define NV50_IR_SUBOP_ATOM_ADD      0
#define NV50_IR_SUBOP_ATOM_MIN      1
#define NV50_IR_SUBOP_ATOM_MAX      2
#define NV50_IR_SUBOP_ATOM_INC      3
#define NV50_IR_SUBOP_ATOM_DEC      4
#define NV50_IR_SUBOP_ATOM_AND      5
#define NV50_IR_SUBOP_ATOM_OR       6
#define NV50_IR_SUBOP_ATOM_XOR      7
#define NV50_IR_SUBOP_ATOM_CAS      8
#define NV50_IR_SUBOP_ATOM_EXCH     9
#define NV50_IR_SUBOP_CCTL_IV       5
#define NV50_IR_SUBOP_CCTL_IVALL    6
#define NV50_IR_SUBOP_SUST_IGN      0
#define NV50_IR_SUBOP_SUST_TRAP     1
#define NV50_IR_SUBOP_SUST_SDCL     3
#define NV50_IR_SUBOP_SULD_ZERO     0
#define NV50_IR_SUBOP_SULD_TRAP     1
#define NV50_IR_SUBOP_SULD_SDCL     3
#define NV50_IR_SUBOP_SUBFM_3D      1
235
// SUCLAMP sub-ops: the low bits select the clamp mode (base 0 / 5 / 10 for
// SD / PL / BL, plus r); bit 4 (SUCLAMP_2D) is set when d equals 2.
// Both macro arguments are parenthesised so that expression arguments such as
// (a & b) are compared as a whole.
#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) \
   (( 0 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) \
   (( 5 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) \
   ((10 + (r)) | (((d) == 2) ? NV50_IR_SUBOP_SUCLAMP_2D : 0))
239
#define NV50_IR_SUBOP_PIXLD_COUNT       0
#define NV50_IR_SUBOP_PIXLD_COVMASK     1
#define NV50_IR_SUBOP_PIXLD_COVERED     2
#define NV50_IR_SUBOP_PIXLD_OFFSET      3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID    5
#define NV50_IR_SUBOP_SHFL_IDX  0
#define NV50_IR_SUBOP_SHFL_UP   1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
#define NV50_IR_SUBOP_LOAD_LOCKED    1
#define NV50_IR_SUBOP_STORE_UNLOCKED 2
#define NV50_IR_SUBOP_MADSP_SD     0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
// Vector sub-ops: a in bits [4:0], b from bit 5, d from bit 10; bits [15:14]
// carry the V1/V2/V4 selector that NV50_IR_SUBOP_Vn() extracts.
#define NV50_IR_SUBOP_V1(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n)        ((n) >> 14)
#define NV50_IR_SUBOP_VOTE_ALL 0
#define NV50_IR_SUBOP_VOTE_ANY 1
#define NV50_IR_SUBOP_VOTE_UNI 2
#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0
#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc
#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa
// Evaluates exp with a, b and c bound to the SRC0/SRC1/SRC2 patterns above
// and truncates the result to 8 bits, e.g. NV50_IR_SUBOP_LOP3_LUT(a & b).
// Relies on the GNU statement-expression extension.
#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \
      uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \
      uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \
      uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
      (uint8_t)(exp); \
   })
273
#define NV50_IR_SUBOP_BMSK_C (0 << 0)
#define NV50_IR_SUBOP_BMSK_W (1 << 0)

#define NV50_IR_SUBOP_MINMAX_LOW  1
#define NV50_IR_SUBOP_MINMAX_MED  2
#define NV50_IR_SUBOP_MINMAX_HIGH 3

// SHF sub-op is a bitmask: bit 0 = L/R, bit 1 = LO/HI, bit 2 = C/W
#define NV50_IR_SUBOP_SHF_L  (0 << 0)
#define NV50_IR_SUBOP_SHF_R  (1 << 0)
#define NV50_IR_SUBOP_SHF_LO (0 << 1)
#define NV50_IR_SUBOP_SHF_HI (1 << 1)
#define NV50_IR_SUBOP_SHF_C  (0 << 2)
#define NV50_IR_SUBOP_SHF_W  (1 << 2)

// xmad(src0, src1, 0) << 16 + src2
#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
// xmad(src0, src1, src2.lo)
#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
// xmad(src0, src1, src2.hi)
#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
// if both operands to the multiplication are non-zero, subtract 65536 for each
// negative operand
#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
// xmad(src0, src1, src2) + src1 << 16
#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
// the C* modes above occupy a 3-bit field starting at CMODE_SHIFT
#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2
#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)

// use the high 16 bits instead of the low 16 bits for the multiplication.
// if the instruction's sType is signed, sign extend the operand from 16 bits
// to 32 before multiplication.
#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5
#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))
#define NV50_IR_SUBOP_XMAD_H1_MASK (0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)
309
310
// Operand / result data types; enumerators are numbered from TYPE_NONE = 0.
enum DataType
{
   TYPE_NONE,
   TYPE_U8,
   TYPE_S8,
   TYPE_U16,
   TYPE_S16,
   TYPE_U32,
   TYPE_S32,
   TYPE_U64, // 64 bit operations are only lowered after register allocation
   TYPE_S64,
   TYPE_F16,
   TYPE_F32,
   TYPE_F64,
   TYPE_B96,
   TYPE_B128
};
327
328
// Condition codes. Several enumerators are aliases sharing one value; which
// name applies depends on the register file being tested (see the comments).
enum CondCode
{
   CC_FL = 0,
   CC_NEVER = CC_FL, // when used with FILE_FLAGS
   CC_LT = 1,
   CC_EQ = 2,
   CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
   CC_LE = 3,
   CC_GT = 4,
   CC_NE = 5,
   CC_P = CC_NE,
   CC_GE = 6,
   CC_TR = 7,
   CC_ALWAYS = CC_TR,
   CC_U = 8,
   CC_LTU = 9,
   CC_EQU = 10,
   CC_LEU = 11,
   CC_GTU = 12,
   CC_NEU = 13,
   CC_GEU = 14,
   CC_NO = 0x10,
   CC_NC = 0x11,
   CC_NS = 0x12,
   CC_NA = 0x13,
   CC_A = 0x14,
   CC_S = 0x15,
   CC_C = 0x16,
   CC_O = 0x17
};
358
359
// Rounding modes; the *I variants round to an integer value.
enum RoundMode
{
   ROUND_N, // nearest
   ROUND_M, // towards -inf
   ROUND_Z, // towards 0
   ROUND_P, // towards +inf
   ROUND_NI, // nearest integer
   ROUND_MI, // to integer towards -inf
   ROUND_ZI, // to integer towards 0
   ROUND_PI, // to integer towards +inf
};
370
371
// Cache modes. CACHE_WB aliases CACHE_CA and CACHE_WT aliases CACHE_CV, so
// each of those pairs shares one numeric value.
enum CacheMode
{
   CACHE_CA,            // cache at all levels
   CACHE_WB = CACHE_CA, // cache write back
   CACHE_CG,            // cache at global level
   CACHE_CS,            // cache streaming
   CACHE_CV,            // cache as volatile
   CACHE_WT = CACHE_CV  // cache write-through
};
380
381
// Storage classes a Value can live in. Everything up to and including
// LAST_REGISTER_FILE (== FILE_BARRIER) is a register file; DATA_FILE_COUNT is
// the terminating sentinel.
enum DataFile
{
   FILE_NULL = 0,
   FILE_GPR,
   FILE_PREDICATE,       // boolean predicate
   FILE_FLAGS,           // zero/sign/carry/overflow bits
   FILE_ADDRESS,
   FILE_BARRIER,
   LAST_REGISTER_FILE = FILE_BARRIER,
   FILE_IMMEDIATE,
   FILE_MEMORY_CONST,
   FILE_SHADER_INPUT,
   FILE_SHADER_OUTPUT,
   FILE_MEMORY_BUFFER,
   FILE_MEMORY_GLOBAL,
   FILE_MEMORY_SHARED,
   FILE_MEMORY_LOCAL,
   FILE_SYSTEM_VALUE,
   FILE_THREAD_STATE,    // "special" barrier registers
   DATA_FILE_COUNT
};
402
403
// Texture target kinds. TEX_TARGET_COUNT is the terminating sentinel and is
// used to size TexInstruction::Target::descTable.
enum TexTarget
{
   TEX_TARGET_1D,
   TEX_TARGET_2D,
   TEX_TARGET_2D_MS,
   TEX_TARGET_3D,
   TEX_TARGET_CUBE,
   TEX_TARGET_1D_SHADOW,
   TEX_TARGET_2D_SHADOW,
   TEX_TARGET_CUBE_SHADOW,
   TEX_TARGET_1D_ARRAY,
   TEX_TARGET_2D_ARRAY,
   TEX_TARGET_2D_MS_ARRAY,
   TEX_TARGET_CUBE_ARRAY,
   TEX_TARGET_1D_ARRAY_SHADOW,
   TEX_TARGET_2D_ARRAY_SHADOW,
   TEX_TARGET_RECT,
   TEX_TARGET_RECT_SHADOW,
   TEX_TARGET_CUBE_ARRAY_SHADOW,
   TEX_TARGET_BUFFER,
   TEX_TARGET_COUNT
};
425
426
// Image formats. IMG_FORMAT_COUNT is the terminating sentinel and is used to
// size TexInstruction::formatTable.
enum ImgFormat
{
   FMT_NONE,

   FMT_RGBA32F,
   FMT_RGBA16F,
   FMT_RG32F,
   FMT_RG16F,
   FMT_R11G11B10F,
   FMT_R32F,
   FMT_R16F,

   FMT_RGBA32UI,
   FMT_RGBA16UI,
   FMT_RGB10A2UI,
   FMT_RGBA8UI,
   FMT_RG32UI,
   FMT_RG16UI,
   FMT_RG8UI,
   FMT_R32UI,
   FMT_R16UI,
   FMT_R8UI,

   FMT_RGBA32I,
   FMT_RGBA16I,
   FMT_RGBA8I,
   FMT_RG32I,
   FMT_RG16I,
   FMT_RG8I,
   FMT_R32I,
   FMT_R16I,
   FMT_R8I,

   FMT_RGBA16,
   FMT_RGB10A2,
   FMT_RGBA8,
   FMT_RG16,
   FMT_RG8,
   FMT_R16,
   FMT_R8,

   FMT_RGBA16_SNORM,
   FMT_RGBA8_SNORM,
   FMT_RG16_SNORM,
   FMT_RG8_SNORM,
   FMT_R16_SNORM,
   FMT_R8_SNORM,

   FMT_BGRA8,

   IMG_FORMAT_COUNT,
};
478
479
// Numeric class of an image format (stored in TexInstruction::ImgFormatDesc).
enum ImgType {
   UINT,
   SINT,
   UNORM,
   SNORM,
   FLOAT,
};
486
487
// System value semantics (read/written via OP_RDSV / OP_WRSV).
// SV_LAST is the terminating sentinel.
enum SVSemantic
{
   SV_POSITION, // WPOS
   SV_VERTEX_ID,
   SV_INSTANCE_ID,
   SV_INVOCATION_ID,
   SV_PRIMITIVE_ID,
   SV_VERTEX_COUNT, // gl_PatchVerticesIn
   SV_LAYER,
   SV_VIEWPORT_INDEX,
   SV_VIEWPORT_MASK,
   SV_YDIR,
   SV_FACE,
   SV_POINT_SIZE,
   SV_POINT_COORD,
   SV_CLIP_DISTANCE,
   SV_SAMPLE_INDEX,
   SV_SAMPLE_POS,
   SV_SAMPLE_MASK,
   SV_TESS_OUTER,
   SV_TESS_INNER,
   SV_TESS_COORD,
   SV_TID,
   SV_COMBINED_TID,
   SV_CTAID,
   SV_NTID,
   SV_GRIDID,
   SV_NCTAID,
   SV_LANEID,
   SV_PHYSID,
   SV_NPHYSID,
   SV_CLOCK,
   SV_LBASE,
   SV_SBASE,
   SV_VERTEX_STRIDE,
   SV_INVOCATION_INFO,
   SV_THREAD_KILL,
   SV_BASEVERTEX,
   SV_BASEINSTANCE,
   SV_DRAWID,
   SV_WORK_DIM,
   SV_LANEMASK_EQ,
   SV_LANEMASK_LT,
   SV_LANEMASK_LE,
   SV_LANEMASK_GT,
   SV_LANEMASK_GE,
   SV_UNDEFINED,
   SV_LAST
};
536
537
// Thread state semantics. Values 0-15 are given explicitly; the enumerator
// after them continues the numbering.
enum TSSemantic
{
   // 0-15 are fixed ones on Volta/Turing
   TS_THREAD_STATE_ENUM0 = 0,
   TS_THREAD_STATE_ENUM1 = 1,
   TS_THREAD_STATE_ENUM2 = 2,
   TS_THREAD_STATE_ENUM3 = 3,
   TS_THREAD_STATE_ENUM4 = 4,
   TS_TRAP_RETURN_PC_LO = 5,
   TS_TRAP_RETURN_PC_HI = 6,
   TS_TRAP_RETURN_MASK = 7,
   TS_MEXITED = 8,
   TS_MKILL = 9,
   TS_MACTIVE = 10,
   TS_MATEXIT = 11,
   TS_OPT_STACK = 12,
   TS_API_CALL_DEPTH = 13,
   TS_ATEXIT_PC_LO = 14,
   TS_ATEXIT_PC_HI = 15,
   // special ones to make our life easier
   TS_PQUAD_MACTIVE,
};
559
560
// Forward declarations of the IR classes referenced before their definition.
class Program;
class Function;
class BasicBlock;

class Target;

class Instruction;
class CmpInstruction;
class TexInstruction;
class FlowInstruction;

class Value;
class LValue;
class Symbol;
class ImmediateValue;
575
576
struct Storage
577
{
578
DataFile file;
579
int8_t fileIndex; // signed, may be indirect for CONST[]
580
uint8_t size; // this should match the Instruction type's size
581
DataType type; // mainly for pretty printing
582
union {
583
uint64_t u64; // immediate values
584
uint32_t u32;
585
uint16_t u16;
586
uint8_t u8;
587
int64_t s64;
588
int32_t s32;
589
int16_t s16;
590
int8_t s8;
591
float f32;
592
double f64;
593
int32_t offset; // offset from 0 (base of address space)
594
int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
595
struct {
596
SVSemantic sv;
597
int index;
598
} sv;
599
TSSemantic ts;
600
} data;
601
};
602
603
// Operand modifier bits (stored in Modifier).
// precedence: NOT after SAT after NEG after ABS
#define NV50_IR_MOD_ABS (1 << 0)
#define NV50_IR_MOD_NEG (1 << 1)
#define NV50_IR_MOD_SAT (1 << 2)
#define NV50_IR_MOD_NOT (1 << 3)
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)

// Interpolation settings: bits [1:0] select the mode, bits [3:2] the sample
// location.
#define NV50_IR_INTERP_MODE_MASK   0x3
#define NV50_IR_INTERP_LINEAR      (0 << 0)
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
#define NV50_IR_INTERP_FLAT        (2 << 0)
#define NV50_IR_INTERP_SC          (3 << 0) // what exactly is that ?
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
#define NV50_IR_INTERP_DEFAULT     (0 << 2)
#define NV50_IR_INTERP_CENTROID    (1 << 2)
#define NV50_IR_INTERP_OFFSET      (2 << 2)
#define NV50_IR_INTERP_SAMPLEID    (3 << 2)
620
621
// do we really want this to be a class ?
622
class Modifier
623
{
624
public:
625
Modifier() : bits(0) { }
626
Modifier(unsigned int m) : bits(m) { }
627
Modifier(operation op);
628
629
// @return new Modifier applying a after b (asserts if unrepresentable)
630
Modifier operator*(const Modifier) const;
631
Modifier operator*=(const Modifier m) { *this = *this * m; return *this; }
632
Modifier operator==(const Modifier m) const { return m.bits == bits; }
633
Modifier operator!=(const Modifier m) const { return m.bits != bits; }
634
635
inline Modifier operator&(const Modifier m) const { return bits & m.bits; }
636
inline Modifier operator|(const Modifier m) const { return bits | m.bits; }
637
inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }
638
639
operation getOp() const;
640
641
inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }
642
inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }
643
644
inline operator bool() const { return bits ? true : false; }
645
646
void applyTo(ImmediateValue &imm) const;
647
648
int print(char *buf, size_t size) const;
649
650
private:
651
uint8_t bits;
652
};
653
654
class ValueRef
655
{
656
public:
657
ValueRef(Value * = NULL);
658
ValueRef(const ValueRef&);
659
~ValueRef();
660
661
inline bool exists() const { return value != NULL; }
662
663
void set(Value *);
664
void set(const ValueRef&);
665
inline Value *get() const { return value; }
666
inline Value *rep() const;
667
668
inline Instruction *getInsn() const { return insn; }
669
inline void setInsn(Instruction *inst) { insn = inst; }
670
671
inline bool isIndirect(int dim) const { return indirect[dim] >= 0; }
672
inline const ValueRef *getIndirect(int dim) const;
673
674
inline DataFile getFile() const;
675
inline unsigned getSize() const;
676
677
// SSA: return eventual (traverse MOVs) literal value, if it exists
678
bool getImmediate(ImmediateValue&) const;
679
680
public:
681
Modifier mod;
682
int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
683
684
bool usedAsPtr; // for printing
685
686
private:
687
Value *value;
688
Instruction *insn;
689
};
690
691
class ValueDef
692
{
693
public:
694
ValueDef(Value * = NULL);
695
ValueDef(const ValueDef&);
696
~ValueDef();
697
698
inline bool exists() const { return value != NULL; }
699
700
inline Value *get() const { return value; }
701
inline Value *rep() const;
702
void set(Value *);
703
bool mayReplace(const ValueRef &);
704
void replace(const ValueRef &, bool doSet); // replace all uses of the old value
705
706
inline Instruction *getInsn() const { return insn; }
707
inline void setInsn(Instruction *inst) { insn = inst; }
708
709
inline DataFile getFile() const;
710
inline unsigned getSize() const;
711
712
inline void setSSA(LValue *);
713
inline const LValue *preSSA() const;
714
715
private:
716
Value *value; // should make this LValue * ...
717
LValue *origin; // pre SSA value
718
Instruction *insn;
719
};
720
721
class Value
722
{
723
public:
724
Value();
725
virtual ~Value() { }
726
727
virtual Value *clone(ClonePolicy<Function>&) const = 0;
728
729
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
730
731
virtual bool equals(const Value *, bool strict = false) const;
732
virtual bool interfers(const Value *) const;
733
virtual bool isUniform() const { return true; }
734
735
inline Value *rep() const { return join; }
736
737
inline Instruction *getUniqueInsn() const;
738
inline Instruction *getInsn() const; // use when uniqueness is certain
739
740
inline int refCount() { return uses.size(); }
741
742
inline LValue *asLValue();
743
inline Symbol *asSym();
744
inline ImmediateValue *asImm();
745
inline const Symbol *asSym() const;
746
inline const ImmediateValue *asImm() const;
747
748
inline bool inFile(DataFile f) const { return reg.file == f; }
749
750
static inline Value *get(Iterator&);
751
752
unordered_set<ValueRef *> uses;
753
std::list<ValueDef *> defs;
754
typedef unordered_set<ValueRef *>::iterator UseIterator;
755
typedef unordered_set<ValueRef *>::const_iterator UseCIterator;
756
typedef std::list<ValueDef *>::iterator DefIterator;
757
typedef std::list<ValueDef *>::const_iterator DefCIterator;
758
759
int id;
760
Storage reg;
761
762
// TODO: these should be in LValue:
763
Interval livei;
764
Value *join;
765
};
766
767
class LValue : public Value
768
{
769
public:
770
LValue(Function *, DataFile file);
771
LValue(Function *, LValue *);
772
~LValue() { }
773
774
virtual bool isUniform() const;
775
776
virtual LValue *clone(ClonePolicy<Function>&) const;
777
778
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
779
780
public:
781
unsigned compMask : 8; // compound/component mask
782
unsigned compound : 1; // used by RA, value involved in split/merge
783
unsigned ssa : 1;
784
unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
785
unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
786
};
787
788
class Symbol : public Value
789
{
790
public:
791
Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
792
~Symbol() { }
793
794
virtual Symbol *clone(ClonePolicy<Function>&) const;
795
796
virtual bool equals(const Value *that, bool strict) const;
797
798
virtual bool isUniform() const;
799
800
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
801
802
// print with indirect values
803
int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;
804
805
inline void setFile(DataFile file, ubyte fileIndex = 0)
806
{
807
reg.file = file;
808
reg.fileIndex = fileIndex;
809
}
810
811
inline void setOffset(int32_t offset);
812
inline void setAddress(Symbol *base, int32_t offset);
813
inline void setSV(SVSemantic sv, uint32_t idx = 0);
814
815
inline const Symbol *getBase() const { return baseSym; }
816
817
private:
818
Symbol *baseSym; // array base for Symbols representing array elements
819
};
820
821
class ImmediateValue : public Value
822
{
823
public:
824
ImmediateValue() { }
825
ImmediateValue(Program *, uint32_t);
826
ImmediateValue(Program *, float);
827
ImmediateValue(Program *, double);
828
// NOTE: not added to program with
829
ImmediateValue(const ImmediateValue *, DataType ty);
830
~ImmediateValue() { };
831
832
virtual ImmediateValue *clone(ClonePolicy<Function>&) const;
833
834
virtual bool equals(const Value *that, bool strict) const;
835
836
// these only work if 'type' is valid (we mostly use untyped literals):
837
bool isInteger(const int ival) const; // ival is cast to this' type
838
bool isNegative() const;
839
bool isPow2() const;
840
841
void applyLog2();
842
843
// for constant folding:
844
ImmediateValue operator+(const ImmediateValue&) const;
845
ImmediateValue operator-(const ImmediateValue&) const;
846
ImmediateValue operator*(const ImmediateValue&) const;
847
ImmediateValue operator/(const ImmediateValue&) const;
848
849
ImmediateValue& operator=(const ImmediateValue&); // only sets value !
850
851
bool compare(CondCode cc, float fval) const;
852
853
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
854
};
855
856
class Instruction
857
{
858
public:
859
Instruction();
860
Instruction(Function *, operation, DataType);
861
virtual ~Instruction();
862
863
virtual Instruction *clone(ClonePolicy<Function>&,
864
Instruction * = NULL) const;
865
866
void setDef(int i, Value *);
867
void setSrc(int s, Value *);
868
void setSrc(int s, const ValueRef&);
869
void swapSources(int a, int b);
870
void moveSources(int s, int delta);
871
bool setIndirect(int s, int dim, Value *);
872
873
inline ValueRef& src(int s) { return srcs[s]; }
874
inline ValueDef& def(int s) { return defs[s]; }
875
inline const ValueRef& src(int s) const { return srcs[s]; }
876
inline const ValueDef& def(int s) const { return defs[s]; }
877
878
inline Value *getDef(int d) const { return defs[d].get(); }
879
inline Value *getSrc(int s) const { return srcs[s].get(); }
880
inline Value *getIndirect(int s, int dim) const;
881
882
inline bool defExists(unsigned d) const
883
{
884
return d < defs.size() && defs[d].exists();
885
}
886
inline bool srcExists(unsigned s) const
887
{
888
return s < srcs.size() && srcs[s].exists();
889
}
890
891
inline bool constrainedDefs() const;
892
893
bool setPredicate(CondCode ccode, Value *);
894
inline Value *getPredicate() const;
895
bool writesPredicate() const;
896
inline bool isPredicated() const { return predSrc >= 0; }
897
898
inline void setFlagsSrc(int s, Value *);
899
inline void setFlagsDef(int d, Value *);
900
inline bool usesFlags() const { return flagsSrc >= 0; }
901
902
unsigned int defCount() const { return defs.size(); };
903
unsigned int defCount(unsigned int mask, bool singleFile = false) const;
904
unsigned int srcCount() const { return srcs.size(); };
905
unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
906
907
// save & remove / set indirect[0,1] and predicate source
908
void takeExtraSources(int s, Value *[3]);
909
void putExtraSources(int s, Value *[3]);
910
911
inline void setType(DataType type) { dType = sType = type; }
912
913
inline void setType(DataType dtype, DataType stype)
914
{
915
dType = dtype;
916
sType = stype;
917
}
918
919
inline bool isPseudo() const { return op < OP_MOV; }
920
bool isDead() const;
921
bool isNop() const;
922
bool isCommutationLegal(const Instruction *) const; // must be adjacent !
923
bool isActionEqual(const Instruction *) const;
924
bool isResultEqual(const Instruction *) const;
925
926
// check whether the defs interfere with srcs and defs of another instruction
927
bool canCommuteDefDef(const Instruction *) const;
928
bool canCommuteDefSrc(const Instruction *) const;
929
930
void print() const;
931
932
inline CmpInstruction *asCmp();
933
inline TexInstruction *asTex();
934
inline FlowInstruction *asFlow();
935
inline const TexInstruction *asTex() const;
936
inline const CmpInstruction *asCmp() const;
937
inline const FlowInstruction *asFlow() const;
938
939
public:
940
Instruction *next;
941
Instruction *prev;
942
int id;
943
int serial; // CFG order
944
945
operation op;
946
DataType dType; // destination or defining type
947
DataType sType; // source or secondary type
948
CondCode cc;
949
RoundMode rnd;
950
CacheMode cache;
951
952
uint16_t subOp; // quadop, 1 for mul-high, etc.
953
954
unsigned encSize : 5; // encoding size in bytes
955
unsigned saturate : 1; // to [0.0f, 1.0f]
956
unsigned join : 1; // converge control flow (use OP_JOIN until end)
957
unsigned fixed : 1; // prevent dead code elimination
958
unsigned terminator : 1; // end of basic block
959
unsigned ftz : 1; // flush denormal to zero
960
unsigned dnz : 1; // denormals, NaN are zero
961
unsigned ipa : 4; // interpolation mode
962
unsigned lanes : 4;
963
unsigned perPatch : 1;
964
unsigned exit : 1; // terminate program after insn
965
unsigned mask : 4; // for vector ops
966
// prevent algebraic optimisations that aren't bit-for-bit identical
967
unsigned precise : 1;
968
969
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
970
971
int8_t predSrc;
972
int8_t flagsDef;
973
int8_t flagsSrc;
974
975
uint32_t sched; // scheduling data (NOTE: maybe move to separate storage)
976
977
BasicBlock *bb;
978
979
protected:
980
std::deque<ValueDef> defs; // no gaps !
981
std::deque<ValueRef> srcs; // no gaps !
982
983
// instruction specific methods:
984
// (don't want to subclass, would need more constructors and memory pools)
985
public:
986
inline void setInterpolate(unsigned int mode) { ipa = mode; }
987
988
unsigned int getInterpMode() const { return ipa & 0x3; }
989
unsigned int getSampleMode() const { return ipa & 0xc; }
990
991
private:
992
void init();
993
};
994
995
// Kinds of texture query (stored in TexInstruction::tex.query).
enum TexQuery
{
   TXQ_DIMS, /* x, y, z, levels */
   TXQ_TYPE, /* ?, ?, samples, ? */
   TXQ_SAMPLE_POSITION,
   TXQ_FILTER,
   TXQ_LOD,
   TXQ_WRAP,
   TXQ_BORDER_COLOUR
};
1005
1006
class TexInstruction : public Instruction
1007
{
1008
public:
1009
class Target
1010
{
1011
public:
1012
Target(TexTarget targ = TEX_TARGET_1D) : target(targ) { }
1013
1014
const char *getName() const { return descTable[target].name; }
1015
unsigned int getArgCount() const { return descTable[target].argc; }
1016
unsigned int getDim() const { return descTable[target].dim; }
1017
int isArray() const { return descTable[target].array ? 1 : 0; }
1018
int isCube() const { return descTable[target].cube ? 1 : 0; }
1019
int isShadow() const { return descTable[target].shadow ? 1 : 0; }
1020
int isMS() const {
1021
return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }
1022
void clearMS() {
1023
if (isMS()) {
1024
if (isArray())
1025
target = TEX_TARGET_2D_ARRAY;
1026
else
1027
target = TEX_TARGET_2D;
1028
}
1029
}
1030
1031
Target& operator=(TexTarget targ)
1032
{
1033
assert(targ < TEX_TARGET_COUNT);
1034
target = targ;
1035
return *this;
1036
}
1037
1038
inline bool operator==(TexTarget targ) const { return target == targ; }
1039
inline bool operator!=(TexTarget targ) const { return target != targ; }
1040
1041
enum TexTarget getEnum() const { return target; }
1042
1043
private:
1044
struct Desc
1045
{
1046
char name[19];
1047
uint8_t dim;
1048
uint8_t argc;
1049
bool array;
1050
bool cube;
1051
bool shadow;
1052
};
1053
1054
static const struct Desc descTable[TEX_TARGET_COUNT];
1055
1056
private:
1057
enum TexTarget target;
1058
};
1059
1060
public:
1061
struct ImgFormatDesc
1062
{
1063
char name[19];
1064
uint8_t components;
1065
uint8_t bits[4];
1066
ImgType type;
1067
bool bgra;
1068
};
1069
1070
static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
1071
static const struct ImgFormatDesc *translateImgFormat(
1072
enum pipe_format format);
1073
1074
public:
1075
TexInstruction(Function *, operation);
1076
virtual ~TexInstruction();
1077
1078
virtual TexInstruction *clone(ClonePolicy<Function>&,
1079
Instruction * = NULL) const;
1080
1081
inline void setTexture(Target targ, uint8_t r, uint8_t s)
1082
{
1083
tex.r = r;
1084
tex.s = s;
1085
tex.target = targ;
1086
}
1087
1088
void setIndirectR(Value *);
1089
void setIndirectS(Value *);
1090
inline Value *getIndirectR() const;
1091
inline Value *getIndirectS() const;
1092
1093
public:
1094
struct {
1095
Target target;
1096
1097
uint16_t r;
1098
uint16_t s;
1099
int8_t rIndirectSrc;
1100
int8_t sIndirectSrc;
1101
1102
uint8_t mask;
1103
uint8_t gatherComp;
1104
1105
bool liveOnly; // only execute on live pixels of a quad (optimization)
1106
bool levelZero;
1107
bool derivAll;
1108
bool bindless;
1109
1110
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
1111
int8_t offset[3]; // only used on nv50
1112
1113
enum TexQuery query;
1114
const struct ImgFormatDesc *format;
1115
1116
bool scalar; // for GM107s TEXS, TLDS, TLD4S
1117
} tex;
1118
1119
ValueRef dPdx[3];
1120
ValueRef dPdy[3];
1121
ValueRef offset[4][3];
1122
};
1123
1124
class CmpInstruction : public Instruction
1125
{
1126
public:
1127
CmpInstruction(Function *, operation);
1128
1129
virtual CmpInstruction *clone(ClonePolicy<Function>&,
1130
Instruction * = NULL) const;
1131
1132
void setCondition(CondCode cond) { setCond = cond; }
1133
CondCode getCondition() const { return setCond; }
1134
1135
public:
1136
CondCode setCond;
1137
};
1138
1139
class FlowInstruction : public Instruction
1140
{
1141
public:
1142
FlowInstruction(Function *, operation, void *target);
1143
1144
virtual FlowInstruction *clone(ClonePolicy<Function>&,
1145
Instruction * = NULL) const;
1146
1147
public:
1148
unsigned allWarp : 1;
1149
unsigned absolute : 1;
1150
unsigned limit : 1;
1151
unsigned builtin : 1; // true for calls to emulation code
1152
unsigned indirect : 1; // target in src(0)
1153
1154
union {
1155
BasicBlock *bb;
1156
int builtin;
1157
Function *fn;
1158
} target;
1159
};
1160
1161
class BasicBlock
1162
{
1163
public:
1164
BasicBlock(Function *);
1165
~BasicBlock();
1166
1167
BasicBlock *clone(ClonePolicy<Function>&) const;
1168
1169
inline int getId() const { return id; }
1170
inline unsigned int getInsnCount() const { return numInsns; }
1171
inline bool isTerminated() const { return exit && exit->terminator; }
1172
1173
bool dominatedBy(BasicBlock *bb);
1174
inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);
1175
1176
// returns mask of conditional out blocks
1177
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
1178
unsigned int initiatesSimpleConditional() const;
1179
1180
public:
1181
Function *getFunction() const { return func; }
1182
Program *getProgram() const { return program; }
1183
1184
Instruction *getEntry() const { return entry; } // first non-phi instruction
1185
Instruction *getPhi() const { return phi; }
1186
Instruction *getFirst() const { return phi ? phi : entry; }
1187
Instruction *getExit() const { return exit; }
1188
1189
void insertHead(Instruction *);
1190
void insertTail(Instruction *);
1191
void insertBefore(Instruction *, Instruction *);
1192
void insertAfter(Instruction *, Instruction *);
1193
void remove(Instruction *);
1194
void permuteAdjacent(Instruction *, Instruction *);
1195
1196
BasicBlock *idom() const;
1197
1198
// NOTE: currently does not rebuild the dominator tree
1199
BasicBlock *splitBefore(Instruction *, bool attach = true);
1200
BasicBlock *splitAfter(Instruction *, bool attach = true);
1201
1202
DLList& getDF() { return df; }
1203
DLList::Iterator iterDF() { return df.iterator(); }
1204
1205
static inline BasicBlock *get(Iterator&);
1206
static inline BasicBlock *get(Graph::Node *);
1207
1208
public:
1209
Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
1210
Graph::Node dom;
1211
1212
BitSet liveSet;
1213
BitSet defSet;
1214
1215
uint32_t binPos;
1216
uint32_t binSize;
1217
1218
Instruction *joinAt; // for quick reference
1219
1220
bool explicitCont; // loop headers: true if loop contains continue stmts
1221
1222
private:
1223
int id;
1224
DLList df;
1225
1226
Instruction *phi;
1227
Instruction *entry;
1228
Instruction *exit;
1229
1230
unsigned int numInsns;
1231
1232
private:
1233
Function *func;
1234
Program *program;
1235
1236
void splitCommon(Instruction *, BasicBlock *, bool attach);
1237
};
1238
1239
class Function
1240
{
1241
public:
1242
Function(Program *, const char *name, uint32_t label);
1243
~Function();
1244
1245
static inline Function *get(Graph::Node *node);
1246
1247
inline Program *getProgram() const { return prog; }
1248
inline const char *getName() const { return name; }
1249
inline int getId() const { return id; }
1250
inline uint32_t getLabel() const { return label; }
1251
1252
void print();
1253
void printLiveIntervals() const;
1254
void printCFGraph(const char *filePath);
1255
1256
bool setEntry(BasicBlock *);
1257
bool setExit(BasicBlock *);
1258
1259
unsigned int orderInstructions(ArrayList&);
1260
1261
inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); }
1262
inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
1263
inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }
1264
1265
inline LValue *getLValue(int id);
1266
1267
void buildLiveSets();
1268
void buildDefSets();
1269
bool convertToSSA();
1270
1271
public:
1272
std::deque<ValueDef> ins;
1273
std::deque<ValueRef> outs;
1274
std::deque<Value *> clobbers;
1275
1276
Graph cfg;
1277
Graph::Node *cfgExit;
1278
Graph *domTree;
1279
Graph::Node call; // node in the call graph
1280
1281
BasicBlock **bbArray; // BBs in emission order
1282
int bbCount;
1283
1284
unsigned int loopNestingBound;
1285
int regClobberMax;
1286
1287
uint32_t binPos;
1288
uint32_t binSize;
1289
1290
Value *stackPtr;
1291
1292
uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
1293
uint32_t tlsSize;
1294
1295
ArrayList allBBlocks;
1296
ArrayList allInsns;
1297
ArrayList allLValues;
1298
1299
private:
1300
void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
1301
void buildDefSetsPreSSA(BasicBlock *bb, const int seq);
1302
1303
private:
1304
uint32_t label;
1305
int id;
1306
const char *const name;
1307
Program *prog;
1308
};
1309
1310
// Code generation stages, in ascending order. The explicit values equal
// the ones the compiler would assign implicitly.
enum CGStage
{
   CG_STAGE_PRE_SSA = 0,
   CG_STAGE_SSA     = 1, // expected directly before register allocation
   CG_STAGE_POST_RA = 2
};
1316
1317
class Program
1318
{
1319
public:
1320
enum Type
1321
{
1322
TYPE_VERTEX,
1323
TYPE_TESSELLATION_CONTROL,
1324
TYPE_TESSELLATION_EVAL,
1325
TYPE_GEOMETRY,
1326
TYPE_FRAGMENT,
1327
TYPE_COMPUTE
1328
};
1329
1330
Program(Type type, Target *targ);
1331
~Program();
1332
1333
void print();
1334
1335
Type getType() const { return progType; }
1336
1337
inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
1338
inline void del(Function *fn, int& id) { allFuncs.remove(id); }
1339
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
1340
1341
bool makeFromNIR(struct nv50_ir_prog_info *,
1342
struct nv50_ir_prog_info_out *);
1343
bool makeFromTGSI(struct nv50_ir_prog_info *,
1344
struct nv50_ir_prog_info_out *);
1345
bool convertToSSA();
1346
bool optimizeSSA(int level);
1347
bool optimizePostRA(int level);
1348
bool registerAllocation();
1349
bool emitBinary(struct nv50_ir_prog_info_out *);
1350
1351
const Target *getTarget() const { return target; }
1352
1353
private:
1354
Type progType;
1355
Target *target;
1356
1357
public:
1358
Function *main;
1359
Graph calls;
1360
1361
ArrayList allFuncs;
1362
ArrayList allRValues;
1363
1364
uint32_t *code;
1365
uint32_t binSize;
1366
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
1367
1368
int maxGPR;
1369
bool fp64;
1370
bool persampleInvocation;
1371
1372
MemoryPool mem_Instruction;
1373
MemoryPool mem_CmpInstruction;
1374
MemoryPool mem_TexInstruction;
1375
MemoryPool mem_FlowInstruction;
1376
MemoryPool mem_LValue;
1377
MemoryPool mem_Symbol;
1378
MemoryPool mem_ImmediateValue;
1379
1380
uint32_t dbgFlags;
1381
uint8_t optLevel;
1382
1383
void *targetPriv; // e.g. to carry information between passes
1384
1385
const struct nv50_ir_prog_info *driver; // for driver configuration
1386
const struct nv50_ir_prog_info_out *driver_out; // for driver configuration
1387
1388
void releaseInstruction(Instruction *);
1389
void releaseValue(Value *);
1390
};
1391
1392
// TODO: add const version
1393
class Pass
1394
{
1395
public:
1396
bool run(Program *, bool ordered = false, bool skipPhi = false);
1397
bool run(Function *, bool ordered = false, bool skipPhi = false);
1398
1399
private:
1400
// return false to continue with next entity on next higher level
1401
virtual bool visit(Function *) { return true; }
1402
virtual bool visit(BasicBlock *) { return true; }
1403
virtual bool visit(Instruction *) { return false; }
1404
1405
bool doRun(Program *, bool ordered, bool skipPhi);
1406
bool doRun(Function *, bool ordered, bool skipPhi);
1407
1408
protected:
1409
bool err;
1410
Function *func;
1411
Program *prog;
1412
};
1413
1414
// =============================================================================
1415
1416
#include "codegen/nv50_ir_inlines.h"
1417
1418
} // namespace nv50_ir
1419
1420
#endif // __NV50_IR_H__
1421
1422