Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
4574 views
1
/*
2
* Copyright 2011 Christoph Bumiller
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*/
22
23
#include "codegen/nv50_ir.h"
24
#include "codegen/nv50_ir_target.h"
25
26
namespace nv50_ir {
27
28
// Per-opcode source count table. Entries follow the opcode order named in the
// row comments; Target::create() statically checks there are OP_LAST + 1.
const uint8_t Target::operationSrcNr[] =
{
   // NOP, PHI
   0, 0,
   // UNION, SPLIT, MERGE, CONSTRAINT
   0, 0, 0, 0,
   // MOV, LOAD, STORE
   1, 1, 2,
   // ADD, SUB, MUL, DIV, MOD
   2, 2, 2, 2, 2,
   // MAD, FMA, SAD
   3, 3, 3,
   // SHLADD, XMAD
   3, 3,
   // ABS, NEG, NOT
   1, 1, 1,
   // AND, OR, XOR, LOP3_LUT
   2, 2, 2, 3,
   // SHL, SHR, SHF
   2, 2, 3,
   // MAX, MIN, SAT
   2, 2, 1,
   // CEIL, FLOOR, TRUNC, CVT
   1, 1, 1, 1,
   // SET_AND, SET_OR, SET_XOR, SET
   3, 3, 3, 2,
   // SELP, SLCT
   3, 3,
   // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 1,
   // EXP, LOG, PRESIN, PREEX2, SQRT
   1, 1, 1, 1, 1,
   // POW
   2,
   // BRA, CALL, RET, CONT, BREAK
   0, 0, 0, 0, 0,
   // PRERET, PRECONT, PREBREAK
   0, 0, 0,
   // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   0, 0, 0, 0, 0, 0,
   // VFETCH, PFETCH, AFETCH, EXPORT
   1, 1, 1, 2,
   // LINTERP, PINTERP
   1, 2,
   // EMIT, RESTART, FINAL
   1, 1, 1,
   // TEX, TXB, TXL, TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA
   1, 1, 1, 1, 1, 1, 1, 1, 1,
   // TEXPREP
   2,
   // SULDB, SULDP
   1, 1,
   // SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   2, 2, 2, 2, 2,
   // SUBFM, SUCLAMP, SUEAU
   3, 3, 3,
   // SUQ, MADSP
   1, 3,
   // TEXBAR
   0,
   // DFDX, DFDY
   1, 1,
   // RDSV, WRSV, PIXLD, QUADOP
   1, 2, 1, 2,
   // QUADON, QUADPOP
   0, 0,
   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
   2, 3, 2, 1, 1, 2, 3,
   // SGXT
   2,
   // ATOM, BAR
   3, 2,
   // VADD, VAVG, VMIN, VMAX
   2, 2, 2, 2,
   // VSAD, VSET
   3, 2,
   // VSHR, VSHL, VSEL
   2, 2, 2,
   // CCTL
   1,
   // SHFL
   3,
   // VOTE, BUFQ, WARPSYNC
   1, 1, 1,
   // LAST
   0
};
65
66
// Per-opcode class table, one entry per line in opcode order.
// Target::create() statically checks there are OP_LAST + 1 entries.
const OpClass Target::operationClass[] =
{
   OPCLASS_OTHER,    // NOP
   OPCLASS_PSEUDO,   // PHI
   OPCLASS_PSEUDO,   // UNION
   OPCLASS_PSEUDO,   // SPLIT
   OPCLASS_PSEUDO,   // MERGE
   OPCLASS_PSEUDO,   // CONSTRAINT
   OPCLASS_MOVE,     // MOV
   OPCLASS_LOAD,     // LOAD
   OPCLASS_STORE,    // STORE
   OPCLASS_ARITH,    // ADD
   OPCLASS_ARITH,    // SUB
   OPCLASS_ARITH,    // MUL
   OPCLASS_ARITH,    // DIV
   OPCLASS_ARITH,    // MOD
   OPCLASS_ARITH,    // MAD
   OPCLASS_ARITH,    // FMA
   OPCLASS_ARITH,    // SAD
   OPCLASS_ARITH,    // SHLADD
   OPCLASS_ARITH,    // XMAD
   OPCLASS_CONVERT,  // ABS
   OPCLASS_CONVERT,  // NEG
   OPCLASS_LOGIC,    // NOT
   OPCLASS_LOGIC,    // AND
   OPCLASS_LOGIC,    // OR
   OPCLASS_LOGIC,    // XOR
   OPCLASS_LOGIC,    // LOP3_LUT
   OPCLASS_SHIFT,    // SHL
   OPCLASS_SHIFT,    // SHR
   OPCLASS_SHIFT,    // SHF
   OPCLASS_COMPARE,  // MAX
   OPCLASS_COMPARE,  // MIN
   OPCLASS_CONVERT,  // SAT
   OPCLASS_CONVERT,  // CEIL
   OPCLASS_CONVERT,  // FLOOR
   OPCLASS_CONVERT,  // TRUNC
   OPCLASS_CONVERT,  // CVT
   OPCLASS_COMPARE,  // SET_AND
   OPCLASS_COMPARE,  // SET_OR
   OPCLASS_COMPARE,  // SET_XOR
   OPCLASS_COMPARE,  // SET
   OPCLASS_COMPARE,  // SELP
   OPCLASS_COMPARE,  // SLCT
   OPCLASS_SFU,      // RCP
   OPCLASS_SFU,      // RSQ
   OPCLASS_SFU,      // LG2
   OPCLASS_SFU,      // SIN
   OPCLASS_SFU,      // COS
   OPCLASS_SFU,      // EX2
   OPCLASS_SFU,      // EXP
   OPCLASS_SFU,      // LOG
   OPCLASS_SFU,      // PRESIN
   OPCLASS_SFU,      // PREEX2
   OPCLASS_SFU,      // SQRT
   OPCLASS_SFU,      // POW
   OPCLASS_FLOW,     // BRA
   OPCLASS_FLOW,     // CALL
   OPCLASS_FLOW,     // RET
   OPCLASS_FLOW,     // CONT
   OPCLASS_FLOW,     // BREAK
   OPCLASS_FLOW,     // PRERET
   OPCLASS_FLOW,     // PRECONT
   OPCLASS_FLOW,     // PREBREAK
   OPCLASS_FLOW,     // BRKPT
   OPCLASS_FLOW,     // JOINAT
   OPCLASS_FLOW,     // JOIN
   OPCLASS_FLOW,     // DISCARD
   OPCLASS_FLOW,     // EXIT
   OPCLASS_CONTROL,  // MEMBAR
   OPCLASS_LOAD,     // VFETCH
   OPCLASS_OTHER,    // PFETCH
   OPCLASS_OTHER,    // AFETCH
   OPCLASS_STORE,    // EXPORT
   OPCLASS_SFU,      // LINTERP
   OPCLASS_SFU,      // PINTERP
   OPCLASS_CONTROL,  // EMIT
   OPCLASS_CONTROL,  // RESTART
   OPCLASS_CONTROL,  // FINAL
   OPCLASS_TEXTURE,  // TEX
   OPCLASS_TEXTURE,  // TXB
   OPCLASS_TEXTURE,  // TXL
   OPCLASS_TEXTURE,  // TXF
   OPCLASS_TEXTURE,  // TXQ
   OPCLASS_TEXTURE,  // TXD
   OPCLASS_TEXTURE,  // TXG
   OPCLASS_TEXTURE,  // TXLQ
   OPCLASS_TEXTURE,  // TEXCSAA
   OPCLASS_TEXTURE,  // TEXPREP
   OPCLASS_SURFACE,  // SULDB
   OPCLASS_SURFACE,  // SULDP
   // NOTE(review): SUSTB is the only surface store classed as ATOMIC here;
   // kept as-is, confirm whether this is intentional.
   OPCLASS_ATOMIC,   // SUSTB
   OPCLASS_SURFACE,  // SUSTP
   OPCLASS_SURFACE,  // SUREDB
   OPCLASS_SURFACE,  // SUREDP
   OPCLASS_SURFACE,  // SULEA
   OPCLASS_OTHER,    // SUBFM
   OPCLASS_OTHER,    // SUCLAMP
   OPCLASS_OTHER,    // SUEAU
   OPCLASS_OTHER,    // SUQ
   OPCLASS_ARITH,    // MADSP
   OPCLASS_OTHER,    // TEXBAR
   OPCLASS_OTHER,    // DFDX
   OPCLASS_OTHER,    // DFDY
   OPCLASS_OTHER,    // RDSV
   OPCLASS_OTHER,    // WRSV
   OPCLASS_OTHER,    // PIXLD
   OPCLASS_OTHER,    // QUADOP
   OPCLASS_CONTROL,  // QUADON
   OPCLASS_CONTROL,  // QUADPOP
   OPCLASS_BITFIELD, // POPCNT
   OPCLASS_BITFIELD, // INSBF
   OPCLASS_BITFIELD, // EXTBF
   OPCLASS_BITFIELD, // BFIND
   OPCLASS_BITFIELD, // BREV
   OPCLASS_BITFIELD, // BMSK
   OPCLASS_BITFIELD, // PERMT
   OPCLASS_BITFIELD, // SGXT
   OPCLASS_ATOMIC,   // ATOM
   OPCLASS_CONTROL,  // BAR
   OPCLASS_VECTOR,   // VADD
   OPCLASS_VECTOR,   // VAVG
   OPCLASS_VECTOR,   // VMIN
   OPCLASS_VECTOR,   // VMAX
   OPCLASS_VECTOR,   // VSAD
   OPCLASS_VECTOR,   // VSET
   OPCLASS_VECTOR,   // VSHR
   OPCLASS_VECTOR,   // VSHL
   OPCLASS_VECTOR,   // VSEL
   OPCLASS_CONTROL,  // CCTL
   OPCLASS_OTHER,    // SHFL
   OPCLASS_OTHER,    // VOTE
   OPCLASS_OTHER,    // BUFQ
   OPCLASS_OTHER,    // WARPSYNC
   OPCLASS_PSEUDO    // LAST
};
145
146
147
extern Target *getTargetGV100(unsigned int chipset);
148
extern Target *getTargetGM107(unsigned int chipset);
149
extern Target *getTargetNVC0(unsigned int chipset);
150
extern Target *getTargetNV50(unsigned int chipset);
151
152
// Instantiate the Target for a chipset. The low four bits of the chipset id
// are masked off and the remaining value selects one of the four target
// factories. Logs an error and returns 0 for an unrecognised value.
Target *Target::create(unsigned int chipset)
{
   // Both per-opcode tables must carry exactly one entry per opcode.
   STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
   STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);

   const unsigned int family = chipset & ~0xf;

   switch (family) {
   case 0x50:
   case 0x80:
   case 0x90:
   case 0xa0:
      return getTargetNV50(chipset);

   case 0xc0:
   case 0xd0:
   case 0xe0:
   case 0xf0:
   case 0x100:
      return getTargetNVC0(chipset);

   case 0x110:
   case 0x120:
   case 0x130:
      return getTargetGM107(chipset);

   case 0x140:
   case 0x160:
      return getTargetGV100(chipset);

   default:
      break;
   }

   ERROR("unsupported target: NV%x\n", chipset);
   return 0;
}
180
181
// Counterpart of Target::create(): deletes the given target object.
void
Target::destroy(Target *targ)
{
   delete targ;
}
185
186
// Start with no output buffer and no relocation/fixup tables; the buffer is
// supplied later through setCodeLocation().
CodeEmitter::CodeEmitter(const Target *target)
   : targ(target),
     code(NULL),
     codeSize(0),
     codeSizeLimit(0),
     relocInfo(NULL),
     fixupInfo(NULL)
{
}
190
191
void
192
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
193
{
194
code = reinterpret_cast<uint32_t *>(ptr);
195
codeSize = 0;
196
codeSizeLimit = size;
197
}
198
199
void
200
CodeEmitter::printBinary() const
201
{
202
uint32_t *bin = code - codeSize / 4;
203
INFO("program binary (%u bytes)", codeSize);
204
for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
205
if ((pos % 8) == 0)
206
INFO("\n");
207
INFO("%08x ", bin[pos]);
208
}
209
INFO("\n");
210
}
211
212
// Number of 56-byte units needed to hold `size` bytes, rounded up.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t unitBytes = 56;

   return (size + (unitBytes - 1)) / unitBytes;
}
216
217
void
218
CodeEmitter::prepareEmission(Program *prog)
219
{
220
for (ArrayList::Iterator fi = prog->allFuncs.iterator();
221
!fi.end(); fi.next()) {
222
Function *func = reinterpret_cast<Function *>(fi.get());
223
func->binPos = prog->binSize;
224
prepareEmission(func);
225
226
// adjust sizes & positions for scheduling info:
227
if (prog->getTarget()->hasSWSched) {
228
uint32_t adjPos = func->binPos;
229
BasicBlock *bb = NULL;
230
for (int i = 0; i < func->bbCount; ++i) {
231
bb = func->bbArray[i];
232
int32_t adjSize = bb->binSize;
233
if (adjPos % 64) {
234
adjSize -= 64 - adjPos % 64;
235
if (adjSize < 0)
236
adjSize = 0;
237
}
238
adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
239
bb->binPos = adjPos;
240
bb->binSize = adjSize;
241
adjPos += adjSize;
242
}
243
if (bb)
244
func->binSize = adjPos - func->binPos;
245
}
246
247
prog->binSize += func->binSize;
248
}
249
}
250
251
void
252
CodeEmitter::prepareEmission(Function *func)
253
{
254
func->bbCount = 0;
255
func->bbArray = new BasicBlock * [func->cfg.getSize()];
256
257
BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
258
259
for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
260
prepareEmission(BasicBlock::get(*it));
261
}
262
263
void
264
CodeEmitter::prepareEmission(BasicBlock *bb)
265
{
266
Instruction *i, *next;
267
Function *func = bb->getFunction();
268
int j;
269
unsigned int nShort;
270
271
for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
272
273
for (; j >= 0; --j) {
274
BasicBlock *in = func->bbArray[j];
275
Instruction *exit = in->getExit();
276
277
if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
278
in->binSize -= 8;
279
func->binSize -= 8;
280
281
for (++j; j < func->bbCount; ++j)
282
func->bbArray[j]->binPos -= 8;
283
284
in->remove(exit);
285
}
286
bb->binPos = in->binPos + in->binSize;
287
if (in->binSize) // no more no-op branches to bb
288
break;
289
}
290
func->bbArray[func->bbCount++] = bb;
291
292
if (!bb->getExit())
293
return;
294
295
// determine encoding size, try to group short instructions
296
nShort = 0;
297
for (i = bb->getEntry(); i; i = next) {
298
next = i->next;
299
300
i->encSize = getMinEncodingSize(i);
301
if (next && i->encSize < 8)
302
++nShort;
303
else
304
if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
305
if (i->isCommutationLegal(i->next)) {
306
bb->permuteAdjacent(i, next);
307
next->encSize = 4;
308
next = i;
309
i = i->prev;
310
++nShort;
311
} else
312
if (i->isCommutationLegal(i->prev) && next->next) {
313
bb->permuteAdjacent(i->prev, i);
314
next->encSize = 4;
315
next = next->next;
316
bb->binSize += 4;
317
++nShort;
318
} else {
319
i->encSize = 8;
320
i->prev->encSize = 8;
321
bb->binSize += 4;
322
nShort = 0;
323
}
324
} else {
325
i->encSize = 8;
326
if (nShort & 1) {
327
i->prev->encSize = 8;
328
bb->binSize += 4;
329
}
330
nShort = 0;
331
}
332
bb->binSize += i->encSize;
333
}
334
335
if (bb->getExit()->encSize == 4) {
336
assert(nShort);
337
bb->getExit()->encSize = 8;
338
bb->binSize += 4;
339
340
if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
341
bb->binSize += 8;
342
bb->getExit()->prev->encSize = 8;
343
}
344
}
345
assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
346
347
func->binSize += bb->binSize;
348
}
349
350
bool
351
Program::emitBinary(struct nv50_ir_prog_info_out *info)
352
{
353
CodeEmitter *emit = target->getCodeEmitter(progType);
354
355
emit->prepareEmission(this);
356
357
if (dbgFlags & NV50_IR_DEBUG_BASIC)
358
this->print();
359
360
if (!binSize) {
361
code = NULL;
362
return false;
363
}
364
code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
365
if (!code)
366
return false;
367
emit->setCodeLocation(code, binSize);
368
info->bin.instructions = 0;
369
370
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
371
Function *fn = reinterpret_cast<Function *>(fi.get());
372
373
assert(emit->getCodeSize() == fn->binPos);
374
375
for (int b = 0; b < fn->bbCount; ++b) {
376
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
377
emit->emitInstruction(i);
378
info->bin.instructions++;
379
if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
380
(isFloatType(i->sType) || isFloatType(i->dType)))
381
info->io.fp64 = true;
382
}
383
}
384
}
385
info->io.fp64 |= fp64;
386
info->bin.relocData = emit->getRelocInfo();
387
info->bin.fixupData = emit->getFixupInfo();
388
389
// the nvc0 driver will print the binary itself together with the header
390
if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
391
emit->printBinary();
392
393
delete emit;
394
return true;
395
}
396
397
#define RELOC_ALLOC_INCREMENT 8
398
399
bool
400
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
401
int s)
402
{
403
unsigned int n = relocInfo ? relocInfo->count : 0;
404
405
if (!(n % RELOC_ALLOC_INCREMENT)) {
406
size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
407
relocInfo = reinterpret_cast<RelocInfo *>(
408
REALLOC(relocInfo, n ? size : 0,
409
size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
410
if (!relocInfo)
411
return false;
412
if (n == 0)
413
memset(relocInfo, 0, sizeof(RelocInfo));
414
}
415
++relocInfo->count;
416
417
relocInfo->entry[n].data = data;
418
relocInfo->entry[n].mask = m;
419
relocInfo->entry[n].offset = codeSize + w * 4;
420
relocInfo->entry[n].bitPos = s;
421
relocInfo->entry[n].type = ty;
422
423
return true;
424
}
425
426
bool
427
CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
428
{
429
unsigned int n = fixupInfo ? fixupInfo->count : 0;
430
431
if (!(n % RELOC_ALLOC_INCREMENT)) {
432
size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
433
fixupInfo = reinterpret_cast<FixupInfo *>(
434
REALLOC(fixupInfo, n ? size : 0,
435
size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
436
if (!fixupInfo)
437
return false;
438
if (n == 0)
439
fixupInfo->count = 0;
440
}
441
++fixupInfo->count;
442
443
fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
444
445
return true;
446
}
447
448
void
449
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
450
{
451
uint32_t value = 0;
452
453
switch (type) {
454
case TYPE_CODE: value = info->codePos; break;
455
case TYPE_BUILTIN: value = info->libPos; break;
456
case TYPE_DATA: value = info->dataPos; break;
457
default:
458
assert(0);
459
break;
460
}
461
value += data;
462
value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
463
464
binary[offset / 4] &= ~mask;
465
binary[offset / 4] |= value & mask;
466
}
467
468
} // namespace nv50_ir
469
470
471
#include "codegen/nv50_ir_driver.h"
472
473
extern "C" {
474
475
void
476
nv50_ir_relocate_code(void *relocData, uint32_t *code,
477
uint32_t codePos,
478
uint32_t libPos,
479
uint32_t dataPos)
480
{
481
nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
482
483
info->codePos = codePos;
484
info->libPos = libPos;
485
info->dataPos = dataPos;
486
487
for (unsigned int i = 0; i < info->count; ++i)
488
info->entry[i].apply(code, info);
489
}
490
491
void
492
nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
493
bool force_persample_interp, bool flatshade,
494
uint8_t alphatest, bool msaa)
495
{
496
nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
497
fixupData);
498
499
// force_persample_interp: all non-flat -> per-sample
500
// flatshade: all color -> flat
501
// alphatest: PIPE_FUNC_* to use with alphatest
502
// msaa: false = sample id -> 0 for interpolateAtSample
503
nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);
504
for (unsigned i = 0; i < info->count; ++i)
505
info->entry[i].apply(&info->entry[i], code, data);
506
}
507
508
void
509
nv50_ir_get_target_library(uint32_t chipset,
510
const uint32_t **code, uint32_t *size)
511
{
512
nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
513
targ->getBuiltinCode(code, size);
514
nv50_ir::Target::destroy(targ);
515
}
516
517
}
518
519