Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
4574 views
1
/*
2
* Copyright 2011 Christoph Bumiller
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*/
22
23
#include "codegen/nv50_ir.h"
24
#include "codegen/nv50_ir_target_nv50.h"
25
26
namespace nv50_ir {
27
28
// Encoding forms passed as the 'enc' argument of setSrcFileBits(); each
// emitForm_*() helper hands over the one matching the layout it emits.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32
33
class CodeEmitterNV50 : public CodeEmitter
34
{
35
public:
36
CodeEmitterNV50(Program::Type, const TargetNV50 *);
37
38
virtual bool emitInstruction(Instruction *);
39
40
virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42
virtual void prepareEmission(Function *);
43
44
private:
45
Program::Type progType;
46
47
const TargetNV50 *targNV50;
48
49
private:
50
inline void defId(const ValueDef&, const int pos);
51
inline void srcId(const ValueRef&, const int pos);
52
inline void srcId(const ValueRef *, const int pos);
53
54
inline void srcAddr16(const ValueRef&, bool adj, const int pos);
55
inline void srcAddr8(const ValueRef&, const int pos);
56
57
void emitFlagsRd(const Instruction *);
58
void emitFlagsWr(const Instruction *);
59
60
void emitCondCode(CondCode cc, DataType ty, int pos);
61
62
inline void setARegBits(unsigned int);
63
64
void setAReg16(const Instruction *, int s);
65
void setImmediate(const Instruction *, int s);
66
67
void setDst(const Value *);
68
void setDst(const Instruction *, int d);
69
void setSrcFileBits(const Instruction *, int enc);
70
void setSrc(const Instruction *, unsigned int s, int slot);
71
72
void emitForm_MAD(const Instruction *);
73
void emitForm_ADD(const Instruction *);
74
void emitForm_MUL(const Instruction *);
75
void emitForm_IMM(const Instruction *);
76
77
void emitLoadStoreSizeLG(DataType ty, int pos);
78
void emitLoadStoreSizeCS(DataType ty);
79
80
void roundMode_MAD(const Instruction *);
81
void roundMode_CVT(RoundMode);
82
83
void emitMNeg12(const Instruction *);
84
85
void emitLOAD(const Instruction *);
86
void emitSTORE(const Instruction *);
87
void emitMOV(const Instruction *);
88
void emitRDSV(const Instruction *);
89
void emitNOP();
90
void emitINTERP(const Instruction *);
91
void emitPFETCH(const Instruction *);
92
void emitOUT(const Instruction *);
93
94
void emitUADD(const Instruction *);
95
void emitAADD(const Instruction *);
96
void emitFADD(const Instruction *);
97
void emitDADD(const Instruction *);
98
void emitIMUL(const Instruction *);
99
void emitFMUL(const Instruction *);
100
void emitDMUL(const Instruction *);
101
void emitFMAD(const Instruction *);
102
void emitDMAD(const Instruction *);
103
void emitIMAD(const Instruction *);
104
void emitISAD(const Instruction *);
105
106
void emitMINMAX(const Instruction *);
107
108
void emitPreOp(const Instruction *);
109
void emitSFnOp(const Instruction *, uint8_t subOp);
110
111
void emitShift(const Instruction *);
112
void emitARL(const Instruction *, unsigned int shl);
113
void emitLogicOp(const Instruction *);
114
void emitNOT(const Instruction *);
115
116
void emitCVT(const Instruction *);
117
void emitSET(const Instruction *);
118
119
void emitTEX(const TexInstruction *);
120
void emitTXQ(const TexInstruction *);
121
void emitTEXPREP(const TexInstruction *);
122
123
void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
124
125
void emitFlow(const Instruction *, uint8_t flowOp);
126
void emitPRERETEmu(const FlowInstruction *);
127
void emitBAR(const Instruction *);
128
129
void emitATOM(const Instruction *);
130
};
131
132
// Shorthand for the register data of the representative value behind a
// reference; SDATA is used on sources and DDATA on definitions, both expand
// to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
134
135
void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
136
{
137
assert(src.get());
138
code[pos / 32] |= SDATA(src).id << (pos % 32);
139
}
140
141
void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
142
{
143
assert(src->get());
144
code[pos / 32] |= SDATA(*src).id << (pos % 32);
145
}
146
147
void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
148
{
149
assert(src.get());
150
151
int32_t offset = SDATA(src).offset;
152
153
assert(!adj || src.get()->reg.size <= 4);
154
if (adj)
155
offset /= src.get()->reg.size;
156
157
assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
158
159
if (offset < 0)
160
offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
161
162
code[pos / 32] |= offset << (pos % 32);
163
}
164
165
void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
166
{
167
assert(src.get());
168
169
uint32_t offset = SDATA(src).offset;
170
171
assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
172
173
code[pos / 32] |= (offset >> 2) << (pos % 32);
174
}
175
176
void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
177
{
178
assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
179
180
code[pos / 32] |= DDATA(def).id << (pos % 32);
181
}
182
183
void
184
CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
185
{
186
switch (insn->rnd) {
187
case ROUND_M: code[1] |= 1 << 22; break;
188
case ROUND_P: code[1] |= 2 << 22; break;
189
case ROUND_Z: code[1] |= 3 << 22; break;
190
default:
191
assert(insn->rnd == ROUND_N);
192
break;
193
}
194
}
195
196
void
197
CodeEmitterNV50::emitMNeg12(const Instruction *i)
198
{
199
code[1] |= i->src(0).mod.neg() << 26;
200
code[1] |= i->src(1).mod.neg() << 27;
201
}
202
203
// Translate condition code @cc to its hardware encoding and OR it into the
// code at bit position @pos. For a non-float @ty (other than TYPE_NONE) the
// "unordered" bit (0x8) is stripped from the encoding.
void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
{
   uint8_t enc;

   assert(pos >= 32 || pos <= 27);

   switch (cc) {
   // ordered / unordered comparisons
   case CC_FL:  enc = 0x0; break;
   case CC_LT:  enc = 0x1; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_LE:  enc = 0x3; break;
   case CC_GT:  enc = 0x4; break;
   case CC_NE:  enc = 0x5; break;
   case CC_GE:  enc = 0x6; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;

   // tests on individual flags
   case CC_O:  enc = 0x10; break;
   case CC_C:  enc = 0x11; break;
   case CC_A:  enc = 0x12; break;
   case CC_S:  enc = 0x13; break;
   case CC_NS: enc = 0x1c; break;
   case CC_NA: enc = 0x1d; break;
   case CC_NC: enc = 0x1e; break;
   case CC_NO: enc = 0x1f; break;

   default:
      enc = 0;
      assert(!"invalid condition code");
      break;
   }

   if (ty != TYPE_NONE && !isFloatType(ty))
      enc &= ~0x8; // unordered only exists for float types

   const int word = pos / 32;
   code[word] |= enc << (pos % 32);
}
244
245
void
246
CodeEmitterNV50::emitFlagsRd(const Instruction *i)
247
{
248
int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
249
250
assert(!(code[1] & 0x00003f80));
251
252
if (s >= 0) {
253
assert(i->getSrc(s)->reg.file == FILE_FLAGS);
254
emitCondCode(i->cc, TYPE_NONE, 32 + 7);
255
srcId(i->src(s), 32 + 12);
256
} else {
257
code[1] |= 0x0780;
258
}
259
}
260
261
void
262
CodeEmitterNV50::emitFlagsWr(const Instruction *i)
263
{
264
assert(!(code[1] & 0x70));
265
266
int flagsDef = i->flagsDef;
267
268
// find flags definition and check that it is the last def
269
if (flagsDef < 0) {
270
for (int d = 0; i->defExists(d); ++d)
271
if (i->def(d).getFile() == FILE_FLAGS)
272
flagsDef = d;
273
if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
274
WARN("Instruction::flagsDef was not set properly\n");
275
}
276
if (flagsDef == 0 && i->defExists(1))
277
WARN("flags def should not be the primary definition\n");
278
279
if (flagsDef >= 0)
280
code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
281
282
}
283
284
void
285
CodeEmitterNV50::setARegBits(unsigned int u)
286
{
287
code[0] |= (u & 3) << 26;
288
code[1] |= (u & 4);
289
}
290
291
void
292
CodeEmitterNV50::setAReg16(const Instruction *i, int s)
293
{
294
if (i->srcExists(s)) {
295
s = i->src(s).indirect[0];
296
if (s >= 0)
297
setARegBits(SDATA(i->src(s)).id + 1);
298
}
299
}
300
301
void
302
CodeEmitterNV50::setImmediate(const Instruction *i, int s)
303
{
304
const ImmediateValue *imm = i->src(s).get()->asImm();
305
assert(imm);
306
307
uint32_t u = imm->reg.data.u32;
308
309
if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
310
u = ~u;
311
312
code[1] |= 3;
313
code[0] |= (u & 0x3f) << 16;
314
code[1] |= (u >> 6) << 2;
315
}
316
317
void
318
CodeEmitterNV50::setDst(const Value *dst)
319
{
320
const Storage *reg = &dst->join->reg;
321
322
assert(reg->file != FILE_ADDRESS);
323
324
if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
325
code[0] |= (127 << 2) | 1;
326
code[1] |= 8;
327
} else {
328
int id;
329
if (reg->file == FILE_SHADER_OUTPUT) {
330
code[1] |= 8;
331
id = reg->data.offset / 4;
332
} else {
333
id = reg->data.id;
334
}
335
code[0] |= id << 2;
336
}
337
}
338
339
void
340
CodeEmitterNV50::setDst(const Instruction *i, int d)
341
{
342
if (i->defExists(d)) {
343
setDst(i->getDef(d));
344
} else
345
if (!d) {
346
code[0] |= 0x01fc; // bit bucket
347
code[1] |= 0x0008;
348
}
349
}
350
351
// 3 * 2 bits:
352
// 0: r
353
// 1: a/s
354
// 2: c
355
// 3: i
356
void
357
CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
358
{
359
uint8_t mode = 0;
360
361
for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
362
switch (i->src(s).getFile()) {
363
case FILE_GPR:
364
break;
365
case FILE_MEMORY_SHARED:
366
case FILE_SHADER_INPUT:
367
mode |= 1 << (s * 2);
368
break;
369
case FILE_MEMORY_CONST:
370
mode |= 2 << (s * 2);
371
break;
372
case FILE_IMMEDIATE:
373
mode |= 3 << (s * 2);
374
break;
375
default:
376
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
377
assert(0);
378
break;
379
}
380
}
381
switch (mode) {
382
case 0x00: // rrr
383
break;
384
case 0x01: // arr/grr
385
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
386
code[0] |= 0x01800000;
387
if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
388
code[1] |= 0x00200000;
389
} else {
390
if (enc == NV50_OP_ENC_SHORT)
391
code[0] |= 0x01000000;
392
else
393
code[1] |= 0x00200000;
394
}
395
break;
396
case 0x03: // irr
397
assert(i->op == OP_MOV);
398
return;
399
case 0x0c: // rir
400
break;
401
case 0x0d: // gir
402
assert(progType == Program::TYPE_GEOMETRY ||
403
progType == Program::TYPE_COMPUTE);
404
code[0] |= 0x01000000;
405
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
406
int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
407
assert(reg < 3);
408
code[0] |= (reg + 1) << 26;
409
}
410
break;
411
case 0x08: // rcr
412
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
413
code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
414
break;
415
case 0x09: // acr/gcr
416
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
417
code[0] |= 0x01800000;
418
} else {
419
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
420
code[1] |= 0x00200000;
421
}
422
code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
423
break;
424
case 0x20: // rrc
425
code[0] |= 0x01000000;
426
code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
427
break;
428
case 0x21: // arc
429
code[0] |= 0x01000000;
430
code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
431
assert(progType != Program::TYPE_GEOMETRY);
432
break;
433
default:
434
ERROR("not encodable: %x\n", mode);
435
assert(0);
436
break;
437
}
438
if (progType != Program::TYPE_COMPUTE)
439
return;
440
441
if ((mode & 3) == 1) {
442
const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
443
444
switch (i->sType) {
445
case TYPE_U8:
446
break;
447
case TYPE_U16:
448
code[0] |= 1 << pos;
449
break;
450
case TYPE_S16:
451
code[0] |= 2 << pos;
452
break;
453
default:
454
code[0] |= 3 << pos;
455
assert(i->getSrc(0)->reg.size == 4);
456
break;
457
}
458
}
459
}
460
461
void
462
CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
463
{
464
if (Target::operationSrcNr[i->op] <= s)
465
return;
466
const Storage *reg = &i->src(s).rep()->reg;
467
468
unsigned int id = (reg->file == FILE_GPR) ?
469
reg->data.id :
470
reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
471
472
switch (slot) {
473
case 0: code[0] |= id << 9; break;
474
case 1: code[0] |= id << 16; break;
475
case 2: code[1] |= id << 14; break;
476
default:
477
assert(0);
478
break;
479
}
480
}
481
482
// the default form:
483
// - long instruction
484
// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
485
// - address & flags
486
void
487
CodeEmitterNV50::emitForm_MAD(const Instruction *i)
488
{
489
assert(i->encSize == 8);
490
code[0] |= 1;
491
492
emitFlagsRd(i);
493
emitFlagsWr(i);
494
495
setDst(i, 0);
496
497
setSrcFileBits(i, NV50_OP_ENC_LONG);
498
setSrc(i, 0, 0);
499
setSrc(i, 1, 1);
500
setSrc(i, 2, 2);
501
502
if (i->getIndirect(0, 0)) {
503
assert(!i->srcExists(1) || !i->getIndirect(1, 0));
504
assert(!i->srcExists(2) || !i->getIndirect(2, 0));
505
setAReg16(i, 0);
506
} else if (i->srcExists(1) && i->getIndirect(1, 0)) {
507
assert(!i->srcExists(2) || !i->getIndirect(2, 0));
508
setAReg16(i, 1);
509
} else {
510
setAReg16(i, 2);
511
}
512
}
513
514
// like default form, but 2nd source in slot 2, and no 3rd source
515
void
516
CodeEmitterNV50::emitForm_ADD(const Instruction *i)
517
{
518
assert(i->encSize == 8);
519
code[0] |= 1;
520
521
emitFlagsRd(i);
522
emitFlagsWr(i);
523
524
setDst(i, 0);
525
526
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
527
setSrc(i, 0, 0);
528
if (i->predSrc != 1)
529
setSrc(i, 1, 2);
530
531
if (i->getIndirect(0, 0)) {
532
assert(!i->getIndirect(1, 0));
533
setAReg16(i, 0);
534
} else {
535
setAReg16(i, 1);
536
}
537
}
538
539
// default short form (rr, ar, rc, gr)
540
void
541
CodeEmitterNV50::emitForm_MUL(const Instruction *i)
542
{
543
assert(i->encSize == 4 && !(code[0] & 1));
544
assert(i->defExists(0));
545
assert(!i->getPredicate());
546
547
setDst(i, 0);
548
549
setSrcFileBits(i, NV50_OP_ENC_SHORT);
550
setSrc(i, 0, 0);
551
setSrc(i, 1, 1);
552
}
553
554
// usual immediate form
555
// - 1 to 3 sources where second is immediate (rir, gir)
556
// - no address or predicate possible
557
void
558
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
559
{
560
assert(i->encSize == 8);
561
code[0] |= 1;
562
563
assert(i->defExists(0) && i->srcExists(0));
564
565
setDst(i, 0);
566
567
setSrcFileBits(i, NV50_OP_ENC_IMM);
568
if (Target::operationSrcNr[i->op] > 1) {
569
setSrc(i, 0, 0);
570
setImmediate(i, 1);
571
// If there is another source, it has to be the same as the dest reg.
572
} else {
573
setImmediate(i, 0);
574
}
575
}
576
577
void
578
CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
579
{
580
uint8_t enc;
581
582
switch (ty) {
583
case TYPE_F32: // fall through
584
case TYPE_S32: // fall through
585
case TYPE_U32: enc = 0x6; break;
586
case TYPE_B128: enc = 0x5; break;
587
case TYPE_F64: // fall through
588
case TYPE_S64: // fall through
589
case TYPE_U64: enc = 0x4; break;
590
case TYPE_S16: enc = 0x3; break;
591
case TYPE_U16: enc = 0x2; break;
592
case TYPE_S8: enc = 0x1; break;
593
case TYPE_U8: enc = 0x0; break;
594
default:
595
enc = 0;
596
assert(!"invalid load/store type");
597
break;
598
}
599
code[pos / 32] |= enc << (pos % 32);
600
}
601
602
void
603
CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
604
{
605
switch (ty) {
606
case TYPE_U8: break;
607
case TYPE_U16: code[1] |= 0x4000; break;
608
case TYPE_S16: code[1] |= 0x8000; break;
609
case TYPE_F32:
610
case TYPE_S32:
611
case TYPE_U32: code[1] |= 0xc000; break;
612
default:
613
assert(0);
614
break;
615
}
616
}
617
618
void
619
CodeEmitterNV50::emitLOAD(const Instruction *i)
620
{
621
DataFile sf = i->src(0).getFile();
622
ASSERTED int32_t offset = i->getSrc(0)->reg.data.offset;
623
624
switch (sf) {
625
case FILE_SHADER_INPUT:
626
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
627
code[0] = 0x11800001;
628
else
629
// use 'mov' where we can
630
code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
631
code[1] = 0x00200000 | (i->lanes << 14);
632
if (typeSizeof(i->dType) == 4)
633
code[1] |= 0x04000000;
634
break;
635
case FILE_MEMORY_SHARED:
636
if (targ->getChipset() >= 0x84) {
637
assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
638
code[0] = 0x10000001;
639
code[1] = 0x40000000;
640
641
if (typeSizeof(i->dType) == 4)
642
code[1] |= 0x04000000;
643
644
emitLoadStoreSizeCS(i->sType);
645
646
if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
647
code[1] |= 0x00800000;
648
} else {
649
assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
650
code[0] = 0x10000001;
651
code[1] = 0x00200000 | (i->lanes << 14);
652
emitLoadStoreSizeCS(i->sType);
653
}
654
break;
655
case FILE_MEMORY_CONST:
656
code[0] = 0x10000001;
657
code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
658
if (typeSizeof(i->dType) == 4)
659
code[1] |= 0x04000000;
660
emitLoadStoreSizeCS(i->sType);
661
break;
662
case FILE_MEMORY_LOCAL:
663
code[0] = 0xd0000001;
664
code[1] = 0x40000000;
665
break;
666
case FILE_MEMORY_GLOBAL:
667
code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
668
code[1] = 0x80000000;
669
break;
670
default:
671
assert(!"invalid load source file");
672
break;
673
}
674
if (sf == FILE_MEMORY_LOCAL ||
675
sf == FILE_MEMORY_GLOBAL)
676
emitLoadStoreSizeLG(i->sType, 21 + 32);
677
678
setDst(i, 0);
679
680
emitFlagsRd(i);
681
emitFlagsWr(i);
682
683
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
684
srcId(*i->src(0).getIndirect(0), 9);
685
} else {
686
setAReg16(i, 0);
687
srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
688
}
689
}
690
691
void
692
CodeEmitterNV50::emitSTORE(const Instruction *i)
693
{
694
DataFile f = i->getSrc(0)->reg.file;
695
int32_t offset = i->getSrc(0)->reg.data.offset;
696
697
switch (f) {
698
case FILE_SHADER_OUTPUT:
699
code[0] = 0x00000001 | ((offset >> 2) << 9);
700
code[1] = 0x80c00000;
701
srcId(i->src(1), 32 + 14);
702
break;
703
case FILE_MEMORY_GLOBAL:
704
code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
705
code[1] = 0xa0000000;
706
emitLoadStoreSizeLG(i->dType, 21 + 32);
707
srcId(i->src(1), 2);
708
break;
709
case FILE_MEMORY_LOCAL:
710
code[0] = 0xd0000001;
711
code[1] = 0x60000000;
712
emitLoadStoreSizeLG(i->dType, 21 + 32);
713
srcId(i->src(1), 2);
714
break;
715
case FILE_MEMORY_SHARED:
716
code[0] = 0x00000001;
717
code[1] = 0xe0000000;
718
if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
719
code[1] |= 0x00800000;
720
switch (typeSizeof(i->dType)) {
721
case 1:
722
code[0] |= offset << 9;
723
code[1] |= 0x00400000;
724
break;
725
case 2:
726
code[0] |= (offset >> 1) << 9;
727
break;
728
case 4:
729
code[0] |= (offset >> 2) << 9;
730
code[1] |= 0x04200000;
731
break;
732
default:
733
assert(0);
734
break;
735
}
736
srcId(i->src(1), 32 + 14);
737
break;
738
default:
739
assert(!"invalid store destination file");
740
break;
741
}
742
743
if (f == FILE_MEMORY_GLOBAL)
744
srcId(*i->src(0).getIndirect(0), 9);
745
else
746
setAReg16(i, 0);
747
748
if (f == FILE_MEMORY_LOCAL)
749
srcAddr16(i->src(0), false, 9);
750
751
emitFlagsRd(i);
752
}
753
754
void
755
CodeEmitterNV50::emitMOV(const Instruction *i)
756
{
757
DataFile sf = i->getSrc(0)->reg.file;
758
DataFile df = i->getDef(0)->reg.file;
759
760
assert(sf == FILE_GPR || df == FILE_GPR);
761
762
if (sf == FILE_FLAGS) {
763
assert(i->flagsSrc >= 0);
764
code[0] = 0x00000001;
765
code[1] = 0x20000000;
766
defId(i->def(0), 2);
767
emitFlagsRd(i);
768
} else
769
if (sf == FILE_ADDRESS) {
770
code[0] = 0x00000001;
771
code[1] = 0x40000000;
772
defId(i->def(0), 2);
773
setARegBits(SDATA(i->src(0)).id + 1);
774
emitFlagsRd(i);
775
} else
776
if (df == FILE_FLAGS) {
777
assert(i->flagsDef >= 0);
778
code[0] = 0x00000001;
779
code[1] = 0xa0000000;
780
srcId(i->src(0), 9);
781
emitFlagsRd(i);
782
emitFlagsWr(i);
783
} else
784
if (sf == FILE_IMMEDIATE) {
785
code[0] = 0x10000001;
786
code[1] = 0x00000003;
787
emitForm_IMM(i);
788
789
code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
790
} else {
791
if (i->encSize == 4) {
792
code[0] = 0x10000000;
793
code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
794
defId(i->def(0), 2);
795
} else {
796
code[0] = 0x10000001;
797
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
798
code[1] |= (i->lanes << 14);
799
setDst(i, 0);
800
emitFlagsRd(i);
801
}
802
srcId(i->src(0), 9);
803
}
804
if (df == FILE_SHADER_OUTPUT) {
805
assert(i->encSize == 8);
806
code[1] |= 0x8;
807
}
808
}
809
810
static inline uint8_t getSRegEncoding(const ValueRef &ref)
811
{
812
switch (SDATA(ref).sv.sv) {
813
case SV_PHYSID: return 0;
814
case SV_CLOCK: return 1;
815
case SV_VERTEX_STRIDE: return 3;
816
// case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
817
case SV_SAMPLE_INDEX: return 8;
818
default:
819
assert(!"no sreg for system value");
820
return 0;
821
}
822
}
823
824
void
825
CodeEmitterNV50::emitRDSV(const Instruction *i)
826
{
827
code[0] = 0x00000001;
828
code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
829
defId(i->def(0), 2);
830
emitFlagsRd(i);
831
}
832
833
void
834
CodeEmitterNV50::emitNOP()
835
{
836
code[0] = 0xf0000001;
837
code[1] = 0xe0000000;
838
}
839
840
void
841
CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
842
{
843
code[0] = 0xc0000000 | (lane << 16);
844
code[1] = 0x80000000;
845
846
code[0] |= (quOp & 0x03) << 20;
847
code[1] |= (quOp & 0xfc) << 20;
848
849
emitForm_ADD(i);
850
851
if (!i->srcExists(1) || i->predSrc == 1)
852
srcId(i->src(0), 32 + 14);
853
}
854
855
/* NOTE: This returns the base address of a vertex inside the primitive.
856
* src0 is an immediate, the index (not offset) of the vertex
857
* inside the primitive. XXX: signed or unsigned ?
858
* src1 (may be NULL) should use whatever units the hardware requires
859
* (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
860
*/
861
void
862
CodeEmitterNV50::emitPFETCH(const Instruction *i)
863
{
864
const uint32_t prim = i->src(0).get()->reg.data.u32;
865
assert(prim <= 127);
866
867
if (i->def(0).getFile() == FILE_ADDRESS) {
868
// shl $aX a[] 0
869
code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
870
code[1] = 0xc0200000;
871
code[0] |= prim << 9;
872
assert(!i->srcExists(1));
873
} else
874
if (i->srcExists(1)) {
875
// ld b32 $rX a[$aX+base]
876
code[0] = 0x00000001;
877
code[1] = 0x04200000 | (0xf << 14);
878
defId(i->def(0), 2);
879
code[0] |= prim << 9;
880
setARegBits(SDATA(i->src(1)).id + 1);
881
} else {
882
// mov b32 $rX a[]
883
code[0] = 0x10000001;
884
code[1] = 0x04200000 | (0xf << 14);
885
defId(i->def(0), 2);
886
code[0] |= prim << 9;
887
}
888
emitFlagsRd(i);
889
}
890
891
void
892
nv50_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
893
{
894
int ipa = entry->ipa;
895
int encSize = entry->reg;
896
int loc = entry->loc;
897
898
if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
899
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
900
if (data.force_persample_interp) {
901
if (encSize == 8)
902
code[loc + 1] |= 1 << 16;
903
else
904
code[loc + 0] |= 1 << 24;
905
} else {
906
if (encSize == 8)
907
code[loc + 1] &= ~(1 << 16);
908
else
909
code[loc + 0] &= ~(1 << 24);
910
}
911
}
912
}
913
914
void
915
CodeEmitterNV50::emitINTERP(const Instruction *i)
916
{
917
code[0] = 0x80000000;
918
919
defId(i->def(0), 2);
920
srcAddr8(i->src(0), 16);
921
setAReg16(i, 0);
922
923
if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
924
code[0] |= 1 << 8;
925
} else {
926
if (i->op == OP_PINTERP) {
927
code[0] |= 1 << 25;
928
srcId(i->src(1), 9);
929
}
930
if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
931
code[0] |= 1 << 24;
932
}
933
934
if (i->encSize == 8) {
935
if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
936
code[1] = 4 << 16;
937
else
938
code[1] = (code[0] & (3 << 24)) >> (24 - 16);
939
code[0] &= ~0x03000000;
940
code[0] |= 1;
941
emitFlagsRd(i);
942
}
943
944
addInterp(i->ipa, i->encSize, nv50_interpApply);
945
}
946
947
void
948
CodeEmitterNV50::emitMINMAX(const Instruction *i)
949
{
950
if (i->dType == TYPE_F64) {
951
code[0] = 0xe0000000;
952
code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
953
} else {
954
code[0] = 0x30000000;
955
code[1] = 0x80000000;
956
if (i->op == OP_MIN)
957
code[1] |= 0x20000000;
958
959
switch (i->dType) {
960
case TYPE_F32: code[0] |= 0x80000000; break;
961
case TYPE_S32: code[1] |= 0x8c000000; break;
962
case TYPE_U32: code[1] |= 0x84000000; break;
963
case TYPE_S16: code[1] |= 0x80000000; break;
964
case TYPE_U16: break;
965
default:
966
assert(0);
967
break;
968
}
969
}
970
971
code[1] |= i->src(0).mod.abs() << 20;
972
code[1] |= i->src(0).mod.neg() << 26;
973
code[1] |= i->src(1).mod.abs() << 19;
974
code[1] |= i->src(1).mod.neg() << 27;
975
976
emitForm_MAD(i);
977
}
978
979
void
980
CodeEmitterNV50::emitFMAD(const Instruction *i)
981
{
982
const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
983
const int neg_add = i->src(2).mod.neg();
984
985
code[0] = 0xe0000000;
986
987
if (i->src(1).getFile() == FILE_IMMEDIATE) {
988
code[1] = 0;
989
emitForm_IMM(i);
990
code[0] |= neg_mul << 15;
991
code[0] |= neg_add << 22;
992
if (i->saturate)
993
code[0] |= 1 << 8;
994
} else
995
if (i->encSize == 4) {
996
emitForm_MUL(i);
997
code[0] |= neg_mul << 15;
998
code[0] |= neg_add << 22;
999
if (i->saturate)
1000
code[0] |= 1 << 8;
1001
} else {
1002
code[1] = neg_mul << 26;
1003
code[1] |= neg_add << 27;
1004
if (i->saturate)
1005
code[1] |= 1 << 29;
1006
emitForm_MAD(i);
1007
}
1008
}
1009
1010
void
1011
CodeEmitterNV50::emitDMAD(const Instruction *i)
1012
{
1013
const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1014
const int neg_add = i->src(2).mod.neg();
1015
1016
assert(i->encSize == 8);
1017
assert(!i->saturate);
1018
1019
code[1] = 0x40000000;
1020
code[0] = 0xe0000000;
1021
1022
code[1] |= neg_mul << 26;
1023
code[1] |= neg_add << 27;
1024
1025
roundMode_MAD(i);
1026
1027
emitForm_MAD(i);
1028
}
1029
1030
void
1031
CodeEmitterNV50::emitFADD(const Instruction *i)
1032
{
1033
const int neg0 = i->src(0).mod.neg();
1034
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1035
1036
code[0] = 0xb0000000;
1037
1038
assert(!(i->src(0).mod | i->src(1).mod).abs());
1039
1040
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1041
code[1] = 0;
1042
emitForm_IMM(i);
1043
code[0] |= neg0 << 15;
1044
code[0] |= neg1 << 22;
1045
if (i->saturate)
1046
code[0] |= 1 << 8;
1047
} else
1048
if (i->encSize == 8) {
1049
code[1] = 0;
1050
emitForm_ADD(i);
1051
code[1] |= neg0 << 26;
1052
code[1] |= neg1 << 27;
1053
if (i->saturate)
1054
code[1] |= 1 << 29;
1055
} else {
1056
emitForm_MUL(i);
1057
code[0] |= neg0 << 15;
1058
code[0] |= neg1 << 22;
1059
if (i->saturate)
1060
code[0] |= 1 << 8;
1061
}
1062
}
1063
1064
void
1065
CodeEmitterNV50::emitDADD(const Instruction *i)
1066
{
1067
const int neg0 = i->src(0).mod.neg();
1068
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1069
1070
assert(!(i->src(0).mod | i->src(1).mod).abs());
1071
assert(!i->saturate);
1072
assert(i->encSize == 8);
1073
1074
code[1] = 0x60000000;
1075
code[0] = 0xe0000000;
1076
1077
emitForm_ADD(i);
1078
1079
code[1] |= neg0 << 26;
1080
code[1] |= neg1 << 27;
1081
}
1082
1083
void
1084
CodeEmitterNV50::emitUADD(const Instruction *i)
1085
{
1086
const int neg0 = i->src(0).mod.neg();
1087
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1088
1089
code[0] = 0x20000000;
1090
1091
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1092
code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1093
code[1] = 0;
1094
emitForm_IMM(i);
1095
} else
1096
if (i->encSize == 8) {
1097
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1098
emitForm_ADD(i);
1099
} else {
1100
code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;
1101
emitForm_MUL(i);
1102
}
1103
assert(!(neg0 && neg1));
1104
code[0] |= neg0 << 28;
1105
code[0] |= neg1 << 22;
1106
1107
if (i->flagsSrc >= 0) {
1108
// addc == sub | subr
1109
assert(!(code[0] & 0x10400000) && !i->getPredicate());
1110
code[0] |= 0x10400000;
1111
srcId(i->src(i->flagsSrc), 32 + 12);
1112
}
1113
}
1114
1115
void
1116
CodeEmitterNV50::emitAADD(const Instruction *i)
1117
{
1118
const int s = (i->op == OP_MOV) ? 0 : 1;
1119
1120
code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1121
code[1] = 0x20000000;
1122
1123
code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1124
1125
emitFlagsRd(i);
1126
1127
if (s && i->srcExists(0))
1128
setARegBits(SDATA(i->src(0)).id + 1);
1129
}
1130
1131
void
1132
CodeEmitterNV50::emitIMUL(const Instruction *i)
1133
{
1134
code[0] = 0x40000000;
1135
1136
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1137
if (i->sType == TYPE_S16)
1138
code[0] |= 0x8100;
1139
code[1] = 0;
1140
emitForm_IMM(i);
1141
} else
1142
if (i->encSize == 8) {
1143
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1144
emitForm_MAD(i);
1145
} else {
1146
if (i->sType == TYPE_S16)
1147
code[0] |= 0x8100;
1148
emitForm_MUL(i);
1149
}
1150
}
1151
1152
void
1153
CodeEmitterNV50::emitFMUL(const Instruction *i)
1154
{
1155
const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1156
1157
code[0] = 0xc0000000;
1158
1159
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1160
code[1] = 0;
1161
emitForm_IMM(i);
1162
if (neg)
1163
code[0] |= 0x8000;
1164
if (i->saturate)
1165
code[0] |= 1 << 8;
1166
} else
1167
if (i->encSize == 8) {
1168
code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1169
if (neg)
1170
code[1] |= 0x08000000;
1171
if (i->saturate)
1172
code[1] |= 1 << 20;
1173
emitForm_MAD(i);
1174
} else {
1175
emitForm_MUL(i);
1176
if (neg)
1177
code[0] |= 0x8000;
1178
if (i->saturate)
1179
code[0] |= 1 << 8;
1180
}
1181
}
1182
1183
void
1184
CodeEmitterNV50::emitDMUL(const Instruction *i)
1185
{
1186
const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1187
1188
assert(!i->saturate);
1189
assert(i->encSize == 8);
1190
1191
code[1] = 0x80000000;
1192
code[0] = 0xe0000000;
1193
1194
if (neg)
1195
code[1] |= 0x08000000;
1196
1197
roundMode_CVT(i->rnd);
1198
1199
emitForm_MAD(i);
1200
}
1201
1202
void
1203
CodeEmitterNV50::emitIMAD(const Instruction *i)
1204
{
1205
int mode;
1206
code[0] = 0x60000000;
1207
1208
assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1209
if (!isSignedType(i->sType))
1210
mode = 0;
1211
else if (i->saturate)
1212
mode = 2;
1213
else
1214
mode = 1;
1215
1216
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1217
code[1] = 0;
1218
emitForm_IMM(i);
1219
code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1220
if (i->flagsSrc >= 0) {
1221
assert(!(code[0] & 0x10400000));
1222
assert(SDATA(i->src(i->flagsSrc)).id == 0);
1223
code[0] |= 0x10400000;
1224
}
1225
} else
1226
if (i->encSize == 4) {
1227
emitForm_MUL(i);
1228
code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1229
if (i->flagsSrc >= 0) {
1230
assert(!(code[0] & 0x10400000));
1231
assert(SDATA(i->src(i->flagsSrc)).id == 0);
1232
code[0] |= 0x10400000;
1233
}
1234
} else {
1235
code[1] = mode << 29;
1236
emitForm_MAD(i);
1237
1238
if (i->flagsSrc >= 0) {
1239
// add with carry from $cX
1240
assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1241
code[1] |= 0xc << 24;
1242
srcId(i->src(i->flagsSrc), 32 + 12);
1243
}
1244
}
1245
}
1246
1247
void
1248
CodeEmitterNV50::emitISAD(const Instruction *i)
1249
{
1250
if (i->encSize == 8) {
1251
code[0] = 0x50000000;
1252
switch (i->sType) {
1253
case TYPE_U32: code[1] = 0x04000000; break;
1254
case TYPE_S32: code[1] = 0x0c000000; break;
1255
case TYPE_U16: code[1] = 0x00000000; break;
1256
case TYPE_S16: code[1] = 0x08000000; break;
1257
default:
1258
assert(0);
1259
break;
1260
}
1261
emitForm_MAD(i);
1262
} else {
1263
switch (i->sType) {
1264
case TYPE_U32: code[0] = 0x50008000; break;
1265
case TYPE_S32: code[0] = 0x50008100; break;
1266
case TYPE_U16: code[0] = 0x50000000; break;
1267
case TYPE_S16: code[0] = 0x50000100; break;
1268
default:
1269
assert(0);
1270
break;
1271
}
1272
emitForm_MUL(i);
1273
}
1274
}
1275
1276
static void
1277
alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1278
{
1279
int loc = entry->loc;
1280
int enc;
1281
1282
switch (data.alphatest) {
1283
case PIPE_FUNC_NEVER: enc = 0x0; break;
1284
case PIPE_FUNC_LESS: enc = 0x1; break;
1285
case PIPE_FUNC_EQUAL: enc = 0x2; break;
1286
case PIPE_FUNC_LEQUAL: enc = 0x3; break;
1287
case PIPE_FUNC_GREATER: enc = 0x4; break;
1288
case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
1289
case PIPE_FUNC_GEQUAL: enc = 0x6; break;
1290
default:
1291
case PIPE_FUNC_ALWAYS: enc = 0xf; break;
1292
}
1293
1294
code[loc + 1] &= ~(0x1f << 14);
1295
code[loc + 1] |= enc << 14;
1296
}
1297
1298
void
1299
CodeEmitterNV50::emitSET(const Instruction *i)
1300
{
1301
code[0] = 0x30000000;
1302
code[1] = 0x60000000;
1303
1304
switch (i->sType) {
1305
case TYPE_F64:
1306
code[0] = 0xe0000000;
1307
code[1] = 0xe0000000;
1308
break;
1309
case TYPE_F32: code[0] |= 0x80000000; break;
1310
case TYPE_S32: code[1] |= 0x0c000000; break;
1311
case TYPE_U32: code[1] |= 0x04000000; break;
1312
case TYPE_S16: code[1] |= 0x08000000; break;
1313
case TYPE_U16: break;
1314
default:
1315
assert(0);
1316
break;
1317
}
1318
1319
emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1320
1321
if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1322
if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1323
if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1324
if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1325
1326
emitForm_MAD(i);
1327
1328
if (i->subOp == 1) {
1329
addInterp(0, 0, alphatestSet);
1330
}
1331
}
1332
1333
void
1334
CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1335
{
1336
switch (rnd) {
1337
case ROUND_NI: code[1] |= 0x08000000; break;
1338
case ROUND_M: code[1] |= 0x00020000; break;
1339
case ROUND_MI: code[1] |= 0x08020000; break;
1340
case ROUND_P: code[1] |= 0x00040000; break;
1341
case ROUND_PI: code[1] |= 0x08040000; break;
1342
case ROUND_Z: code[1] |= 0x00060000; break;
1343
case ROUND_ZI: code[1] |= 0x08060000; break;
1344
default:
1345
assert(rnd == ROUND_N);
1346
break;
1347
}
1348
}
1349
1350
void
1351
CodeEmitterNV50::emitCVT(const Instruction *i)
1352
{
1353
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1354
RoundMode rnd;
1355
DataType dType;
1356
1357
switch (i->op) {
1358
case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1359
case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1360
case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1361
default:
1362
rnd = i->rnd;
1363
break;
1364
}
1365
1366
if (i->op == OP_NEG && i->dType == TYPE_U32)
1367
dType = TYPE_S32;
1368
else
1369
dType = i->dType;
1370
1371
code[0] = 0xa0000000;
1372
1373
switch (dType) {
1374
case TYPE_F64:
1375
switch (i->sType) {
1376
case TYPE_F64: code[1] = 0xc4404000; break;
1377
case TYPE_S64: code[1] = 0x44414000; break;
1378
case TYPE_U64: code[1] = 0x44404000; break;
1379
case TYPE_F32: code[1] = 0xc4400000; break;
1380
case TYPE_S32: code[1] = 0x44410000; break;
1381
case TYPE_U32: code[1] = 0x44400000; break;
1382
default:
1383
assert(0);
1384
break;
1385
}
1386
break;
1387
case TYPE_S64:
1388
switch (i->sType) {
1389
case TYPE_F64: code[1] = 0x8c404000; break;
1390
case TYPE_F32: code[1] = 0x8c400000; break;
1391
default:
1392
assert(0);
1393
break;
1394
}
1395
break;
1396
case TYPE_U64:
1397
switch (i->sType) {
1398
case TYPE_F64: code[1] = 0x84404000; break;
1399
case TYPE_F32: code[1] = 0x84400000; break;
1400
default:
1401
assert(0);
1402
break;
1403
}
1404
break;
1405
case TYPE_F32:
1406
switch (i->sType) {
1407
case TYPE_F64: code[1] = 0xc0404000; break;
1408
case TYPE_S64: code[1] = 0x40414000; break;
1409
case TYPE_U64: code[1] = 0x40404000; break;
1410
case TYPE_F32: code[1] = 0xc4004000; break;
1411
case TYPE_S32: code[1] = 0x44014000; break;
1412
case TYPE_U32: code[1] = 0x44004000; break;
1413
case TYPE_F16: code[1] = 0xc4000000; break;
1414
case TYPE_U16: code[1] = 0x44000000; break;
1415
case TYPE_S16: code[1] = 0x44010000; break;
1416
case TYPE_S8: code[1] = 0x44018000; break;
1417
case TYPE_U8: code[1] = 0x44008000; break;
1418
default:
1419
assert(0);
1420
break;
1421
}
1422
break;
1423
case TYPE_S32:
1424
switch (i->sType) {
1425
case TYPE_F64: code[1] = 0x88404000; break;
1426
case TYPE_F32: code[1] = 0x8c004000; break;
1427
case TYPE_S32: code[1] = 0x0c014000; break;
1428
case TYPE_U32: code[1] = 0x0c004000; break;
1429
case TYPE_F16: code[1] = 0x8c000000; break;
1430
case TYPE_S16: code[1] = 0x0c010000; break;
1431
case TYPE_U16: code[1] = 0x0c000000; break;
1432
case TYPE_S8: code[1] = 0x0c018000; break;
1433
case TYPE_U8: code[1] = 0x0c008000; break;
1434
default:
1435
assert(0);
1436
break;
1437
}
1438
break;
1439
case TYPE_U32:
1440
switch (i->sType) {
1441
case TYPE_F64: code[1] = 0x80404000; break;
1442
case TYPE_F32: code[1] = 0x84004000; break;
1443
case TYPE_S32: code[1] = 0x04014000; break;
1444
case TYPE_U32: code[1] = 0x04004000; break;
1445
case TYPE_F16: code[1] = 0x84000000; break;
1446
case TYPE_S16: code[1] = 0x04010000; break;
1447
case TYPE_U16: code[1] = 0x04000000; break;
1448
case TYPE_S8: code[1] = 0x04018000; break;
1449
case TYPE_U8: code[1] = 0x04008000; break;
1450
default:
1451
assert(0);
1452
break;
1453
}
1454
break;
1455
case TYPE_F16:
1456
switch (i->sType) {
1457
case TYPE_F16: code[1] = 0xc0000000; break;
1458
case TYPE_F32: code[1] = 0xc0004000; break;
1459
default:
1460
assert(0);
1461
break;
1462
}
1463
break;
1464
case TYPE_S16:
1465
switch (i->sType) {
1466
case TYPE_F32: code[1] = 0x88004000; break;
1467
case TYPE_S32: code[1] = 0x08014000; break;
1468
case TYPE_U32: code[1] = 0x08004000; break;
1469
case TYPE_F16: code[1] = 0x88000000; break;
1470
case TYPE_S16: code[1] = 0x08010000; break;
1471
case TYPE_U16: code[1] = 0x08000000; break;
1472
case TYPE_S8: code[1] = 0x08018000; break;
1473
case TYPE_U8: code[1] = 0x08008000; break;
1474
default:
1475
assert(0);
1476
break;
1477
}
1478
break;
1479
case TYPE_U16:
1480
switch (i->sType) {
1481
case TYPE_F32: code[1] = 0x80004000; break;
1482
case TYPE_S32: code[1] = 0x00014000; break;
1483
case TYPE_U32: code[1] = 0x00004000; break;
1484
case TYPE_F16: code[1] = 0x80000000; break;
1485
case TYPE_S16: code[1] = 0x00010000; break;
1486
case TYPE_U16: code[1] = 0x00000000; break;
1487
case TYPE_S8: code[1] = 0x00018000; break;
1488
case TYPE_U8: code[1] = 0x00008000; break;
1489
default:
1490
assert(0);
1491
break;
1492
}
1493
break;
1494
case TYPE_S8:
1495
switch (i->sType) {
1496
case TYPE_S32: code[1] = 0x08094000; break;
1497
case TYPE_U32: code[1] = 0x08084000; break;
1498
case TYPE_F16: code[1] = 0x88080000; break;
1499
case TYPE_S16: code[1] = 0x08090000; break;
1500
case TYPE_U16: code[1] = 0x08080000; break;
1501
case TYPE_S8: code[1] = 0x08098000; break;
1502
case TYPE_U8: code[1] = 0x08088000; break;
1503
default:
1504
assert(0);
1505
break;
1506
}
1507
break;
1508
case TYPE_U8:
1509
switch (i->sType) {
1510
case TYPE_S32: code[1] = 0x00094000; break;
1511
case TYPE_U32: code[1] = 0x00084000; break;
1512
case TYPE_F16: code[1] = 0x80080000; break;
1513
case TYPE_S16: code[1] = 0x00090000; break;
1514
case TYPE_U16: code[1] = 0x00080000; break;
1515
case TYPE_S8: code[1] = 0x00098000; break;
1516
case TYPE_U8: code[1] = 0x00088000; break;
1517
default:
1518
assert(0);
1519
break;
1520
}
1521
break;
1522
default:
1523
assert(0);
1524
break;
1525
}
1526
if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1527
code[1] |= 0x00004000;
1528
1529
roundMode_CVT(rnd);
1530
1531
switch (i->op) {
1532
case OP_ABS: code[1] |= 1 << 20; break;
1533
case OP_SAT: code[1] |= 1 << 19; break;
1534
case OP_NEG: code[1] |= 1 << 29; break;
1535
default:
1536
break;
1537
}
1538
code[1] ^= i->src(0).mod.neg() << 29;
1539
code[1] |= i->src(0).mod.abs() << 20;
1540
if (i->saturate)
1541
code[1] |= 1 << 19;
1542
1543
assert(i->op != OP_ABS || !i->src(0).mod.neg());
1544
1545
emitForm_MAD(i);
1546
}
1547
1548
void
1549
CodeEmitterNV50::emitPreOp(const Instruction *i)
1550
{
1551
code[0] = 0xb0000000;
1552
code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1553
1554
code[1] |= i->src(0).mod.abs() << 20;
1555
code[1] |= i->src(0).mod.neg() << 26;
1556
1557
emitForm_MAD(i);
1558
}
1559
1560
void
1561
CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1562
{
1563
code[0] = 0x90000000;
1564
1565
if (i->encSize == 4) {
1566
assert(i->op == OP_RCP);
1567
assert(!i->saturate);
1568
code[0] |= i->src(0).mod.abs() << 15;
1569
code[0] |= i->src(0).mod.neg() << 22;
1570
emitForm_MUL(i);
1571
} else {
1572
code[1] = subOp << 29;
1573
code[1] |= i->src(0).mod.abs() << 20;
1574
code[1] |= i->src(0).mod.neg() << 26;
1575
if (i->saturate) {
1576
assert(subOp == 6 && i->op == OP_EX2);
1577
code[1] |= 1 << 27;
1578
}
1579
emitForm_MAD(i);
1580
}
1581
}
1582
1583
void
1584
CodeEmitterNV50::emitNOT(const Instruction *i)
1585
{
1586
code[0] = 0xd0000000;
1587
code[1] = 0x0002c000;
1588
1589
switch (i->sType) {
1590
case TYPE_U32:
1591
case TYPE_S32:
1592
code[1] |= 0x04000000;
1593
break;
1594
default:
1595
break;
1596
}
1597
emitForm_MAD(i);
1598
setSrc(i, 0, 1);
1599
}
1600
1601
void
1602
CodeEmitterNV50::emitLogicOp(const Instruction *i)
1603
{
1604
code[0] = 0xd0000000;
1605
code[1] = 0;
1606
1607
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1608
switch (i->op) {
1609
case OP_OR: code[0] |= 0x0100; break;
1610
case OP_XOR: code[0] |= 0x8000; break;
1611
default:
1612
assert(i->op == OP_AND);
1613
break;
1614
}
1615
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1616
code[0] |= 1 << 22;
1617
1618
emitForm_IMM(i);
1619
} else {
1620
switch (i->op) {
1621
case OP_AND: code[1] = 0x00000000; break;
1622
case OP_OR: code[1] = 0x00004000; break;
1623
case OP_XOR: code[1] = 0x00008000; break;
1624
default:
1625
assert(0);
1626
break;
1627
}
1628
if (typeSizeof(i->dType) == 4)
1629
code[1] |= 0x04000000;
1630
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1631
code[1] |= 1 << 16;
1632
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1633
code[1] |= 1 << 17;
1634
1635
emitForm_MAD(i);
1636
}
1637
}
1638
1639
void
1640
CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1641
{
1642
code[0] = 0x00000001 | (shl << 16);
1643
code[1] = 0xc0000000;
1644
1645
code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1646
1647
setSrcFileBits(i, NV50_OP_ENC_IMM);
1648
setSrc(i, 0, 0);
1649
emitFlagsRd(i);
1650
}
1651
1652
void
1653
CodeEmitterNV50::emitShift(const Instruction *i)
1654
{
1655
if (i->def(0).getFile() == FILE_ADDRESS) {
1656
assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1657
emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1658
} else {
1659
code[0] = 0x30000001;
1660
code[1] = (i->op == OP_SHR) ? 0xe0000000 : 0xc0000000;
1661
if (typeSizeof(i->dType) == 4)
1662
code[1] |= 0x04000000;
1663
if (i->op == OP_SHR && isSignedType(i->sType))
1664
code[1] |= 1 << 27;
1665
1666
if (i->src(1).getFile() == FILE_IMMEDIATE) {
1667
code[1] |= 1 << 20;
1668
code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1669
defId(i->def(0), 2);
1670
srcId(i->src(0), 9);
1671
emitFlagsRd(i);
1672
} else {
1673
emitForm_MAD(i);
1674
}
1675
}
1676
}
1677
1678
void
1679
CodeEmitterNV50::emitOUT(const Instruction *i)
1680
{
1681
code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1682
code[1] = 0xc0000000;
1683
1684
emitFlagsRd(i);
1685
}
1686
1687
void
1688
CodeEmitterNV50::emitTEX(const TexInstruction *i)
1689
{
1690
code[0] = 0xf0000001;
1691
code[1] = 0x00000000;
1692
1693
switch (i->op) {
1694
case OP_TXB:
1695
code[1] = 0x20000000;
1696
break;
1697
case OP_TXL:
1698
code[1] = 0x40000000;
1699
break;
1700
case OP_TXF:
1701
code[0] |= 0x01000000;
1702
break;
1703
case OP_TXG:
1704
code[0] |= 0x01000000;
1705
code[1] = 0x80000000;
1706
break;
1707
case OP_TXLQ:
1708
code[1] = 0x60020000;
1709
break;
1710
default:
1711
assert(i->op == OP_TEX);
1712
break;
1713
}
1714
1715
code[0] |= i->tex.r << 9;
1716
code[0] |= i->tex.s << 17;
1717
1718
int argc = i->tex.target.getArgCount();
1719
1720
if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1721
argc += 1;
1722
if (i->tex.target.isShadow())
1723
argc += 1;
1724
assert(argc <= 4);
1725
1726
code[0] |= (argc - 1) << 22;
1727
1728
if (i->tex.target.isCube()) {
1729
code[0] |= 0x08000000;
1730
} else
1731
if (i->tex.useOffsets) {
1732
code[1] |= (i->tex.offset[0] & 0xf) << 24;
1733
code[1] |= (i->tex.offset[1] & 0xf) << 20;
1734
code[1] |= (i->tex.offset[2] & 0xf) << 16;
1735
}
1736
1737
code[0] |= (i->tex.mask & 0x3) << 25;
1738
code[1] |= (i->tex.mask & 0xc) << 12;
1739
1740
if (i->tex.liveOnly)
1741
code[1] |= 1 << 2;
1742
if (i->tex.derivAll)
1743
code[1] |= 1 << 3;
1744
1745
defId(i->def(0), 2);
1746
1747
emitFlagsRd(i);
1748
}
1749
1750
void
1751
CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1752
{
1753
assert(i->tex.query == TXQ_DIMS);
1754
1755
code[0] = 0xf0000001;
1756
code[1] = 0x60000000;
1757
1758
code[0] |= i->tex.r << 9;
1759
code[0] |= i->tex.s << 17;
1760
1761
code[0] |= (i->tex.mask & 0x3) << 25;
1762
code[1] |= (i->tex.mask & 0xc) << 12;
1763
1764
defId(i->def(0), 2);
1765
1766
emitFlagsRd(i);
1767
}
1768
1769
void
1770
CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1771
{
1772
code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1773
code[1] = 0x60010000;
1774
1775
code[0] |= (i->tex.mask & 0x3) << 25;
1776
code[1] |= (i->tex.mask & 0xc) << 12;
1777
defId(i->def(0), 2);
1778
1779
emitFlagsRd(i);
1780
}
1781
1782
void
1783
CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1784
{
1785
uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1786
1787
code[0] = 0x10000003; // bra
1788
code[1] = 0x00000780; // always
1789
1790
switch (i->subOp) {
1791
case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1792
break;
1793
case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1794
pos += 8;
1795
break;
1796
default:
1797
assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1798
code[0] = 0x20000003; // call
1799
code[1] = 0x00000000; // no predicate
1800
break;
1801
}
1802
addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1803
addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1804
}
1805
1806
void
1807
CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1808
{
1809
const FlowInstruction *f = i->asFlow();
1810
bool hasPred = false;
1811
bool hasTarg = false;
1812
1813
code[0] = 0x00000003 | (flowOp << 28);
1814
code[1] = 0x00000000;
1815
1816
switch (i->op) {
1817
case OP_BRA:
1818
hasPred = true;
1819
hasTarg = true;
1820
break;
1821
case OP_BREAK:
1822
case OP_BRKPT:
1823
case OP_DISCARD:
1824
case OP_RET:
1825
hasPred = true;
1826
break;
1827
case OP_CALL:
1828
case OP_PREBREAK:
1829
case OP_JOINAT:
1830
hasTarg = true;
1831
break;
1832
case OP_PRERET:
1833
hasTarg = true;
1834
if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1835
emitPRERETEmu(f);
1836
return;
1837
}
1838
break;
1839
default:
1840
break;
1841
}
1842
1843
if (hasPred)
1844
emitFlagsRd(i);
1845
1846
if (hasTarg && f) {
1847
uint32_t pos;
1848
1849
if (f->op == OP_CALL) {
1850
if (f->builtin) {
1851
pos = targNV50->getBuiltinOffset(f->target.builtin);
1852
} else {
1853
pos = f->target.fn->binPos;
1854
}
1855
} else {
1856
pos = f->target.bb->binPos;
1857
}
1858
1859
code[0] |= ((pos >> 2) & 0xffff) << 11;
1860
code[1] |= ((pos >> 18) & 0x003f) << 14;
1861
1862
RelocEntry::Type relocTy;
1863
1864
relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1865
1866
addReloc(relocTy, 0, pos, 0x07fff800, 9);
1867
addReloc(relocTy, 1, pos, 0x000fc000, -4);
1868
}
1869
}
1870
1871
void
1872
CodeEmitterNV50::emitBAR(const Instruction *i)
1873
{
1874
ImmediateValue *barId = i->getSrc(0)->asImm();
1875
assert(barId);
1876
1877
code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1878
code[1] = 0x00004000;
1879
1880
if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1881
code[0] |= 1 << 26;
1882
}
1883
1884
void
1885
CodeEmitterNV50::emitATOM(const Instruction *i)
1886
{
1887
uint8_t subOp;
1888
switch (i->subOp) {
1889
case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1890
case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1891
case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1892
case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1893
case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1894
case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1895
case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1896
case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1897
case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1898
case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1899
default:
1900
assert(!"invalid subop");
1901
return;
1902
}
1903
code[0] = 0xd0000001;
1904
code[1] = 0xc0c00000 | (subOp << 2);
1905
if (isSignedType(i->dType))
1906
code[1] |= 1 << 21;
1907
1908
// args
1909
emitFlagsRd(i);
1910
if (i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1911
i->subOp == NV50_IR_SUBOP_ATOM_CAS ||
1912
i->defExists(0)) {
1913
code[1] |= 0x20000000;
1914
setDst(i, 0);
1915
setSrc(i, 1, 1);
1916
// g[] pointer
1917
code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1918
} else {
1919
srcId(i->src(1), 2);
1920
// g[] pointer
1921
code[0] |= i->getSrc(0)->reg.fileIndex << 16;
1922
}
1923
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1924
setSrc(i, 2, 2);
1925
1926
srcId(i->getIndirect(0, 0), 9);
1927
}
1928
1929
bool
1930
CodeEmitterNV50::emitInstruction(Instruction *insn)
1931
{
1932
if (!insn->encSize) {
1933
ERROR("skipping unencodable instruction: "); insn->print();
1934
return false;
1935
} else
1936
if (codeSize + insn->encSize > codeSizeLimit) {
1937
ERROR("code emitter output buffer too small\n");
1938
return false;
1939
}
1940
1941
if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1942
INFO("EMIT: "); insn->print();
1943
}
1944
1945
switch (insn->op) {
1946
case OP_MOV:
1947
emitMOV(insn);
1948
break;
1949
case OP_EXIT:
1950
case OP_NOP:
1951
case OP_JOIN:
1952
emitNOP();
1953
break;
1954
case OP_VFETCH:
1955
case OP_LOAD:
1956
emitLOAD(insn);
1957
break;
1958
case OP_EXPORT:
1959
case OP_STORE:
1960
emitSTORE(insn);
1961
break;
1962
case OP_PFETCH:
1963
emitPFETCH(insn);
1964
break;
1965
case OP_RDSV:
1966
emitRDSV(insn);
1967
break;
1968
case OP_LINTERP:
1969
case OP_PINTERP:
1970
emitINTERP(insn);
1971
break;
1972
case OP_ADD:
1973
case OP_SUB:
1974
if (insn->dType == TYPE_F64)
1975
emitDADD(insn);
1976
else if (isFloatType(insn->dType))
1977
emitFADD(insn);
1978
else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1979
emitAADD(insn);
1980
else
1981
emitUADD(insn);
1982
break;
1983
case OP_MUL:
1984
if (insn->dType == TYPE_F64)
1985
emitDMUL(insn);
1986
else if (isFloatType(insn->dType))
1987
emitFMUL(insn);
1988
else
1989
emitIMUL(insn);
1990
break;
1991
case OP_MAD:
1992
case OP_FMA:
1993
if (insn->dType == TYPE_F64)
1994
emitDMAD(insn);
1995
else if (isFloatType(insn->dType))
1996
emitFMAD(insn);
1997
else
1998
emitIMAD(insn);
1999
break;
2000
case OP_SAD:
2001
emitISAD(insn);
2002
break;
2003
case OP_NOT:
2004
emitNOT(insn);
2005
break;
2006
case OP_AND:
2007
case OP_OR:
2008
case OP_XOR:
2009
emitLogicOp(insn);
2010
break;
2011
case OP_SHL:
2012
case OP_SHR:
2013
emitShift(insn);
2014
break;
2015
case OP_SET:
2016
emitSET(insn);
2017
break;
2018
case OP_MIN:
2019
case OP_MAX:
2020
emitMINMAX(insn);
2021
break;
2022
case OP_CEIL:
2023
case OP_FLOOR:
2024
case OP_TRUNC:
2025
case OP_ABS:
2026
case OP_NEG:
2027
case OP_SAT:
2028
emitCVT(insn);
2029
break;
2030
case OP_CVT:
2031
if (insn->def(0).getFile() == FILE_ADDRESS)
2032
emitARL(insn, 0);
2033
else
2034
if (insn->def(0).getFile() == FILE_FLAGS ||
2035
insn->src(0).getFile() == FILE_FLAGS ||
2036
insn->src(0).getFile() == FILE_ADDRESS)
2037
emitMOV(insn);
2038
else
2039
emitCVT(insn);
2040
break;
2041
case OP_RCP:
2042
emitSFnOp(insn, 0);
2043
break;
2044
case OP_RSQ:
2045
emitSFnOp(insn, 2);
2046
break;
2047
case OP_LG2:
2048
emitSFnOp(insn, 3);
2049
break;
2050
case OP_SIN:
2051
emitSFnOp(insn, 4);
2052
break;
2053
case OP_COS:
2054
emitSFnOp(insn, 5);
2055
break;
2056
case OP_EX2:
2057
emitSFnOp(insn, 6);
2058
break;
2059
case OP_PRESIN:
2060
case OP_PREEX2:
2061
emitPreOp(insn);
2062
break;
2063
case OP_TEX:
2064
case OP_TXB:
2065
case OP_TXL:
2066
case OP_TXF:
2067
case OP_TXG:
2068
case OP_TXLQ:
2069
emitTEX(insn->asTex());
2070
break;
2071
case OP_TXQ:
2072
emitTXQ(insn->asTex());
2073
break;
2074
case OP_TEXPREP:
2075
emitTEXPREP(insn->asTex());
2076
break;
2077
case OP_EMIT:
2078
case OP_RESTART:
2079
emitOUT(insn);
2080
break;
2081
case OP_DISCARD:
2082
emitFlow(insn, 0x0);
2083
break;
2084
case OP_BRA:
2085
emitFlow(insn, 0x1);
2086
break;
2087
case OP_CALL:
2088
emitFlow(insn, 0x2);
2089
break;
2090
case OP_RET:
2091
emitFlow(insn, 0x3);
2092
break;
2093
case OP_PREBREAK:
2094
emitFlow(insn, 0x4);
2095
break;
2096
case OP_BREAK:
2097
emitFlow(insn, 0x5);
2098
break;
2099
case OP_QUADON:
2100
emitFlow(insn, 0x6);
2101
break;
2102
case OP_QUADPOP:
2103
emitFlow(insn, 0x7);
2104
break;
2105
case OP_JOINAT:
2106
emitFlow(insn, 0xa);
2107
break;
2108
case OP_PRERET:
2109
emitFlow(insn, 0xd);
2110
break;
2111
case OP_QUADOP:
2112
emitQUADOP(insn, insn->lanes, insn->subOp);
2113
break;
2114
case OP_DFDX:
2115
emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2116
break;
2117
case OP_DFDY:
2118
emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2119
break;
2120
case OP_ATOM:
2121
emitATOM(insn);
2122
break;
2123
case OP_BAR:
2124
emitBAR(insn);
2125
break;
2126
case OP_PHI:
2127
case OP_UNION:
2128
case OP_CONSTRAINT:
2129
ERROR("operation should have been eliminated\n");
2130
return false;
2131
case OP_EXP:
2132
case OP_LOG:
2133
case OP_SQRT:
2134
case OP_POW:
2135
case OP_SELP:
2136
case OP_SLCT:
2137
case OP_TXD:
2138
case OP_PRECONT:
2139
case OP_CONT:
2140
case OP_POPCNT:
2141
case OP_INSBF:
2142
case OP_EXTBF:
2143
ERROR("operation should have been lowered\n");
2144
return false;
2145
default:
2146
ERROR("unknown op: %u\n", insn->op);
2147
return false;
2148
}
2149
if (insn->join || insn->op == OP_JOIN)
2150
code[1] |= 0x2;
2151
else
2152
if (insn->exit || insn->op == OP_EXIT)
2153
code[1] |= 0x1;
2154
2155
assert((insn->encSize == 8) == (code[0] & 1));
2156
2157
code += insn->encSize / 4;
2158
codeSize += insn->encSize;
2159
return true;
2160
}
2161
2162
uint32_t
2163
CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2164
{
2165
const Target::OpInfo &info = targ->getOpInfo(i);
2166
2167
if (info.minEncSize > 4 || i->dType == TYPE_F64)
2168
return 8;
2169
2170
// check constraints on dst and src operands
2171
for (int d = 0; i->defExists(d); ++d) {
2172
if (i->def(d).rep()->reg.data.id > 63 ||
2173
i->def(d).rep()->reg.file != FILE_GPR)
2174
return 8;
2175
}
2176
2177
for (int s = 0; i->srcExists(s); ++s) {
2178
DataFile sf = i->src(s).getFile();
2179
if (sf != FILE_GPR)
2180
if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2181
return 8;
2182
if (i->src(s).rep()->reg.data.id > 63)
2183
return 8;
2184
}
2185
2186
// check modifiers & rounding
2187
if (i->join || i->lanes != 0xf || i->exit)
2188
return 8;
2189
if (i->op == OP_MUL && i->rnd != ROUND_N)
2190
return 8;
2191
2192
if (i->asTex())
2193
return 8; // TODO: short tex encoding
2194
2195
// check constraints on short MAD
2196
if (info.srcNr >= 2 && i->srcExists(2)) {
2197
if (!i->defExists(0) ||
2198
(i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2199
DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2200
return 8;
2201
}
2202
2203
return info.minEncSize;
2204
}
2205
2206
// Change the encoding size of an instruction after BBs have been scheduled.
2207
static void
2208
makeInstructionLong(Instruction *insn)
2209
{
2210
if (insn->encSize == 8)
2211
return;
2212
Function *fn = insn->bb->getFunction();
2213
int n = 0;
2214
int adj = 4;
2215
2216
for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2217
2218
if (n & 1) {
2219
adj = 8;
2220
insn->next->encSize = 8;
2221
} else
2222
if (insn->prev && insn->prev->encSize == 4) {
2223
adj = 8;
2224
insn->prev->encSize = 8;
2225
}
2226
insn->encSize = 8;
2227
2228
for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2229
fn->bbArray[i]->binPos += adj;
2230
}
2231
fn->binSize += adj;
2232
insn->bb->binSize += adj;
2233
}
2234
2235
static bool
2236
trySetExitModifier(Instruction *insn)
2237
{
2238
if (insn->op == OP_DISCARD ||
2239
insn->op == OP_QUADON ||
2240
insn->op == OP_QUADPOP)
2241
return false;
2242
for (int s = 0; insn->srcExists(s); ++s)
2243
if (insn->src(s).getFile() == FILE_IMMEDIATE)
2244
return false;
2245
if (insn->asFlow()) {
2246
if (insn->op == OP_CALL) // side effects !
2247
return false;
2248
if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2249
return false;
2250
insn->op = OP_EXIT;
2251
}
2252
insn->exit = 1;
2253
makeInstructionLong(insn);
2254
return true;
2255
}
2256
2257
static void
2258
replaceExitWithModifier(Function *func)
2259
{
2260
BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2261
2262
if (!epilogue->getExit() ||
2263
epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2264
return;
2265
2266
if (epilogue->getEntry()->op != OP_EXIT) {
2267
Instruction *insn = epilogue->getExit()->prev;
2268
if (!insn || !trySetExitModifier(insn))
2269
return;
2270
insn->exit = 1;
2271
} else {
2272
for (Graph::EdgeIterator ei = func->cfgExit->incident();
2273
!ei.end(); ei.next()) {
2274
BasicBlock *bb = BasicBlock::get(ei.getNode());
2275
Instruction *i = bb->getExit();
2276
2277
if (!i || !trySetExitModifier(i))
2278
return;
2279
}
2280
}
2281
2282
int adj = epilogue->getExit()->encSize;
2283
epilogue->binSize -= adj;
2284
func->binSize -= adj;
2285
delete_Instruction(func->getProgram(), epilogue->getExit());
2286
2287
// There may be BB's that are laid out after the exit block
2288
for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
2289
func->bbArray[i]->binPos -= adj;
2290
}
2291
}
2292
2293
void
2294
CodeEmitterNV50::prepareEmission(Function *func)
2295
{
2296
CodeEmitter::prepareEmission(func);
2297
2298
replaceExitWithModifier(func);
2299
}
2300
2301
// Construct an emitter for the given program type and NV50 target. The output
// buffer and relocation info start out unset.
CodeEmitterNV50::CodeEmitterNV50(Program::Type type, const TargetNV50 *target)
   : CodeEmitter(target),
     progType(type),
     targNV50(target)
{
   targ = target; // specialized

   // No output buffer yet.
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;

   relocInfo = NULL;
}
2309
2310
// Create a new NV50 code emitter for the given program type. The emitter is
// allocated with `new` and returned as a raw pointer.
CodeEmitter *
TargetNV50::getCodeEmitter(Program::Type type)
{
   return new CodeEmitterNV50(type, this);
}
2316
2317
} // namespace nv50_ir
2318
2319