Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
4574 views
1
/*
2
* Copyright 2014 Red Hat Inc.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*
22
* Authors: Ben Skeggs <[email protected]>
23
*/
24
25
#include "codegen/nv50_ir_target_gm107.h"
26
#include "codegen/nv50_ir_sched_gm107.h"
27
28
//#define GM107_DEBUG_SCHED_DATA
29
30
namespace nv50_ir {
31
32
class CodeEmitterGM107 : public CodeEmitter
33
{
34
public:
35
CodeEmitterGM107(const TargetGM107 *);
36
37
virtual bool emitInstruction(Instruction *);
38
virtual uint32_t getMinEncodingSize(const Instruction *) const;
39
40
virtual void prepareEmission(Program *);
41
virtual void prepareEmission(Function *);
42
43
inline void setProgramType(Program::Type pType) { progType = pType; }
44
45
private:
46
const TargetGM107 *targGM107;
47
48
Program::Type progType;
49
50
const Instruction *insn;
51
const bool writeIssueDelays;
52
uint32_t *data;
53
54
private:
55
inline void emitField(uint32_t *, int, int, uint32_t);
56
inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57
58
inline void emitInsn(uint32_t, bool);
59
inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60
inline void emitPred();
61
inline void emitGPR(int, const Value *);
62
inline void emitGPR(int pos) {
63
emitGPR(pos, (const Value *)NULL);
64
}
65
inline void emitGPR(int pos, const ValueRef &ref) {
66
emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67
}
68
inline void emitGPR(int pos, const ValueRef *ref) {
69
emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70
}
71
inline void emitGPR(int pos, const ValueDef &def) {
72
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73
}
74
inline void emitSYS(int, const Value *);
75
inline void emitSYS(int pos, const ValueRef &ref) {
76
emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77
}
78
inline void emitPRED(int, const Value *);
79
inline void emitPRED(int pos) {
80
emitPRED(pos, (const Value *)NULL);
81
}
82
inline void emitPRED(int pos, const ValueRef &ref) {
83
emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84
}
85
inline void emitPRED(int pos, const ValueDef &def) {
86
emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87
}
88
inline void emitADDR(int, int, int, int, const ValueRef &);
89
inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90
inline bool longIMMD(const ValueRef &);
91
inline void emitIMMD(int, int, const ValueRef &);
92
93
void emitCond3(int, CondCode);
94
void emitCond4(int, CondCode);
95
void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96
inline void emitO(int);
97
inline void emitP(int);
98
inline void emitSAT(int);
99
inline void emitCC(int);
100
inline void emitX(int);
101
inline void emitABS(int, const ValueRef &);
102
inline void emitNEG(int, const ValueRef &);
103
inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104
inline void emitFMZ(int, int);
105
inline void emitRND(int, RoundMode, int);
106
inline void emitRND(int pos) {
107
emitRND(pos, insn->rnd, -1);
108
}
109
inline void emitPDIV(int);
110
inline void emitINV(int, const ValueRef &);
111
112
void emitEXIT();
113
void emitBRA();
114
void emitCAL();
115
void emitPCNT();
116
void emitCONT();
117
void emitPBK();
118
void emitBRK();
119
void emitPRET();
120
void emitRET();
121
void emitSSY();
122
void emitSYNC();
123
void emitSAM();
124
void emitRAM();
125
126
void emitPSETP();
127
128
void emitMOV();
129
void emitS2R();
130
void emitCS2R();
131
void emitF2F();
132
void emitF2I();
133
void emitI2F();
134
void emitI2I();
135
void emitSEL();
136
void emitSHFL();
137
138
void emitDADD();
139
void emitDMUL();
140
void emitDFMA();
141
void emitDMNMX();
142
void emitDSET();
143
void emitDSETP();
144
145
void emitFADD();
146
void emitFMUL();
147
void emitFFMA();
148
void emitMUFU();
149
void emitFMNMX();
150
void emitRRO();
151
void emitFCMP();
152
void emitFSET();
153
void emitFSETP();
154
void emitFSWZADD();
155
156
void emitLOP();
157
void emitNOT();
158
void emitIADD();
159
void emitIMUL();
160
void emitIMAD();
161
void emitISCADD();
162
void emitXMAD();
163
void emitIMNMX();
164
void emitICMP();
165
void emitISET();
166
void emitISETP();
167
void emitSHL();
168
void emitSHR();
169
void emitSHF();
170
void emitPOPC();
171
void emitBFI();
172
void emitBFE();
173
void emitFLO();
174
void emitPRMT();
175
176
void emitLDSTs(int, DataType);
177
void emitLDSTc(int);
178
void emitLDC();
179
void emitLDL();
180
void emitLDS();
181
void emitLD();
182
void emitSTL();
183
void emitSTS();
184
void emitST();
185
void emitALD();
186
void emitAST();
187
void emitISBERD();
188
void emitAL2P();
189
void emitIPA();
190
void emitATOM();
191
void emitATOMS();
192
void emitRED();
193
void emitCCTL();
194
195
void emitPIXLD();
196
197
void emitTEXs(int);
198
void emitTEX();
199
void emitTEXS();
200
void emitTLD();
201
void emitTLD4();
202
void emitTXD();
203
void emitTXQ();
204
void emitTMML();
205
void emitDEPBAR();
206
207
void emitNOP();
208
void emitKIL();
209
void emitOUT();
210
211
void emitBAR();
212
void emitMEMBAR();
213
214
void emitVOTE();
215
216
void emitSUTarget();
217
void emitSUHandle(const int s);
218
void emitSUSTx();
219
void emitSULDx();
220
void emitSUREDx();
221
};
222
223
/*******************************************************************************
224
* general instruction layout/fields
225
******************************************************************************/
226
227
void
228
CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229
{
230
if (b >= 0) {
231
uint32_t m = ((1ULL << s) - 1);
232
uint64_t d = (uint64_t)(v & m) << b;
233
assert(!(v & ~m) || (v & ~m) == ~m);
234
data[1] |= d >> 32;
235
data[0] |= d;
236
}
237
}
238
239
void
240
CodeEmitterGM107::emitPred()
241
{
242
if (insn->predSrc >= 0) {
243
emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244
emitField(19, 1, insn->cc == CC_NOT_P);
245
} else {
246
emitField(16, 3, 7);
247
}
248
}
249
250
void
251
CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252
{
253
code[0] = 0x00000000;
254
code[1] = hi;
255
if (pred)
256
emitPred();
257
}
258
259
void
260
CodeEmitterGM107::emitGPR(int pos, const Value *val)
261
{
262
emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263
val->reg.data.id : 255);
264
}
265
266
void
267
CodeEmitterGM107::emitSYS(int pos, const Value *val)
268
{
269
int id = val ? val->reg.data.id : -1;
270
271
switch (id) {
272
case SV_LANEID : id = 0x00; break;
273
case SV_VERTEX_COUNT : id = 0x10; break;
274
case SV_INVOCATION_ID : id = 0x11; break;
275
case SV_THREAD_KILL : id = 0x13; break;
276
case SV_INVOCATION_INFO: id = 0x1d; break;
277
case SV_COMBINED_TID : id = 0x20; break;
278
case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
279
case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
280
case SV_LANEMASK_EQ : id = 0x38; break;
281
case SV_LANEMASK_LT : id = 0x39; break;
282
case SV_LANEMASK_LE : id = 0x3a; break;
283
case SV_LANEMASK_GT : id = 0x3b; break;
284
case SV_LANEMASK_GE : id = 0x3c; break;
285
case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
286
default:
287
assert(!"invalid system value");
288
id = 0;
289
break;
290
}
291
292
emitField(pos, 8, id);
293
}
294
295
void
296
CodeEmitterGM107::emitPRED(int pos, const Value *val)
297
{
298
emitField(pos, 3, val ? val->reg.data.id : 7);
299
}
300
301
void
302
CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303
const ValueRef &ref)
304
{
305
const Value *v = ref.get();
306
assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307
if (gpr >= 0)
308
emitGPR(gpr, ref.getIndirect(0));
309
emitField(off, len, v->reg.data.offset >> shr);
310
}
311
312
void
313
CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314
const ValueRef &ref)
315
{
316
const Value *v = ref.get();
317
const Symbol *s = v->asSym();
318
319
assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320
321
emitField(buf, 5, v->reg.fileIndex);
322
if (gpr >= 0)
323
emitGPR(gpr, ref.getIndirect(0));
324
emitField(off, 16, s->reg.data.offset >> shr);
325
}
326
327
bool
328
CodeEmitterGM107::longIMMD(const ValueRef &ref)
329
{
330
if (ref.getFile() == FILE_IMMEDIATE) {
331
const ImmediateValue *imm = ref.get()->asImm();
332
if (isFloatType(insn->sType))
333
return imm->reg.data.u32 & 0xfff;
334
else
335
return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336
}
337
return false;
338
}
339
340
void
341
CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342
{
343
const ImmediateValue *imm = ref.get()->asImm();
344
uint32_t val = imm->reg.data.u32;
345
346
if (len == 19) {
347
if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348
assert(!(val & 0x00000fff));
349
val >>= 12;
350
} else if (insn->sType == TYPE_F64) {
351
assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352
val = imm->reg.data.u64 >> 44;
353
} else {
354
assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355
}
356
emitField( 56, 1, (val & 0x80000) >> 19);
357
emitField(pos, len, (val & 0x7ffff));
358
} else {
359
emitField(pos, len, val);
360
}
361
}
362
363
/*******************************************************************************
364
* modifiers
365
******************************************************************************/
366
367
void
368
CodeEmitterGM107::emitCond3(int pos, CondCode code)
369
{
370
int data = 0;
371
372
switch (code) {
373
case CC_FL : data = 0x00; break;
374
case CC_LTU:
375
case CC_LT : data = 0x01; break;
376
case CC_EQU:
377
case CC_EQ : data = 0x02; break;
378
case CC_LEU:
379
case CC_LE : data = 0x03; break;
380
case CC_GTU:
381
case CC_GT : data = 0x04; break;
382
case CC_NEU:
383
case CC_NE : data = 0x05; break;
384
case CC_GEU:
385
case CC_GE : data = 0x06; break;
386
case CC_TR : data = 0x07; break;
387
default:
388
assert(!"invalid cond3");
389
break;
390
}
391
392
emitField(pos, 3, data);
393
}
394
395
void
396
CodeEmitterGM107::emitCond4(int pos, CondCode code)
397
{
398
int data = 0;
399
400
switch (code) {
401
case CC_FL: data = 0x00; break;
402
case CC_LT: data = 0x01; break;
403
case CC_EQ: data = 0x02; break;
404
case CC_LE: data = 0x03; break;
405
case CC_GT: data = 0x04; break;
406
case CC_NE: data = 0x05; break;
407
case CC_GE: data = 0x06; break;
408
// case CC_NUM: data = 0x07; break;
409
// case CC_NAN: data = 0x08; break;
410
case CC_LTU: data = 0x09; break;
411
case CC_EQU: data = 0x0a; break;
412
case CC_LEU: data = 0x0b; break;
413
case CC_GTU: data = 0x0c; break;
414
case CC_NEU: data = 0x0d; break;
415
case CC_GEU: data = 0x0e; break;
416
case CC_TR: data = 0x0f; break;
417
default:
418
assert(!"invalid cond4");
419
break;
420
}
421
422
emitField(pos, 4, data);
423
}
424
425
void
426
CodeEmitterGM107::emitO(int pos)
427
{
428
emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429
}
430
431
void
432
CodeEmitterGM107::emitP(int pos)
433
{
434
emitField(pos, 1, insn->perPatch);
435
}
436
437
void
438
CodeEmitterGM107::emitSAT(int pos)
439
{
440
emitField(pos, 1, insn->saturate);
441
}
442
443
void
444
CodeEmitterGM107::emitCC(int pos)
445
{
446
emitField(pos, 1, insn->flagsDef >= 0);
447
}
448
449
void
450
CodeEmitterGM107::emitX(int pos)
451
{
452
emitField(pos, 1, insn->flagsSrc >= 0);
453
}
454
455
void
456
CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457
{
458
emitField(pos, 1, ref.mod.abs());
459
}
460
461
void
462
CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463
{
464
emitField(pos, 1, ref.mod.neg());
465
}
466
467
void
468
CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469
{
470
emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471
}
472
473
void
474
CodeEmitterGM107::emitFMZ(int pos, int len)
475
{
476
emitField(pos, len, insn->dnz << 1 | insn->ftz);
477
}
478
479
void
480
CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481
{
482
int rm = 0, ri = 0;
483
switch (rnd) {
484
case ROUND_NI: ri = 1;
485
case ROUND_N : rm = 0; break;
486
case ROUND_MI: ri = 1;
487
case ROUND_M : rm = 1; break;
488
case ROUND_PI: ri = 1;
489
case ROUND_P : rm = 2; break;
490
case ROUND_ZI: ri = 1;
491
case ROUND_Z : rm = 3; break;
492
default:
493
assert(!"invalid round mode");
494
break;
495
}
496
emitField(rip, 1, ri);
497
emitField(rmp, 2, rm);
498
}
499
500
void
501
CodeEmitterGM107::emitPDIV(int pos)
502
{
503
assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504
if (insn->postFactor > 0)
505
emitField(pos, 3, 7 - insn->postFactor);
506
else
507
emitField(pos, 3, 0 - insn->postFactor);
508
}
509
510
void
511
CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512
{
513
emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514
}
515
516
/*******************************************************************************
517
* control flow
518
******************************************************************************/
519
520
void
521
CodeEmitterGM107::emitEXIT()
522
{
523
emitInsn (0xe3000000);
524
emitCond5(0x00, CC_TR);
525
}
526
527
void
528
CodeEmitterGM107::emitBRA()
529
{
530
const FlowInstruction *insn = this->insn->asFlow();
531
int gpr = -1;
532
533
if (insn->indirect) {
534
if (insn->absolute)
535
emitInsn(0xe2000000); // JMX
536
else
537
emitInsn(0xe2500000); // BRX
538
gpr = 0x08;
539
} else {
540
if (insn->absolute)
541
emitInsn(0xe2100000); // JMP
542
else
543
emitInsn(0xe2400000); // BRA
544
emitField(0x07, 1, insn->allWarp);
545
}
546
547
emitField(0x06, 1, insn->limit);
548
emitCond5(0x00, CC_TR);
549
550
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551
int32_t pos = insn->target.bb->binPos;
552
if (writeIssueDelays && !(pos & 0x1f))
553
pos += 8;
554
if (!insn->absolute)
555
emitField(0x14, 24, pos - (codeSize + 8));
556
else
557
emitField(0x14, 32, pos);
558
} else {
559
emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560
emitField(0x05, 1, 1);
561
}
562
}
563
564
void
565
CodeEmitterGM107::emitCAL()
566
{
567
const FlowInstruction *insn = this->insn->asFlow();
568
569
if (insn->absolute) {
570
emitInsn(0xe2200000, 0); // JCAL
571
} else {
572
emitInsn(0xe2600000, 0); // CAL
573
}
574
575
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576
if (!insn->absolute)
577
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578
else {
579
if (insn->builtin) {
580
int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
582
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583
} else {
584
emitField(0x14, 32, insn->target.bb->binPos);
585
}
586
}
587
} else {
588
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589
emitField(0x05, 1, 1);
590
}
591
}
592
593
void
594
CodeEmitterGM107::emitPCNT()
595
{
596
const FlowInstruction *insn = this->insn->asFlow();
597
598
emitInsn(0xe2b00000, 0);
599
600
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602
} else {
603
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604
emitField(0x05, 1, 1);
605
}
606
}
607
608
void
609
CodeEmitterGM107::emitCONT()
610
{
611
emitInsn (0xe3500000);
612
emitCond5(0x00, CC_TR);
613
}
614
615
void
616
CodeEmitterGM107::emitPBK()
617
{
618
const FlowInstruction *insn = this->insn->asFlow();
619
620
emitInsn(0xe2a00000, 0);
621
622
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624
} else {
625
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626
emitField(0x05, 1, 1);
627
}
628
}
629
630
void
631
CodeEmitterGM107::emitBRK()
632
{
633
emitInsn (0xe3400000);
634
emitCond5(0x00, CC_TR);
635
}
636
637
void
638
CodeEmitterGM107::emitPRET()
639
{
640
const FlowInstruction *insn = this->insn->asFlow();
641
642
emitInsn(0xe2700000, 0);
643
644
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646
} else {
647
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648
emitField(0x05, 1, 1);
649
}
650
}
651
652
void
653
CodeEmitterGM107::emitRET()
654
{
655
emitInsn (0xe3200000);
656
emitCond5(0x00, CC_TR);
657
}
658
659
void
660
CodeEmitterGM107::emitSSY()
661
{
662
const FlowInstruction *insn = this->insn->asFlow();
663
664
emitInsn(0xe2900000, 0);
665
666
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668
} else {
669
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670
emitField(0x05, 1, 1);
671
}
672
}
673
674
void
675
CodeEmitterGM107::emitSYNC()
676
{
677
emitInsn (0xf0f80000);
678
emitCond5(0x00, CC_TR);
679
}
680
681
void
682
CodeEmitterGM107::emitSAM()
683
{
684
emitInsn(0xe3700000, 0);
685
}
686
687
void
688
CodeEmitterGM107::emitRAM()
689
{
690
emitInsn(0xe3800000, 0);
691
}
692
693
/*******************************************************************************
694
* predicate/cc
695
******************************************************************************/
696
697
void
698
CodeEmitterGM107::emitPSETP()
699
{
700
701
emitInsn(0x50900000);
702
703
switch (insn->op) {
704
case OP_AND: emitField(0x18, 3, 0); break;
705
case OP_OR: emitField(0x18, 3, 1); break;
706
case OP_XOR: emitField(0x18, 3, 2); break;
707
default:
708
assert(!"unexpected operation");
709
break;
710
}
711
712
// emitINV (0x2a);
713
emitPRED(0x27); // TODO: support 3-arg
714
emitINV (0x20, insn->src(1));
715
emitPRED(0x1d, insn->src(1));
716
emitINV (0x0f, insn->src(0));
717
emitPRED(0x0c, insn->src(0));
718
emitPRED(0x03, insn->def(0));
719
emitPRED(0x00);
720
}
721
722
/*******************************************************************************
723
* movement / conversion
724
******************************************************************************/
725
726
void
727
CodeEmitterGM107::emitMOV()
728
{
729
if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730
switch (insn->src(0).getFile()) {
731
case FILE_GPR:
732
if (insn->def(0).getFile() == FILE_PREDICATE) {
733
emitInsn(0x5b6a0000);
734
emitGPR (0x08);
735
} else {
736
emitInsn(0x5c980000);
737
}
738
emitGPR (0x14, insn->src(0));
739
break;
740
case FILE_MEMORY_CONST:
741
emitInsn(0x4c980000);
742
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743
break;
744
case FILE_IMMEDIATE:
745
emitInsn(0x38980000);
746
emitIMMD(0x14, 19, insn->src(0));
747
break;
748
case FILE_PREDICATE:
749
emitInsn(0x50880000);
750
emitPRED(0x0c, insn->src(0));
751
emitPRED(0x1d);
752
emitPRED(0x27);
753
break;
754
default:
755
assert(!"bad src file");
756
break;
757
}
758
if (insn->def(0).getFile() != FILE_PREDICATE &&
759
insn->src(0).getFile() != FILE_PREDICATE)
760
emitField(0x27, 4, insn->lanes);
761
} else {
762
emitInsn (0x01000000);
763
emitIMMD (0x14, 32, insn->src(0));
764
emitField(0x0c, 4, insn->lanes);
765
}
766
767
if (insn->def(0).getFile() == FILE_PREDICATE) {
768
emitPRED(0x27);
769
emitPRED(0x03, insn->def(0));
770
emitPRED(0x00);
771
} else {
772
emitGPR(0x00, insn->def(0));
773
}
774
}
775
776
void
777
CodeEmitterGM107::emitS2R()
778
{
779
emitInsn(0xf0c80000);
780
emitSYS (0x14, insn->src(0));
781
emitGPR (0x00, insn->def(0));
782
}
783
784
void
785
CodeEmitterGM107::emitCS2R()
786
{
787
emitInsn(0x50c80000);
788
emitSYS (0x14, insn->src(0));
789
emitGPR (0x00, insn->def(0));
790
}
791
792
void
793
CodeEmitterGM107::emitF2F()
794
{
795
RoundMode rnd = insn->rnd;
796
797
switch (insn->op) {
798
case OP_FLOOR: rnd = ROUND_MI; break;
799
case OP_CEIL : rnd = ROUND_PI; break;
800
case OP_TRUNC: rnd = ROUND_ZI; break;
801
default:
802
break;
803
}
804
805
switch (insn->src(0).getFile()) {
806
case FILE_GPR:
807
emitInsn(0x5ca80000);
808
emitGPR (0x14, insn->src(0));
809
break;
810
case FILE_MEMORY_CONST:
811
emitInsn(0x4ca80000);
812
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813
break;
814
case FILE_IMMEDIATE:
815
emitInsn(0x38a80000);
816
emitIMMD(0x14, 19, insn->src(0));
817
break;
818
default:
819
assert(!"bad src0 file");
820
break;
821
}
822
823
emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825
emitCC (0x2f);
826
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827
emitFMZ (0x2c, 1);
828
emitField(0x29, 1, insn->subOp);
829
emitRND (0x27, rnd, 0x2a);
830
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832
emitGPR (0x00, insn->def(0));
833
}
834
835
void
836
CodeEmitterGM107::emitF2I()
837
{
838
RoundMode rnd = insn->rnd;
839
840
switch (insn->op) {
841
case OP_FLOOR: rnd = ROUND_M; break;
842
case OP_CEIL : rnd = ROUND_P; break;
843
case OP_TRUNC: rnd = ROUND_Z; break;
844
default:
845
break;
846
}
847
848
switch (insn->src(0).getFile()) {
849
case FILE_GPR:
850
emitInsn(0x5cb00000);
851
emitGPR (0x14, insn->src(0));
852
break;
853
case FILE_MEMORY_CONST:
854
emitInsn(0x4cb00000);
855
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856
break;
857
case FILE_IMMEDIATE:
858
emitInsn(0x38b00000);
859
emitIMMD(0x14, 19, insn->src(0));
860
break;
861
default:
862
assert(!"bad src0 file");
863
break;
864
}
865
866
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867
emitCC (0x2f);
868
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869
emitFMZ (0x2c, 1);
870
emitRND (0x27, rnd, 0x2a);
871
emitField(0x0c, 1, isSignedType(insn->dType));
872
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874
emitGPR (0x00, insn->def(0));
875
}
876
877
void
878
CodeEmitterGM107::emitI2F()
879
{
880
RoundMode rnd = insn->rnd;
881
882
switch (insn->op) {
883
case OP_FLOOR: rnd = ROUND_M; break;
884
case OP_CEIL : rnd = ROUND_P; break;
885
case OP_TRUNC: rnd = ROUND_Z; break;
886
default:
887
break;
888
}
889
890
switch (insn->src(0).getFile()) {
891
case FILE_GPR:
892
emitInsn(0x5cb80000);
893
emitGPR (0x14, insn->src(0));
894
break;
895
case FILE_MEMORY_CONST:
896
emitInsn(0x4cb80000);
897
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898
break;
899
case FILE_IMMEDIATE:
900
emitInsn(0x38b80000);
901
emitIMMD(0x14, 19, insn->src(0));
902
break;
903
default:
904
assert(!"bad src0 file");
905
break;
906
}
907
908
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909
emitCC (0x2f);
910
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911
emitField(0x29, 2, insn->subOp);
912
emitRND (0x27, rnd, -1);
913
emitField(0x0d, 1, isSignedType(insn->sType));
914
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916
emitGPR (0x00, insn->def(0));
917
}
918
919
void
920
CodeEmitterGM107::emitI2I()
921
{
922
switch (insn->src(0).getFile()) {
923
case FILE_GPR:
924
emitInsn(0x5ce00000);
925
emitGPR (0x14, insn->src(0));
926
break;
927
case FILE_MEMORY_CONST:
928
emitInsn(0x4ce00000);
929
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930
break;
931
case FILE_IMMEDIATE:
932
emitInsn(0x38e00000);
933
emitIMMD(0x14, 19, insn->src(0));
934
break;
935
default:
936
assert(!"bad src0 file");
937
break;
938
}
939
940
emitSAT (0x32);
941
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942
emitCC (0x2f);
943
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944
emitField(0x29, 2, insn->subOp);
945
emitField(0x0d, 1, isSignedType(insn->sType));
946
emitField(0x0c, 1, isSignedType(insn->dType));
947
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949
emitGPR (0x00, insn->def(0));
950
}
951
952
void
953
gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954
{
955
int loc = entry->loc;
956
bool val = false;
957
switch (entry->ipa) {
958
case 0:
959
val = data.force_persample_interp;
960
break;
961
case 1:
962
val = data.msaa;
963
break;
964
}
965
if (val)
966
code[loc + 1] |= 1 << 10;
967
else
968
code[loc + 1] &= ~(1 << 10);
969
}
970
971
void
972
CodeEmitterGM107::emitSEL()
973
{
974
switch (insn->src(1).getFile()) {
975
case FILE_GPR:
976
emitInsn(0x5ca00000);
977
emitGPR (0x14, insn->src(1));
978
break;
979
case FILE_MEMORY_CONST:
980
emitInsn(0x4ca00000);
981
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
982
break;
983
case FILE_IMMEDIATE:
984
emitInsn(0x38a00000);
985
emitIMMD(0x14, 19, insn->src(1));
986
break;
987
default:
988
assert(!"bad src1 file");
989
break;
990
}
991
992
emitINV (0x2a, insn->src(2));
993
emitPRED(0x27, insn->src(2));
994
emitGPR (0x08, insn->src(0));
995
emitGPR (0x00, insn->def(0));
996
997
if (insn->subOp >= 1) {
998
addInterp(insn->subOp - 1, 0, gm107_selpFlip);
999
}
1000
}
1001
1002
void
1003
CodeEmitterGM107::emitSHFL()
1004
{
1005
int type = 0;
1006
1007
emitInsn (0xef100000);
1008
1009
switch (insn->src(1).getFile()) {
1010
case FILE_GPR:
1011
emitGPR(0x14, insn->src(1));
1012
break;
1013
case FILE_IMMEDIATE:
1014
emitIMMD(0x14, 5, insn->src(1));
1015
type |= 1;
1016
break;
1017
default:
1018
assert(!"invalid src1 file");
1019
break;
1020
}
1021
1022
switch (insn->src(2).getFile()) {
1023
case FILE_GPR:
1024
emitGPR(0x27, insn->src(2));
1025
break;
1026
case FILE_IMMEDIATE:
1027
emitIMMD(0x22, 13, insn->src(2));
1028
type |= 2;
1029
break;
1030
default:
1031
assert(!"invalid src2 file");
1032
break;
1033
}
1034
1035
if (!insn->defExists(1))
1036
emitPRED(0x30);
1037
else {
1038
assert(insn->def(1).getFile() == FILE_PREDICATE);
1039
emitPRED(0x30, insn->def(1));
1040
}
1041
1042
emitField(0x1e, 2, insn->subOp);
1043
emitField(0x1c, 2, type);
1044
emitGPR (0x08, insn->src(0));
1045
emitGPR (0x00, insn->def(0));
1046
}
1047
1048
/*******************************************************************************
1049
* double
1050
******************************************************************************/
1051
1052
void
1053
CodeEmitterGM107::emitDADD()
1054
{
1055
switch (insn->src(1).getFile()) {
1056
case FILE_GPR:
1057
emitInsn(0x5c700000);
1058
emitGPR (0x14, insn->src(1));
1059
break;
1060
case FILE_MEMORY_CONST:
1061
emitInsn(0x4c700000);
1062
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1063
break;
1064
case FILE_IMMEDIATE:
1065
emitInsn(0x38700000);
1066
emitIMMD(0x14, 19, insn->src(1));
1067
break;
1068
default:
1069
assert(!"bad src1 file");
1070
break;
1071
}
1072
emitABS(0x31, insn->src(1));
1073
emitNEG(0x30, insn->src(0));
1074
emitCC (0x2f);
1075
emitABS(0x2e, insn->src(0));
1076
emitNEG(0x2d, insn->src(1));
1077
1078
if (insn->op == OP_SUB)
1079
code[1] ^= 0x00002000;
1080
1081
emitGPR(0x08, insn->src(0));
1082
emitGPR(0x00, insn->def(0));
1083
}
1084
1085
void
1086
CodeEmitterGM107::emitDMUL()
1087
{
1088
switch (insn->src(1).getFile()) {
1089
case FILE_GPR:
1090
emitInsn(0x5c800000);
1091
emitGPR (0x14, insn->src(1));
1092
break;
1093
case FILE_MEMORY_CONST:
1094
emitInsn(0x4c800000);
1095
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096
break;
1097
case FILE_IMMEDIATE:
1098
emitInsn(0x38800000);
1099
emitIMMD(0x14, 19, insn->src(1));
1100
break;
1101
default:
1102
assert(!"bad src1 file");
1103
break;
1104
}
1105
1106
emitNEG2(0x30, insn->src(0), insn->src(1));
1107
emitCC (0x2f);
1108
emitRND (0x27);
1109
emitGPR (0x08, insn->src(0));
1110
emitGPR (0x00, insn->def(0));
1111
}
1112
1113
void
1114
CodeEmitterGM107::emitDFMA()
1115
{
1116
switch(insn->src(2).getFile()) {
1117
case FILE_GPR:
1118
switch (insn->src(1).getFile()) {
1119
case FILE_GPR:
1120
emitInsn(0x5b700000);
1121
emitGPR (0x14, insn->src(1));
1122
break;
1123
case FILE_MEMORY_CONST:
1124
emitInsn(0x4b700000);
1125
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1126
break;
1127
case FILE_IMMEDIATE:
1128
emitInsn(0x36700000);
1129
emitIMMD(0x14, 19, insn->src(1));
1130
break;
1131
default:
1132
assert(!"bad src1 file");
1133
break;
1134
}
1135
emitGPR (0x27, insn->src(2));
1136
break;
1137
case FILE_MEMORY_CONST:
1138
emitInsn(0x53700000);
1139
emitGPR (0x27, insn->src(1));
1140
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1141
break;
1142
default:
1143
assert(!"bad src2 file");
1144
break;
1145
}
1146
1147
emitRND (0x32);
1148
emitNEG (0x31, insn->src(2));
1149
emitNEG2(0x30, insn->src(0), insn->src(1));
1150
emitCC (0x2f);
1151
emitGPR (0x08, insn->src(0));
1152
emitGPR (0x00, insn->def(0));
1153
}
1154
1155
void
1156
CodeEmitterGM107::emitDMNMX()
1157
{
1158
switch (insn->src(1).getFile()) {
1159
case FILE_GPR:
1160
emitInsn(0x5c500000);
1161
emitGPR (0x14, insn->src(1));
1162
break;
1163
case FILE_MEMORY_CONST:
1164
emitInsn(0x4c500000);
1165
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1166
break;
1167
case FILE_IMMEDIATE:
1168
emitInsn(0x38500000);
1169
emitIMMD(0x14, 19, insn->src(1));
1170
break;
1171
default:
1172
assert(!"bad src1 file");
1173
break;
1174
}
1175
1176
emitABS (0x31, insn->src(1));
1177
emitNEG (0x30, insn->src(0));
1178
emitCC (0x2f);
1179
emitABS (0x2e, insn->src(0));
1180
emitNEG (0x2d, insn->src(1));
1181
emitField(0x2a, 1, insn->op == OP_MAX);
1182
emitPRED (0x27);
1183
emitGPR (0x08, insn->src(0));
1184
emitGPR (0x00, insn->def(0));
1185
}
1186
1187
void
1188
CodeEmitterGM107::emitDSET()
1189
{
1190
const CmpInstruction *insn = this->insn->asCmp();
1191
1192
switch (insn->src(1).getFile()) {
1193
case FILE_GPR:
1194
emitInsn(0x59000000);
1195
emitGPR (0x14, insn->src(1));
1196
break;
1197
case FILE_MEMORY_CONST:
1198
emitInsn(0x49000000);
1199
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1200
break;
1201
case FILE_IMMEDIATE:
1202
emitInsn(0x32000000);
1203
emitIMMD(0x14, 19, insn->src(1));
1204
break;
1205
default:
1206
assert(!"bad src1 file");
1207
break;
1208
}
1209
1210
if (insn->op != OP_SET) {
1211
switch (insn->op) {
1212
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1215
default:
1216
assert(!"invalid set op");
1217
break;
1218
}
1219
emitPRED(0x27, insn->src(2));
1220
} else {
1221
emitPRED(0x27);
1222
}
1223
1224
emitABS (0x36, insn->src(0));
1225
emitNEG (0x35, insn->src(1));
1226
emitField(0x34, 1, insn->dType == TYPE_F32);
1227
emitCond4(0x30, insn->setCond);
1228
emitCC (0x2f);
1229
emitABS (0x2c, insn->src(1));
1230
emitNEG (0x2b, insn->src(0));
1231
emitGPR (0x08, insn->src(0));
1232
emitGPR (0x00, insn->def(0));
1233
}
1234
1235
void
1236
CodeEmitterGM107::emitDSETP()
1237
{
1238
const CmpInstruction *insn = this->insn->asCmp();
1239
1240
switch (insn->src(1).getFile()) {
1241
case FILE_GPR:
1242
emitInsn(0x5b800000);
1243
emitGPR (0x14, insn->src(1));
1244
break;
1245
case FILE_MEMORY_CONST:
1246
emitInsn(0x4b800000);
1247
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1248
break;
1249
case FILE_IMMEDIATE:
1250
emitInsn(0x36800000);
1251
emitIMMD(0x14, 19, insn->src(1));
1252
break;
1253
default:
1254
assert(!"bad src1 file");
1255
break;
1256
}
1257
1258
if (insn->op != OP_SET) {
1259
switch (insn->op) {
1260
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1263
default:
1264
assert(!"invalid set op");
1265
break;
1266
}
1267
emitPRED(0x27, insn->src(2));
1268
} else {
1269
emitPRED(0x27);
1270
}
1271
1272
emitCond4(0x30, insn->setCond);
1273
emitABS (0x2c, insn->src(1));
1274
emitNEG (0x2b, insn->src(0));
1275
emitGPR (0x08, insn->src(0));
1276
emitABS (0x07, insn->src(0));
1277
emitNEG (0x06, insn->src(1));
1278
emitPRED (0x03, insn->def(0));
1279
if (insn->defExists(1))
1280
emitPRED(0x00, insn->def(1));
1281
else
1282
emitPRED(0x00);
1283
}
1284
1285
/*******************************************************************************
 * float
 ******************************************************************************/
1288
1289
void
1290
CodeEmitterGM107::emitFADD()
1291
{
1292
if (!longIMMD(insn->src(1))) {
1293
switch (insn->src(1).getFile()) {
1294
case FILE_GPR:
1295
emitInsn(0x5c580000);
1296
emitGPR (0x14, insn->src(1));
1297
break;
1298
case FILE_MEMORY_CONST:
1299
emitInsn(0x4c580000);
1300
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1301
break;
1302
case FILE_IMMEDIATE:
1303
emitInsn(0x38580000);
1304
emitIMMD(0x14, 19, insn->src(1));
1305
break;
1306
default:
1307
assert(!"bad src1 file");
1308
break;
1309
}
1310
emitSAT(0x32);
1311
emitABS(0x31, insn->src(1));
1312
emitNEG(0x30, insn->src(0));
1313
emitCC (0x2f);
1314
emitABS(0x2e, insn->src(0));
1315
emitNEG(0x2d, insn->src(1));
1316
emitFMZ(0x2c, 1);
1317
1318
if (insn->op == OP_SUB)
1319
code[1] ^= 0x00002000;
1320
} else {
1321
emitInsn(0x08000000);
1322
emitABS(0x39, insn->src(1));
1323
emitNEG(0x38, insn->src(0));
1324
emitFMZ(0x37, 1);
1325
emitABS(0x36, insn->src(0));
1326
emitNEG(0x35, insn->src(1));
1327
emitCC (0x34);
1328
emitIMMD(0x14, 32, insn->src(1));
1329
1330
if (insn->op == OP_SUB)
1331
code[1] ^= 0x00080000;
1332
}
1333
1334
emitGPR(0x08, insn->src(0));
1335
emitGPR(0x00, insn->def(0));
1336
}
1337
1338
void
1339
CodeEmitterGM107::emitFMUL()
1340
{
1341
if (!longIMMD(insn->src(1))) {
1342
switch (insn->src(1).getFile()) {
1343
case FILE_GPR:
1344
emitInsn(0x5c680000);
1345
emitGPR (0x14, insn->src(1));
1346
break;
1347
case FILE_MEMORY_CONST:
1348
emitInsn(0x4c680000);
1349
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1350
break;
1351
case FILE_IMMEDIATE:
1352
emitInsn(0x38680000);
1353
emitIMMD(0x14, 19, insn->src(1));
1354
break;
1355
default:
1356
assert(!"bad src1 file");
1357
break;
1358
}
1359
emitSAT (0x32);
1360
emitNEG2(0x30, insn->src(0), insn->src(1));
1361
emitCC (0x2f);
1362
emitFMZ (0x2c, 2);
1363
emitPDIV(0x29);
1364
emitRND (0x27);
1365
} else {
1366
emitInsn(0x1e000000);
1367
emitSAT (0x37);
1368
emitFMZ (0x35, 2);
1369
emitCC (0x34);
1370
emitIMMD(0x14, 32, insn->src(1));
1371
if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1372
code[1] ^= 0x00080000; /* flip immd sign bit */
1373
}
1374
1375
emitGPR(0x08, insn->src(0));
1376
emitGPR(0x00, insn->def(0));
1377
}
1378
1379
void
1380
CodeEmitterGM107::emitFFMA()
1381
{
1382
bool isLongIMMD = false;
1383
switch(insn->src(2).getFile()) {
1384
case FILE_GPR:
1385
switch (insn->src(1).getFile()) {
1386
case FILE_GPR:
1387
emitInsn(0x59800000);
1388
emitGPR (0x14, insn->src(1));
1389
break;
1390
case FILE_MEMORY_CONST:
1391
emitInsn(0x49800000);
1392
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1393
break;
1394
case FILE_IMMEDIATE:
1395
if (longIMMD(insn->getSrc(1))) {
1396
assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1397
isLongIMMD = true;
1398
emitInsn(0x0c000000);
1399
emitIMMD(0x14, 32, insn->src(1));
1400
} else {
1401
emitInsn(0x32800000);
1402
emitIMMD(0x14, 19, insn->src(1));
1403
}
1404
break;
1405
default:
1406
assert(!"bad src1 file");
1407
break;
1408
}
1409
if (!isLongIMMD)
1410
emitGPR (0x27, insn->src(2));
1411
break;
1412
case FILE_MEMORY_CONST:
1413
emitInsn(0x51800000);
1414
emitGPR (0x27, insn->src(1));
1415
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1416
break;
1417
default:
1418
assert(!"bad src2 file");
1419
break;
1420
}
1421
1422
if (isLongIMMD) {
1423
emitNEG (0x39, insn->src(2));
1424
emitNEG2(0x38, insn->src(0), insn->src(1));
1425
emitSAT (0x37);
1426
emitCC (0x34);
1427
} else {
1428
emitRND (0x33);
1429
emitSAT (0x32);
1430
emitNEG (0x31, insn->src(2));
1431
emitNEG2(0x30, insn->src(0), insn->src(1));
1432
emitCC (0x2f);
1433
}
1434
1435
emitFMZ(0x35, 2);
1436
emitGPR(0x08, insn->src(0));
1437
emitGPR(0x00, insn->def(0));
1438
}
1439
1440
void
1441
CodeEmitterGM107::emitMUFU()
1442
{
1443
int mufu = 0;
1444
1445
switch (insn->op) {
1446
case OP_COS: mufu = 0; break;
1447
case OP_SIN: mufu = 1; break;
1448
case OP_EX2: mufu = 2; break;
1449
case OP_LG2: mufu = 3; break;
1450
case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452
case OP_SQRT: mufu = 8; break;
1453
default:
1454
assert(!"invalid mufu");
1455
break;
1456
}
1457
1458
emitInsn (0x50800000);
1459
emitSAT (0x32);
1460
emitNEG (0x30, insn->src(0));
1461
emitABS (0x2e, insn->src(0));
1462
emitField(0x14, 4, mufu);
1463
emitGPR (0x08, insn->src(0));
1464
emitGPR (0x00, insn->def(0));
1465
}
1466
1467
void
1468
CodeEmitterGM107::emitFMNMX()
1469
{
1470
switch (insn->src(1).getFile()) {
1471
case FILE_GPR:
1472
emitInsn(0x5c600000);
1473
emitGPR (0x14, insn->src(1));
1474
break;
1475
case FILE_MEMORY_CONST:
1476
emitInsn(0x4c600000);
1477
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1478
break;
1479
case FILE_IMMEDIATE:
1480
emitInsn(0x38600000);
1481
emitIMMD(0x14, 19, insn->src(1));
1482
break;
1483
default:
1484
assert(!"bad src1 file");
1485
break;
1486
}
1487
1488
emitField(0x2a, 1, insn->op == OP_MAX);
1489
emitPRED (0x27);
1490
1491
emitABS(0x31, insn->src(1));
1492
emitNEG(0x30, insn->src(0));
1493
emitCC (0x2f);
1494
emitABS(0x2e, insn->src(0));
1495
emitNEG(0x2d, insn->src(1));
1496
emitFMZ(0x2c, 1);
1497
emitGPR(0x08, insn->src(0));
1498
emitGPR(0x00, insn->def(0));
1499
}
1500
1501
void
1502
CodeEmitterGM107::emitRRO()
1503
{
1504
switch (insn->src(0).getFile()) {
1505
case FILE_GPR:
1506
emitInsn(0x5c900000);
1507
emitGPR (0x14, insn->src(0));
1508
break;
1509
case FILE_MEMORY_CONST:
1510
emitInsn(0x4c900000);
1511
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1512
break;
1513
case FILE_IMMEDIATE:
1514
emitInsn(0x38900000);
1515
emitIMMD(0x14, 19, insn->src(0));
1516
break;
1517
default:
1518
assert(!"bad src file");
1519
break;
1520
}
1521
1522
emitABS (0x31, insn->src(0));
1523
emitNEG (0x2d, insn->src(0));
1524
emitField(0x27, 1, insn->op == OP_PREEX2);
1525
emitGPR (0x00, insn->def(0));
1526
}
1527
1528
void
1529
CodeEmitterGM107::emitFCMP()
1530
{
1531
const CmpInstruction *insn = this->insn->asCmp();
1532
CondCode cc = insn->setCond;
1533
1534
if (insn->src(2).mod.neg())
1535
cc = reverseCondCode(cc);
1536
1537
switch(insn->src(2).getFile()) {
1538
case FILE_GPR:
1539
switch (insn->src(1).getFile()) {
1540
case FILE_GPR:
1541
emitInsn(0x5ba00000);
1542
emitGPR (0x14, insn->src(1));
1543
break;
1544
case FILE_MEMORY_CONST:
1545
emitInsn(0x4ba00000);
1546
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1547
break;
1548
case FILE_IMMEDIATE:
1549
emitInsn(0x36a00000);
1550
emitIMMD(0x14, 19, insn->src(1));
1551
break;
1552
default:
1553
assert(!"bad src1 file");
1554
break;
1555
}
1556
emitGPR (0x27, insn->src(2));
1557
break;
1558
case FILE_MEMORY_CONST:
1559
emitInsn(0x53a00000);
1560
emitGPR (0x27, insn->src(1));
1561
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1562
break;
1563
default:
1564
assert(!"bad src2 file");
1565
break;
1566
}
1567
1568
emitCond4(0x30, cc);
1569
emitFMZ (0x2f, 1);
1570
emitGPR (0x08, insn->src(0));
1571
emitGPR (0x00, insn->def(0));
1572
}
1573
1574
void
1575
CodeEmitterGM107::emitFSET()
1576
{
1577
const CmpInstruction *insn = this->insn->asCmp();
1578
1579
switch (insn->src(1).getFile()) {
1580
case FILE_GPR:
1581
emitInsn(0x58000000);
1582
emitGPR (0x14, insn->src(1));
1583
break;
1584
case FILE_MEMORY_CONST:
1585
emitInsn(0x48000000);
1586
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1587
break;
1588
case FILE_IMMEDIATE:
1589
emitInsn(0x30000000);
1590
emitIMMD(0x14, 19, insn->src(1));
1591
break;
1592
default:
1593
assert(!"bad src1 file");
1594
break;
1595
}
1596
1597
if (insn->op != OP_SET) {
1598
switch (insn->op) {
1599
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1602
default:
1603
assert(!"invalid set op");
1604
break;
1605
}
1606
emitPRED(0x27, insn->src(2));
1607
} else {
1608
emitPRED(0x27);
1609
}
1610
1611
emitFMZ (0x37, 1);
1612
emitABS (0x36, insn->src(0));
1613
emitNEG (0x35, insn->src(1));
1614
emitField(0x34, 1, insn->dType == TYPE_F32);
1615
emitCond4(0x30, insn->setCond);
1616
emitCC (0x2f);
1617
emitABS (0x2c, insn->src(1));
1618
emitNEG (0x2b, insn->src(0));
1619
emitGPR (0x08, insn->src(0));
1620
emitGPR (0x00, insn->def(0));
1621
}
1622
1623
void
1624
CodeEmitterGM107::emitFSETP()
1625
{
1626
const CmpInstruction *insn = this->insn->asCmp();
1627
1628
switch (insn->src(1).getFile()) {
1629
case FILE_GPR:
1630
emitInsn(0x5bb00000);
1631
emitGPR (0x14, insn->src(1));
1632
break;
1633
case FILE_MEMORY_CONST:
1634
emitInsn(0x4bb00000);
1635
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1636
break;
1637
case FILE_IMMEDIATE:
1638
emitInsn(0x36b00000);
1639
emitIMMD(0x14, 19, insn->src(1));
1640
break;
1641
default:
1642
assert(!"bad src1 file");
1643
break;
1644
}
1645
1646
if (insn->op != OP_SET) {
1647
switch (insn->op) {
1648
case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649
case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1651
default:
1652
assert(!"invalid set op");
1653
break;
1654
}
1655
emitPRED(0x27, insn->src(2));
1656
} else {
1657
emitPRED(0x27);
1658
}
1659
1660
emitCond4(0x30, insn->setCond);
1661
emitFMZ (0x2f, 1);
1662
emitABS (0x2c, insn->src(1));
1663
emitNEG (0x2b, insn->src(0));
1664
emitGPR (0x08, insn->src(0));
1665
emitABS (0x07, insn->src(0));
1666
emitNEG (0x06, insn->src(1));
1667
emitPRED (0x03, insn->def(0));
1668
if (insn->defExists(1))
1669
emitPRED(0x00, insn->def(1));
1670
else
1671
emitPRED(0x00);
1672
}
1673
1674
void
1675
CodeEmitterGM107::emitFSWZADD()
1676
{
1677
emitInsn (0x50f80000);
1678
emitCC (0x2f);
1679
emitFMZ (0x2c, 1);
1680
emitRND (0x27);
1681
emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682
emitField(0x1c, 8, insn->subOp);
1683
if (insn->predSrc != 1)
1684
emitGPR (0x14, insn->src(1));
1685
else
1686
emitGPR (0x14);
1687
emitGPR (0x08, insn->src(0));
1688
emitGPR (0x00, insn->def(0));
1689
}
1690
1691
/*******************************************************************************
 * integer
 ******************************************************************************/
1694
1695
void
1696
CodeEmitterGM107::emitLOP()
1697
{
1698
int lop = 0;
1699
1700
switch (insn->op) {
1701
case OP_AND: lop = 0; break;
1702
case OP_OR : lop = 1; break;
1703
case OP_XOR: lop = 2; break;
1704
default:
1705
assert(!"invalid lop");
1706
break;
1707
}
1708
1709
if (!longIMMD(insn->src(1))) {
1710
switch (insn->src(1).getFile()) {
1711
case FILE_GPR:
1712
emitInsn(0x5c400000);
1713
emitGPR (0x14, insn->src(1));
1714
break;
1715
case FILE_MEMORY_CONST:
1716
emitInsn(0x4c400000);
1717
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1718
break;
1719
case FILE_IMMEDIATE:
1720
emitInsn(0x38400000);
1721
emitIMMD(0x14, 19, insn->src(1));
1722
break;
1723
default:
1724
assert(!"bad src1 file");
1725
break;
1726
}
1727
emitPRED (0x30);
1728
emitCC (0x2f);
1729
emitX (0x2b);
1730
emitField(0x29, 2, lop);
1731
emitINV (0x28, insn->src(1));
1732
emitINV (0x27, insn->src(0));
1733
} else {
1734
emitInsn (0x04000000);
1735
emitX (0x39);
1736
emitINV (0x38, insn->src(1));
1737
emitINV (0x37, insn->src(0));
1738
emitField(0x35, 2, lop);
1739
emitCC (0x34);
1740
emitIMMD (0x14, 32, insn->src(1));
1741
}
1742
1743
emitGPR (0x08, insn->src(0));
1744
emitGPR (0x00, insn->def(0));
1745
}
1746
1747
/* special-case of emitLOP(): lop pass_b dst 0 ~src */
1748
void
1749
CodeEmitterGM107::emitNOT()
1750
{
1751
if (!longIMMD(insn->src(0))) {
1752
switch (insn->src(0).getFile()) {
1753
case FILE_GPR:
1754
emitInsn(0x5c400700);
1755
emitGPR (0x14, insn->src(0));
1756
break;
1757
case FILE_MEMORY_CONST:
1758
emitInsn(0x4c400700);
1759
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1760
break;
1761
case FILE_IMMEDIATE:
1762
emitInsn(0x38400700);
1763
emitIMMD(0x14, 19, insn->src(0));
1764
break;
1765
default:
1766
assert(!"bad src1 file");
1767
break;
1768
}
1769
emitPRED (0x30);
1770
} else {
1771
emitInsn (0x05600000);
1772
emitIMMD (0x14, 32, insn->src(1));
1773
}
1774
1775
emitGPR(0x08);
1776
emitGPR(0x00, insn->def(0));
1777
}
1778
1779
void
1780
CodeEmitterGM107::emitIADD()
1781
{
1782
if (!longIMMD(insn->src(1))) {
1783
switch (insn->src(1).getFile()) {
1784
case FILE_GPR:
1785
emitInsn(0x5c100000);
1786
emitGPR (0x14, insn->src(1));
1787
break;
1788
case FILE_MEMORY_CONST:
1789
emitInsn(0x4c100000);
1790
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1791
break;
1792
case FILE_IMMEDIATE:
1793
emitInsn(0x38100000);
1794
emitIMMD(0x14, 19, insn->src(1));
1795
break;
1796
default:
1797
assert(!"bad src1 file");
1798
break;
1799
}
1800
emitSAT(0x32);
1801
emitNEG(0x31, insn->src(0));
1802
emitNEG(0x30, insn->src(1));
1803
emitCC (0x2f);
1804
emitX (0x2b);
1805
} else {
1806
emitInsn(0x1c000000);
1807
emitNEG (0x38, insn->src(0));
1808
emitSAT (0x36);
1809
emitX (0x35);
1810
emitCC (0x34);
1811
emitIMMD(0x14, 32, insn->src(1));
1812
}
1813
1814
if (insn->op == OP_SUB)
1815
code[1] ^= 0x00010000;
1816
1817
emitGPR(0x08, insn->src(0));
1818
emitGPR(0x00, insn->def(0));
1819
}
1820
1821
void
1822
CodeEmitterGM107::emitIMUL()
1823
{
1824
if (!longIMMD(insn->src(1))) {
1825
switch (insn->src(1).getFile()) {
1826
case FILE_GPR:
1827
emitInsn(0x5c380000);
1828
emitGPR (0x14, insn->src(1));
1829
break;
1830
case FILE_MEMORY_CONST:
1831
emitInsn(0x4c380000);
1832
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1833
break;
1834
case FILE_IMMEDIATE:
1835
emitInsn(0x38380000);
1836
emitIMMD(0x14, 19, insn->src(1));
1837
break;
1838
default:
1839
assert(!"bad src1 file");
1840
break;
1841
}
1842
emitCC (0x2f);
1843
emitField(0x29, 1, isSignedType(insn->sType));
1844
emitField(0x28, 1, isSignedType(insn->dType));
1845
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1846
} else {
1847
emitInsn (0x1f000000);
1848
emitField(0x37, 1, isSignedType(insn->sType));
1849
emitField(0x36, 1, isSignedType(insn->dType));
1850
emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1851
emitCC (0x34);
1852
emitIMMD (0x14, 32, insn->src(1));
1853
}
1854
1855
emitGPR(0x08, insn->src(0));
1856
emitGPR(0x00, insn->def(0));
1857
}
1858
1859
void
1860
CodeEmitterGM107::emitIMAD()
1861
{
1862
/*XXX: imad32i exists, but not using it as third src overlaps dst */
1863
switch(insn->src(2).getFile()) {
1864
case FILE_GPR:
1865
switch (insn->src(1).getFile()) {
1866
case FILE_GPR:
1867
emitInsn(0x5a000000);
1868
emitGPR (0x14, insn->src(1));
1869
break;
1870
case FILE_MEMORY_CONST:
1871
emitInsn(0x4a000000);
1872
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1873
break;
1874
case FILE_IMMEDIATE:
1875
emitInsn(0x34000000);
1876
emitIMMD(0x14, 19, insn->src(1));
1877
break;
1878
default:
1879
assert(!"bad src1 file");
1880
break;
1881
}
1882
emitGPR (0x27, insn->src(2));
1883
break;
1884
case FILE_MEMORY_CONST:
1885
emitInsn(0x52000000);
1886
emitGPR (0x27, insn->src(1));
1887
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1888
break;
1889
default:
1890
assert(!"bad src2 file");
1891
break;
1892
}
1893
1894
emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895
emitField(0x35, 1, isSignedType(insn->sType));
1896
emitNEG (0x34, insn->src(2));
1897
emitNEG2 (0x33, insn->src(0), insn->src(1));
1898
emitSAT (0x32);
1899
emitX (0x31);
1900
emitField(0x30, 1, isSignedType(insn->dType));
1901
emitCC (0x2f);
1902
emitGPR (0x08, insn->src(0));
1903
emitGPR (0x00, insn->def(0));
1904
}
1905
1906
void
1907
CodeEmitterGM107::emitISCADD()
1908
{
1909
assert(insn->src(1).get()->asImm());
1910
1911
switch (insn->src(2).getFile()) {
1912
case FILE_GPR:
1913
emitInsn(0x5c180000);
1914
emitGPR (0x14, insn->src(2));
1915
break;
1916
case FILE_MEMORY_CONST:
1917
emitInsn(0x4c180000);
1918
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1919
break;
1920
case FILE_IMMEDIATE:
1921
emitInsn(0x38180000);
1922
emitIMMD(0x14, 19, insn->src(2));
1923
break;
1924
default:
1925
assert(!"bad src1 file");
1926
break;
1927
}
1928
emitNEG (0x31, insn->src(0));
1929
emitNEG (0x30, insn->src(2));
1930
emitCC (0x2f);
1931
emitIMMD(0x27, 5, insn->src(1));
1932
emitGPR (0x08, insn->src(0));
1933
emitGPR (0x00, insn->def(0));
1934
}
1935
1936
void
1937
CodeEmitterGM107::emitXMAD()
1938
{
1939
assert(insn->src(0).getFile() == FILE_GPR);
1940
1941
bool constbuf = false;
1942
bool psl_mrg = true;
1943
bool immediate = false;
1944
if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1945
assert(insn->src(1).getFile() == FILE_GPR);
1946
constbuf = true;
1947
psl_mrg = false;
1948
emitInsn(0x51000000);
1949
emitGPR(0x27, insn->src(1));
1950
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1951
} else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1952
assert(insn->src(2).getFile() == FILE_GPR);
1953
constbuf = true;
1954
emitInsn(0x4e000000);
1955
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1956
emitGPR(0x27, insn->src(2));
1957
} else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1958
assert(insn->src(2).getFile() == FILE_GPR);
1959
assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1960
immediate = true;
1961
emitInsn(0x36000000);
1962
emitIMMD(0x14, 16, insn->src(1));
1963
emitGPR(0x27, insn->src(2));
1964
} else {
1965
assert(insn->src(1).getFile() == FILE_GPR);
1966
assert(insn->src(2).getFile() == FILE_GPR);
1967
emitInsn(0x5b000000);
1968
emitGPR(0x14, insn->src(1));
1969
emitGPR(0x27, insn->src(2));
1970
}
1971
1972
if (psl_mrg)
1973
emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1974
1975
unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1976
cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1977
emitField(0x32, constbuf ? 2 : 3, cmode);
1978
1979
emitX(constbuf ? 0x36 : 0x26);
1980
emitCC(0x2f);
1981
1982
emitGPR(0x0, insn->def(0));
1983
emitGPR(0x8, insn->src(0));
1984
1985
// source flags
1986
if (isSignedType(insn->sType)) {
1987
uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1988
emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1989
}
1990
emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1991
if (!immediate) {
1992
bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1993
emitField(constbuf ? 0x34 : 0x23, 1, h1);
1994
}
1995
}
1996
1997
void
1998
CodeEmitterGM107::emitIMNMX()
1999
{
2000
switch (insn->src(1).getFile()) {
2001
case FILE_GPR:
2002
emitInsn(0x5c200000);
2003
emitGPR (0x14, insn->src(1));
2004
break;
2005
case FILE_MEMORY_CONST:
2006
emitInsn(0x4c200000);
2007
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008
break;
2009
case FILE_IMMEDIATE:
2010
emitInsn(0x38200000);
2011
emitIMMD(0x14, 19, insn->src(1));
2012
break;
2013
default:
2014
assert(!"bad src1 file");
2015
break;
2016
}
2017
2018
emitField(0x30, 1, isSignedType(insn->dType));
2019
emitCC (0x2f);
2020
emitField(0x2b, 2, insn->subOp);
2021
emitField(0x2a, 1, insn->op == OP_MAX);
2022
emitPRED (0x27);
2023
emitGPR (0x08, insn->src(0));
2024
emitGPR (0x00, insn->def(0));
2025
}
2026
2027
void
2028
CodeEmitterGM107::emitICMP()
2029
{
2030
const CmpInstruction *insn = this->insn->asCmp();
2031
CondCode cc = insn->setCond;
2032
2033
if (insn->src(2).mod.neg())
2034
cc = reverseCondCode(cc);
2035
2036
switch(insn->src(2).getFile()) {
2037
case FILE_GPR:
2038
switch (insn->src(1).getFile()) {
2039
case FILE_GPR:
2040
emitInsn(0x5b400000);
2041
emitGPR (0x14, insn->src(1));
2042
break;
2043
case FILE_MEMORY_CONST:
2044
emitInsn(0x4b400000);
2045
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2046
break;
2047
case FILE_IMMEDIATE:
2048
emitInsn(0x36400000);
2049
emitIMMD(0x14, 19, insn->src(1));
2050
break;
2051
default:
2052
assert(!"bad src1 file");
2053
break;
2054
}
2055
emitGPR (0x27, insn->src(2));
2056
break;
2057
case FILE_MEMORY_CONST:
2058
emitInsn(0x53400000);
2059
emitGPR (0x27, insn->src(1));
2060
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2061
break;
2062
default:
2063
assert(!"bad src2 file");
2064
break;
2065
}
2066
2067
emitCond3(0x31, cc);
2068
emitField(0x30, 1, isSignedType(insn->sType));
2069
emitGPR (0x08, insn->src(0));
2070
emitGPR (0x00, insn->def(0));
2071
}
2072
2073
void
2074
CodeEmitterGM107::emitISET()
2075
{
2076
const CmpInstruction *insn = this->insn->asCmp();
2077
2078
switch (insn->src(1).getFile()) {
2079
case FILE_GPR:
2080
emitInsn(0x5b500000);
2081
emitGPR (0x14, insn->src(1));
2082
break;
2083
case FILE_MEMORY_CONST:
2084
emitInsn(0x4b500000);
2085
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2086
break;
2087
case FILE_IMMEDIATE:
2088
emitInsn(0x36500000);
2089
emitIMMD(0x14, 19, insn->src(1));
2090
break;
2091
default:
2092
assert(!"bad src1 file");
2093
break;
2094
}
2095
2096
if (insn->op != OP_SET) {
2097
switch (insn->op) {
2098
case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099
case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2101
default:
2102
assert(!"invalid set op");
2103
break;
2104
}
2105
emitPRED(0x27, insn->src(2));
2106
} else {
2107
emitPRED(0x27);
2108
}
2109
2110
emitCond3(0x31, insn->setCond);
2111
emitField(0x30, 1, isSignedType(insn->sType));
2112
emitCC (0x2f);
2113
emitField(0x2c, 1, insn->dType == TYPE_F32);
2114
emitX (0x2b);
2115
emitGPR (0x08, insn->src(0));
2116
emitGPR (0x00, insn->def(0));
2117
}
2118
2119
void
2120
CodeEmitterGM107::emitISETP()
2121
{
2122
const CmpInstruction *insn = this->insn->asCmp();
2123
2124
switch (insn->src(1).getFile()) {
2125
case FILE_GPR:
2126
emitInsn(0x5b600000);
2127
emitGPR (0x14, insn->src(1));
2128
break;
2129
case FILE_MEMORY_CONST:
2130
emitInsn(0x4b600000);
2131
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2132
break;
2133
case FILE_IMMEDIATE:
2134
emitInsn(0x36600000);
2135
emitIMMD(0x14, 19, insn->src(1));
2136
break;
2137
default:
2138
assert(!"bad src1 file");
2139
break;
2140
}
2141
2142
if (insn->op != OP_SET) {
2143
switch (insn->op) {
2144
case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145
case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146
case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2147
default:
2148
assert(!"invalid set op");
2149
break;
2150
}
2151
emitPRED(0x27, insn->src(2));
2152
} else {
2153
emitPRED(0x27);
2154
}
2155
2156
emitCond3(0x31, insn->setCond);
2157
emitField(0x30, 1, isSignedType(insn->sType));
2158
emitX (0x2b);
2159
emitGPR (0x08, insn->src(0));
2160
emitPRED (0x03, insn->def(0));
2161
if (insn->defExists(1))
2162
emitPRED(0x00, insn->def(1));
2163
else
2164
emitPRED(0x00);
2165
}
2166
2167
void
2168
CodeEmitterGM107::emitSHL()
2169
{
2170
switch (insn->src(1).getFile()) {
2171
case FILE_GPR:
2172
emitInsn(0x5c480000);
2173
emitGPR (0x14, insn->src(1));
2174
break;
2175
case FILE_MEMORY_CONST:
2176
emitInsn(0x4c480000);
2177
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2178
break;
2179
case FILE_IMMEDIATE:
2180
emitInsn(0x38480000);
2181
emitIMMD(0x14, 19, insn->src(1));
2182
break;
2183
default:
2184
assert(!"bad src1 file");
2185
break;
2186
}
2187
2188
emitCC (0x2f);
2189
emitX (0x2b);
2190
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191
emitGPR (0x08, insn->src(0));
2192
emitGPR (0x00, insn->def(0));
2193
}
2194
2195
void
2196
CodeEmitterGM107::emitSHR()
2197
{
2198
switch (insn->src(1).getFile()) {
2199
case FILE_GPR:
2200
emitInsn(0x5c280000);
2201
emitGPR (0x14, insn->src(1));
2202
break;
2203
case FILE_MEMORY_CONST:
2204
emitInsn(0x4c280000);
2205
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2206
break;
2207
case FILE_IMMEDIATE:
2208
emitInsn(0x38280000);
2209
emitIMMD(0x14, 19, insn->src(1));
2210
break;
2211
default:
2212
assert(!"bad src1 file");
2213
break;
2214
}
2215
2216
emitField(0x30, 1, isSignedType(insn->dType));
2217
emitCC (0x2f);
2218
emitX (0x2c);
2219
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220
emitGPR (0x08, insn->src(0));
2221
emitGPR (0x00, insn->def(0));
2222
}
2223
2224
void
2225
CodeEmitterGM107::emitSHF()
2226
{
2227
unsigned type;
2228
2229
switch (insn->src(1).getFile()) {
2230
case FILE_GPR:
2231
emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232
emitGPR(0x14, insn->src(1));
2233
break;
2234
case FILE_IMMEDIATE:
2235
emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236
emitIMMD(0x14, 19, insn->src(1));
2237
break;
2238
default:
2239
assert(!"bad src1 file");
2240
break;
2241
}
2242
2243
switch (insn->sType) {
2244
case TYPE_U64:
2245
type = 2;
2246
break;
2247
case TYPE_S64:
2248
type = 3;
2249
break;
2250
default:
2251
type = 0;
2252
break;
2253
}
2254
2255
emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2256
emitX (0x31);
2257
emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2258
emitCC (0x2f);
2259
emitGPR (0x27, insn->src(2));
2260
emitField(0x25, 2, type);
2261
emitGPR (0x08, insn->src(0));
2262
emitGPR (0x00, insn->def(0));
2263
}
2264
2265
void
2266
CodeEmitterGM107::emitPOPC()
2267
{
2268
switch (insn->src(0).getFile()) {
2269
case FILE_GPR:
2270
emitInsn(0x5c080000);
2271
emitGPR (0x14, insn->src(0));
2272
break;
2273
case FILE_MEMORY_CONST:
2274
emitInsn(0x4c080000);
2275
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2276
break;
2277
case FILE_IMMEDIATE:
2278
emitInsn(0x38080000);
2279
emitIMMD(0x14, 19, insn->src(0));
2280
break;
2281
default:
2282
assert(!"bad src1 file");
2283
break;
2284
}
2285
2286
emitINV(0x28, insn->src(0));
2287
emitGPR(0x00, insn->def(0));
2288
}
2289
2290
void
2291
CodeEmitterGM107::emitBFI()
2292
{
2293
switch(insn->src(2).getFile()) {
2294
case FILE_GPR:
2295
switch (insn->src(1).getFile()) {
2296
case FILE_GPR:
2297
emitInsn(0x5bf00000);
2298
emitGPR (0x14, insn->src(1));
2299
break;
2300
case FILE_MEMORY_CONST:
2301
emitInsn(0x4bf00000);
2302
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2303
break;
2304
case FILE_IMMEDIATE:
2305
emitInsn(0x36f00000);
2306
emitIMMD(0x14, 19, insn->src(1));
2307
break;
2308
default:
2309
assert(!"bad src1 file");
2310
break;
2311
}
2312
emitGPR (0x27, insn->src(2));
2313
break;
2314
case FILE_MEMORY_CONST:
2315
emitInsn(0x53f00000);
2316
emitGPR (0x27, insn->src(1));
2317
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2318
break;
2319
default:
2320
assert(!"bad src2 file");
2321
break;
2322
}
2323
2324
emitCC (0x2f);
2325
emitGPR (0x08, insn->src(0));
2326
emitGPR (0x00, insn->def(0));
2327
}
2328
2329
void
2330
CodeEmitterGM107::emitBFE()
2331
{
2332
switch (insn->src(1).getFile()) {
2333
case FILE_GPR:
2334
emitInsn(0x5c000000);
2335
emitGPR (0x14, insn->src(1));
2336
break;
2337
case FILE_MEMORY_CONST:
2338
emitInsn(0x4c000000);
2339
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2340
break;
2341
case FILE_IMMEDIATE:
2342
emitInsn(0x38000000);
2343
emitIMMD(0x14, 19, insn->src(1));
2344
break;
2345
default:
2346
assert(!"bad src1 file");
2347
break;
2348
}
2349
2350
emitField(0x30, 1, isSignedType(insn->dType));
2351
emitCC (0x2f);
2352
emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353
emitGPR (0x08, insn->src(0));
2354
emitGPR (0x00, insn->def(0));
2355
}
2356
2357
void
2358
CodeEmitterGM107::emitFLO()
2359
{
2360
switch (insn->src(0).getFile()) {
2361
case FILE_GPR:
2362
emitInsn(0x5c300000);
2363
emitGPR (0x14, insn->src(0));
2364
break;
2365
case FILE_MEMORY_CONST:
2366
emitInsn(0x4c300000);
2367
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2368
break;
2369
case FILE_IMMEDIATE:
2370
emitInsn(0x38300000);
2371
emitIMMD(0x14, 19, insn->src(0));
2372
break;
2373
default:
2374
assert(!"bad src1 file");
2375
break;
2376
}
2377
2378
emitField(0x30, 1, isSignedType(insn->dType));
2379
emitCC (0x2f);
2380
emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381
emitINV (0x28, insn->src(0));
2382
emitGPR (0x00, insn->def(0));
2383
}
2384
2385
void
2386
CodeEmitterGM107::emitPRMT()
2387
{
2388
switch (insn->src(1).getFile()) {
2389
case FILE_GPR:
2390
emitInsn(0x5bc00000);
2391
emitGPR (0x14, insn->src(1));
2392
break;
2393
case FILE_MEMORY_CONST:
2394
emitInsn(0x4bc00000);
2395
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2396
break;
2397
case FILE_IMMEDIATE:
2398
emitInsn(0x36c00000);
2399
emitIMMD(0x14, 19, insn->src(1));
2400
break;
2401
default:
2402
assert(!"bad src1 file");
2403
break;
2404
}
2405
2406
emitField(0x30, 3, insn->subOp);
2407
emitGPR (0x27, insn->src(2));
2408
emitGPR (0x08, insn->src(0));
2409
emitGPR (0x00, insn->def(0));
2410
}
2411
2412
/*******************************************************************************
 * memory
 ******************************************************************************/
2415
2416
void
2417
CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2418
{
2419
int data = 0;
2420
2421
switch (typeSizeof(type)) {
2422
case 1: data = isSignedType(type) ? 1 : 0; break;
2423
case 2: data = isSignedType(type) ? 3 : 2; break;
2424
case 4: data = 4; break;
2425
case 8: data = 5; break;
2426
case 16: data = 6; break;
2427
default:
2428
assert(!"bad type");
2429
break;
2430
}
2431
2432
emitField(pos, 3, data);
2433
}
2434
2435
void
2436
CodeEmitterGM107::emitLDSTc(int pos)
2437
{
2438
int mode = 0;
2439
2440
switch (insn->cache) {
2441
case CACHE_CA: mode = 0; break;
2442
case CACHE_CG: mode = 1; break;
2443
case CACHE_CS: mode = 2; break;
2444
case CACHE_CV: mode = 3; break;
2445
default:
2446
assert(!"invalid caching mode");
2447
break;
2448
}
2449
2450
emitField(pos, 2, mode);
2451
}
2452
2453
void
2454
CodeEmitterGM107::emitLDC()
2455
{
2456
emitInsn (0xef900000);
2457
emitLDSTs(0x30, insn->dType);
2458
emitField(0x2c, 2, insn->subOp);
2459
emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460
emitGPR (0x00, insn->def(0));
2461
}
2462
2463
void
2464
CodeEmitterGM107::emitLDL()
2465
{
2466
emitInsn (0xef400000);
2467
emitLDSTs(0x30, insn->dType);
2468
emitLDSTc(0x2c);
2469
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470
emitGPR (0x00, insn->def(0));
2471
}
2472
2473
void
2474
CodeEmitterGM107::emitLDS()
2475
{
2476
emitInsn (0xef480000);
2477
emitLDSTs(0x30, insn->dType);
2478
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479
emitGPR (0x00, insn->def(0));
2480
}
2481
2482
void
2483
CodeEmitterGM107::emitLD()
2484
{
2485
emitInsn (0x80000000);
2486
emitPRED (0x3a);
2487
emitLDSTc(0x38);
2488
emitLDSTs(0x35, insn->dType);
2489
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491
emitGPR (0x00, insn->def(0));
2492
}
2493
2494
void
2495
CodeEmitterGM107::emitSTL()
2496
{
2497
emitInsn (0xef500000);
2498
emitLDSTs(0x30, insn->dType);
2499
emitLDSTc(0x2c);
2500
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501
emitGPR (0x00, insn->src(1));
2502
}
2503
2504
void
2505
CodeEmitterGM107::emitSTS()
2506
{
2507
emitInsn (0xef580000);
2508
emitLDSTs(0x30, insn->dType);
2509
emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510
emitGPR (0x00, insn->src(1));
2511
}
2512
2513
void
2514
CodeEmitterGM107::emitST()
2515
{
2516
emitInsn (0xa0000000);
2517
emitPRED (0x3a);
2518
emitLDSTc(0x38);
2519
emitLDSTs(0x35, insn->dType);
2520
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521
emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522
emitGPR (0x00, insn->src(1));
2523
}
2524
2525
void
2526
CodeEmitterGM107::emitALD()
2527
{
2528
emitInsn (0xefd80000);
2529
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530
emitGPR (0x27, insn->src(0).getIndirect(1));
2531
emitO (0x20);
2532
emitP (0x1f);
2533
emitADDR (0x08, 20, 10, 0, insn->src(0));
2534
emitGPR (0x00, insn->def(0));
2535
}
2536
2537
void
2538
CodeEmitterGM107::emitAST()
2539
{
2540
emitInsn (0xeff00000);
2541
emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542
emitGPR (0x27, insn->src(0).getIndirect(1));
2543
emitP (0x1f);
2544
emitADDR (0x08, 20, 10, 0, insn->src(0));
2545
emitGPR (0x00, insn->src(1));
2546
}
2547
2548
void
2549
CodeEmitterGM107::emitISBERD()
2550
{
2551
emitInsn(0xefd00000);
2552
emitGPR (0x08, insn->src(0));
2553
emitGPR (0x00, insn->def(0));
2554
}
2555
2556
void
2557
CodeEmitterGM107::emitAL2P()
2558
{
2559
emitInsn (0xefa00000);
2560
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2561
emitPRED (0x2c);
2562
emitO (0x20);
2563
emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564
emitGPR (0x08, insn->src(0).getIndirect(0));
2565
emitGPR (0x00, insn->def(0));
2566
}
2567
2568
void
2569
gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2570
{
2571
int ipa = entry->ipa;
2572
int reg = entry->reg;
2573
int loc = entry->loc;
2574
2575
if (data.flatshade &&
2576
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577
ipa = NV50_IR_INTERP_FLAT;
2578
reg = 0xff;
2579
} else if (data.force_persample_interp &&
2580
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582
ipa |= NV50_IR_INTERP_CENTROID;
2583
}
2584
code[loc + 1] &= ~(0xf << 0x14);
2585
code[loc + 1] |= (ipa & 0x3) << 0x16;
2586
code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587
code[loc + 0] &= ~(0xff << 0x14);
2588
code[loc + 0] |= reg << 0x14;
2589
}
2590
2591
void
2592
CodeEmitterGM107::emitIPA()
2593
{
2594
int ipam = 0, ipas = 0;
2595
2596
switch (insn->getInterpMode()) {
2597
case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2598
case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599
case NV50_IR_INTERP_FLAT : ipam = 2; break;
2600
case NV50_IR_INTERP_SC : ipam = 3; break;
2601
default:
2602
assert(!"invalid ipa mode");
2603
break;
2604
}
2605
2606
switch (insn->getSampleMode()) {
2607
case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608
case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609
case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2610
default:
2611
assert(!"invalid ipa sample mode");
2612
break;
2613
}
2614
2615
emitInsn (0xe0000000);
2616
emitField(0x36, 2, ipam);
2617
emitField(0x34, 2, ipas);
2618
emitSAT (0x33);
2619
emitField(0x2f, 3, 7);
2620
emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621
if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622
code[1] |= 0x00000040; /* .idx */
2623
emitGPR(0x00, insn->def(0));
2624
2625
if (insn->op == OP_PINTERP) {
2626
emitGPR(0x14, insn->src(1));
2627
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628
emitGPR(0x27, insn->src(2));
2629
addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2630
} else {
2631
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632
emitGPR(0x27, insn->src(1));
2633
emitGPR(0x14);
2634
addInterp(insn->ipa, 0xff, gm107_interpApply);
2635
}
2636
2637
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2638
emitGPR(0x27);
2639
}
2640
2641
void
2642
CodeEmitterGM107::emitATOM()
2643
{
2644
unsigned dType, subOp;
2645
2646
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647
switch (insn->dType) {
2648
case TYPE_U32: dType = 0; break;
2649
case TYPE_U64: dType = 1; break;
2650
default: assert(!"unexpected dType"); dType = 0; break;
2651
}
2652
subOp = 15;
2653
2654
emitInsn (0xee000000);
2655
} else {
2656
switch (insn->dType) {
2657
case TYPE_U32: dType = 0; break;
2658
case TYPE_S32: dType = 1; break;
2659
case TYPE_U64: dType = 2; break;
2660
case TYPE_F32: dType = 3; break;
2661
case TYPE_B128: dType = 4; break;
2662
case TYPE_S64: dType = 5; break;
2663
default: assert(!"unexpected dType"); dType = 0; break;
2664
}
2665
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2666
subOp = 8;
2667
else
2668
subOp = insn->subOp;
2669
2670
emitInsn (0xed000000);
2671
}
2672
2673
emitField(0x34, 4, subOp);
2674
emitField(0x31, 3, dType);
2675
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676
emitGPR (0x14, insn->src(1));
2677
emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678
emitGPR (0x00, insn->def(0));
2679
}
2680
2681
void
2682
CodeEmitterGM107::emitATOMS()
2683
{
2684
unsigned dType, subOp;
2685
2686
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687
switch (insn->dType) {
2688
case TYPE_U32: dType = 0; break;
2689
case TYPE_U64: dType = 1; break;
2690
default: assert(!"unexpected dType"); dType = 0; break;
2691
}
2692
subOp = 4;
2693
2694
emitInsn (0xee000000);
2695
emitField(0x34, 1, dType);
2696
} else {
2697
switch (insn->dType) {
2698
case TYPE_U32: dType = 0; break;
2699
case TYPE_S32: dType = 1; break;
2700
case TYPE_U64: dType = 2; break;
2701
case TYPE_S64: dType = 3; break;
2702
default: assert(!"unexpected dType"); dType = 0; break;
2703
}
2704
2705
if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2706
subOp = 8;
2707
else
2708
subOp = insn->subOp;
2709
2710
emitInsn (0xec000000);
2711
emitField(0x1c, 3, dType);
2712
}
2713
2714
emitField(0x34, 4, subOp);
2715
emitGPR (0x14, insn->src(1));
2716
emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717
emitGPR (0x00, insn->def(0));
2718
}
2719
2720
void
2721
CodeEmitterGM107::emitRED()
2722
{
2723
unsigned dType;
2724
2725
switch (insn->dType) {
2726
case TYPE_U32: dType = 0; break;
2727
case TYPE_S32: dType = 1; break;
2728
case TYPE_U64: dType = 2; break;
2729
case TYPE_F32: dType = 3; break;
2730
case TYPE_B128: dType = 4; break;
2731
case TYPE_S64: dType = 5; break;
2732
default: assert(!"unexpected dType"); dType = 0; break;
2733
}
2734
2735
emitInsn (0xebf80000);
2736
emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737
emitField(0x17, 3, insn->subOp);
2738
emitField(0x14, 3, dType);
2739
emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740
emitGPR (0x00, insn->src(1));
2741
}
2742
2743
void
2744
CodeEmitterGM107::emitCCTL()
2745
{
2746
unsigned width;
2747
if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748
emitInsn(0xef600000);
2749
width = 30;
2750
} else {
2751
emitInsn(0xef800000);
2752
width = 22;
2753
}
2754
emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755
emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756
emitField(0x00, 4, insn->subOp);
2757
}
2758
2759
/*******************************************************************************
2760
* surface
2761
******************************************************************************/
2762
2763
void
2764
CodeEmitterGM107::emitPIXLD()
2765
{
2766
emitInsn (0xefe80000);
2767
emitPRED (0x2d);
2768
emitField(0x1f, 3, insn->subOp);
2769
emitGPR (0x08, insn->src(0));
2770
emitGPR (0x00, insn->def(0));
2771
}
2772
2773
/*******************************************************************************
2774
* texture
2775
******************************************************************************/
2776
2777
void
2778
CodeEmitterGM107::emitTEXs(int pos)
2779
{
2780
int src1 = insn->predSrc == 1 ? 2 : 1;
2781
if (insn->srcExists(src1))
2782
emitGPR(pos, insn->src(src1));
2783
else
2784
emitGPR(pos);
2785
}
2786
2787
// Translate a 4-bit component mask into the 3-bit code that emitTEXS() writes
// at bit 0x32. Masks 0x0, 0x5, 0x6 and anything above 0xf are invalid: they
// trip the assert and yield 0.
//
// NOTE(review): several masks share a code (e.g. 0x1 and 0x7 both map to 0);
// presumably the encoding distinguishes them by other means - confirm.
static uint8_t
getTEXSMask(uint8_t mask)
{
   // Indexed by mask; -1 marks masks that have no encoding.
   static const int8_t encoding[16] = {
      -1, 0x0, 0x1, 0x4, 0x2, -1, -1, 0x0,
      0x3, 0x5, 0x6, 0x1, 0x7, 0x2, 0x3, 0x4,
   };

   const int code = (mask < 16) ? encoding[mask] : -1;
   if (code < 0) {
      assert(!"invalid mask");
      return 0;
   }
   return code;
}
2809
2810
static uint8_t
2811
getTEXSTarget(const TexInstruction *tex)
2812
{
2813
assert(tex->op == OP_TEX || tex->op == OP_TXL);
2814
2815
switch (tex->tex.target.getEnum()) {
2816
case TEX_TARGET_1D:
2817
assert(tex->tex.levelZero);
2818
return 0x0;
2819
case TEX_TARGET_2D:
2820
case TEX_TARGET_RECT:
2821
if (tex->tex.levelZero)
2822
return 0x2;
2823
if (tex->op == OP_TXL)
2824
return 0x3;
2825
return 0x1;
2826
case TEX_TARGET_2D_SHADOW:
2827
case TEX_TARGET_RECT_SHADOW:
2828
if (tex->tex.levelZero)
2829
return 0x6;
2830
if (tex->op == OP_TXL)
2831
return 0x5;
2832
return 0x4;
2833
case TEX_TARGET_2D_ARRAY:
2834
if (tex->tex.levelZero)
2835
return 0x8;
2836
return 0x7;
2837
case TEX_TARGET_2D_ARRAY_SHADOW:
2838
assert(tex->tex.levelZero);
2839
return 0x9;
2840
case TEX_TARGET_3D:
2841
if (tex->tex.levelZero)
2842
return 0xb;
2843
assert(tex->op != OP_TXL);
2844
return 0xa;
2845
case TEX_TARGET_CUBE:
2846
assert(!tex->tex.levelZero);
2847
if (tex->op == OP_TXL)
2848
return 0xd;
2849
return 0xc;
2850
default:
2851
assert(false);
2852
return 0x0;
2853
}
2854
}
2855
2856
static uint8_t
2857
getTLDSTarget(const TexInstruction *tex)
2858
{
2859
switch (tex->tex.target.getEnum()) {
2860
case TEX_TARGET_1D:
2861
if (tex->tex.levelZero)
2862
return 0x0;
2863
return 0x1;
2864
case TEX_TARGET_2D:
2865
case TEX_TARGET_RECT:
2866
if (tex->tex.levelZero)
2867
return tex->tex.useOffsets ? 0x4 : 0x2;
2868
return tex->tex.useOffsets ? 0xc : 0x5;
2869
case TEX_TARGET_2D_MS:
2870
assert(tex->tex.levelZero);
2871
return 0x6;
2872
case TEX_TARGET_3D:
2873
assert(tex->tex.levelZero);
2874
return 0x7;
2875
case TEX_TARGET_2D_ARRAY:
2876
assert(tex->tex.levelZero);
2877
return 0x8;
2878
2879
default:
2880
assert(false);
2881
return 0x0;
2882
}
2883
}
2884
2885
void
2886
CodeEmitterGM107::emitTEX()
2887
{
2888
const TexInstruction *insn = this->insn->asTex();
2889
int lodm = 0;
2890
2891
if (!insn->tex.levelZero) {
2892
switch (insn->op) {
2893
case OP_TEX: lodm = 0; break;
2894
case OP_TXB: lodm = 2; break;
2895
case OP_TXL: lodm = 3; break;
2896
default:
2897
assert(!"invalid tex op");
2898
break;
2899
}
2900
} else {
2901
lodm = 1;
2902
}
2903
2904
if (insn->tex.rIndirectSrc >= 0) {
2905
emitInsn (0xdeb80000);
2906
emitField(0x25, 2, lodm);
2907
emitField(0x24, 1, insn->tex.useOffsets == 1);
2908
} else {
2909
emitInsn (0xc0380000);
2910
emitField(0x37, 2, lodm);
2911
emitField(0x36, 1, insn->tex.useOffsets == 1);
2912
emitField(0x24, 13, insn->tex.r);
2913
}
2914
2915
emitField(0x32, 1, insn->tex.target.isShadow());
2916
emitField(0x31, 1, insn->tex.liveOnly);
2917
emitField(0x23, 1, insn->tex.derivAll);
2918
emitField(0x1f, 4, insn->tex.mask);
2919
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920
insn->tex.target.getDim() - 1);
2921
emitField(0x1c, 1, insn->tex.target.isArray());
2922
emitTEXs (0x14);
2923
emitGPR (0x08, insn->src(0));
2924
emitGPR (0x00, insn->def(0));
2925
}
2926
2927
void
2928
CodeEmitterGM107::emitTEXS()
2929
{
2930
const TexInstruction *insn = this->insn->asTex();
2931
assert(!insn->tex.derivAll);
2932
2933
switch (insn->op) {
2934
case OP_TEX:
2935
case OP_TXL:
2936
emitInsn (0xd8000000);
2937
emitField(0x35, 4, getTEXSTarget(insn));
2938
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2939
break;
2940
case OP_TXF:
2941
emitInsn (0xda000000);
2942
emitField(0x35, 4, getTLDSTarget(insn));
2943
emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2944
break;
2945
case OP_TXG:
2946
assert(insn->tex.useOffsets != 4);
2947
emitInsn (0xdf000000);
2948
emitField(0x34, 2, insn->tex.gatherComp);
2949
emitField(0x33, 1, insn->tex.useOffsets == 1);
2950
emitField(0x32, 1, insn->tex.target.isShadow());
2951
break;
2952
default:
2953
unreachable("unknown op in emitTEXS()");
2954
break;
2955
}
2956
2957
emitField(0x31, 1, insn->tex.liveOnly);
2958
emitField(0x24, 13, insn->tex.r);
2959
if (insn->defExists(1))
2960
emitGPR(0x1c, insn->def(1));
2961
else
2962
emitGPR(0x1c);
2963
if (insn->srcExists(1))
2964
emitGPR(0x14, insn->getSrc(1));
2965
else
2966
emitGPR(0x14);
2967
emitGPR (0x08, insn->src(0));
2968
emitGPR (0x00, insn->def(0));
2969
}
2970
2971
void
2972
CodeEmitterGM107::emitTLD()
2973
{
2974
const TexInstruction *insn = this->insn->asTex();
2975
2976
if (insn->tex.rIndirectSrc >= 0) {
2977
emitInsn (0xdd380000);
2978
} else {
2979
emitInsn (0xdc380000);
2980
emitField(0x24, 13, insn->tex.r);
2981
}
2982
2983
emitField(0x37, 1, insn->tex.levelZero == 0);
2984
emitField(0x32, 1, insn->tex.target.isMS());
2985
emitField(0x31, 1, insn->tex.liveOnly);
2986
emitField(0x23, 1, insn->tex.useOffsets == 1);
2987
emitField(0x1f, 4, insn->tex.mask);
2988
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989
insn->tex.target.getDim() - 1);
2990
emitField(0x1c, 1, insn->tex.target.isArray());
2991
emitTEXs (0x14);
2992
emitGPR (0x08, insn->src(0));
2993
emitGPR (0x00, insn->def(0));
2994
}
2995
2996
void
2997
CodeEmitterGM107::emitTLD4()
2998
{
2999
const TexInstruction *insn = this->insn->asTex();
3000
3001
if (insn->tex.rIndirectSrc >= 0) {
3002
emitInsn (0xdef80000);
3003
emitField(0x26, 2, insn->tex.gatherComp);
3004
emitField(0x25, 2, insn->tex.useOffsets == 4);
3005
emitField(0x24, 2, insn->tex.useOffsets == 1);
3006
} else {
3007
emitInsn (0xc8380000);
3008
emitField(0x38, 2, insn->tex.gatherComp);
3009
emitField(0x37, 2, insn->tex.useOffsets == 4);
3010
emitField(0x36, 2, insn->tex.useOffsets == 1);
3011
emitField(0x24, 13, insn->tex.r);
3012
}
3013
3014
emitField(0x32, 1, insn->tex.target.isShadow());
3015
emitField(0x31, 1, insn->tex.liveOnly);
3016
emitField(0x23, 1, insn->tex.derivAll);
3017
emitField(0x1f, 4, insn->tex.mask);
3018
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019
insn->tex.target.getDim() - 1);
3020
emitField(0x1c, 1, insn->tex.target.isArray());
3021
emitTEXs (0x14);
3022
emitGPR (0x08, insn->src(0));
3023
emitGPR (0x00, insn->def(0));
3024
}
3025
3026
void
3027
CodeEmitterGM107::emitTXD()
3028
{
3029
const TexInstruction *insn = this->insn->asTex();
3030
3031
if (insn->tex.rIndirectSrc >= 0) {
3032
emitInsn (0xde780000);
3033
} else {
3034
emitInsn (0xde380000);
3035
emitField(0x24, 13, insn->tex.r);
3036
}
3037
3038
emitField(0x31, 1, insn->tex.liveOnly);
3039
emitField(0x23, 1, insn->tex.useOffsets == 1);
3040
emitField(0x1f, 4, insn->tex.mask);
3041
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042
insn->tex.target.getDim() - 1);
3043
emitField(0x1c, 1, insn->tex.target.isArray());
3044
emitTEXs (0x14);
3045
emitGPR (0x08, insn->src(0));
3046
emitGPR (0x00, insn->def(0));
3047
}
3048
3049
void
3050
CodeEmitterGM107::emitTMML()
3051
{
3052
const TexInstruction *insn = this->insn->asTex();
3053
3054
if (insn->tex.rIndirectSrc >= 0) {
3055
emitInsn (0xdf600000);
3056
} else {
3057
emitInsn (0xdf580000);
3058
emitField(0x24, 13, insn->tex.r);
3059
}
3060
3061
emitField(0x31, 1, insn->tex.liveOnly);
3062
emitField(0x23, 1, insn->tex.derivAll);
3063
emitField(0x1f, 4, insn->tex.mask);
3064
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065
insn->tex.target.getDim() - 1);
3066
emitField(0x1c, 1, insn->tex.target.isArray());
3067
emitTEXs (0x14);
3068
emitGPR (0x08, insn->src(0));
3069
emitGPR (0x00, insn->def(0));
3070
}
3071
3072
void
3073
CodeEmitterGM107::emitTXQ()
3074
{
3075
const TexInstruction *insn = this->insn->asTex();
3076
int type = 0;
3077
3078
switch (insn->tex.query) {
3079
case TXQ_DIMS : type = 0x01; break;
3080
case TXQ_TYPE : type = 0x02; break;
3081
case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082
case TXQ_FILTER : type = 0x10; break;
3083
case TXQ_LOD : type = 0x12; break;
3084
case TXQ_WRAP : type = 0x14; break;
3085
case TXQ_BORDER_COLOUR : type = 0x16; break;
3086
default:
3087
assert(!"invalid txq query");
3088
break;
3089
}
3090
3091
if (insn->tex.rIndirectSrc >= 0) {
3092
emitInsn (0xdf500000);
3093
} else {
3094
emitInsn (0xdf480000);
3095
emitField(0x24, 13, insn->tex.r);
3096
}
3097
3098
emitField(0x31, 1, insn->tex.liveOnly);
3099
emitField(0x1f, 4, insn->tex.mask);
3100
emitField(0x16, 6, type);
3101
emitGPR (0x08, insn->src(0));
3102
emitGPR (0x00, insn->def(0));
3103
}
3104
3105
void
3106
CodeEmitterGM107::emitDEPBAR()
3107
{
3108
emitInsn (0xf0f00000);
3109
emitField(0x1d, 1, 1); /* le */
3110
emitField(0x1a, 3, 5);
3111
emitField(0x14, 6, insn->subOp);
3112
emitField(0x00, 6, insn->subOp);
3113
}
3114
3115
/*******************************************************************************
3116
* misc
3117
******************************************************************************/
3118
3119
void
3120
CodeEmitterGM107::emitNOP()
3121
{
3122
emitInsn(0x50b00000);
3123
}
3124
3125
void
3126
CodeEmitterGM107::emitKIL()
3127
{
3128
emitInsn (0xe3300000);
3129
emitCond5(0x00, CC_TR);
3130
}
3131
3132
void
3133
CodeEmitterGM107::emitOUT()
3134
{
3135
const int cut = insn->op == OP_RESTART || insn->subOp;
3136
const int emit = insn->op == OP_EMIT;
3137
3138
switch (insn->src(1).getFile()) {
3139
case FILE_GPR:
3140
emitInsn(0xfbe00000);
3141
emitGPR (0x14, insn->src(1));
3142
break;
3143
case FILE_IMMEDIATE:
3144
emitInsn(0xf6e00000);
3145
emitIMMD(0x14, 19, insn->src(1));
3146
break;
3147
case FILE_MEMORY_CONST:
3148
emitInsn(0xebe00000);
3149
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3150
break;
3151
default:
3152
assert(!"bad src1 file");
3153
break;
3154
}
3155
3156
emitField(0x27, 2, (cut << 1) | emit);
3157
emitGPR (0x08, insn->src(0));
3158
emitGPR (0x00, insn->def(0));
3159
}
3160
3161
void
3162
CodeEmitterGM107::emitBAR()
3163
{
3164
uint8_t subop;
3165
3166
emitInsn (0xf0a80000);
3167
3168
switch (insn->subOp) {
3169
case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170
case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3171
case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3172
case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3173
default:
3174
subop = 0x80;
3175
assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3176
break;
3177
}
3178
3179
emitField(0x20, 8, subop);
3180
3181
// barrier id
3182
if (insn->src(0).getFile() == FILE_GPR) {
3183
emitGPR(0x08, insn->src(0));
3184
} else {
3185
ImmediateValue *imm = insn->getSrc(0)->asImm();
3186
assert(imm);
3187
emitField(0x08, 8, imm->reg.data.u32);
3188
emitField(0x2b, 1, 1);
3189
}
3190
3191
// thread count
3192
if (insn->src(1).getFile() == FILE_GPR) {
3193
emitGPR(0x14, insn->src(1));
3194
} else {
3195
ImmediateValue *imm = insn->getSrc(0)->asImm();
3196
assert(imm);
3197
emitField(0x14, 12, imm->reg.data.u32);
3198
emitField(0x2c, 1, 1);
3199
}
3200
3201
if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202
emitPRED (0x27, insn->src(2));
3203
emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3204
} else {
3205
emitField(0x27, 3, 7);
3206
}
3207
}
3208
3209
void
3210
CodeEmitterGM107::emitMEMBAR()
3211
{
3212
emitInsn (0xef980000);
3213
emitField(0x08, 2, insn->subOp >> 2);
3214
}
3215
3216
void
3217
CodeEmitterGM107::emitVOTE()
3218
{
3219
const ImmediateValue *imm;
3220
uint32_t u32;
3221
3222
int r = -1, p = -1;
3223
for (int i = 0; insn->defExists(i); i++) {
3224
if (insn->def(i).getFile() == FILE_GPR)
3225
r = i;
3226
else if (insn->def(i).getFile() == FILE_PREDICATE)
3227
p = i;
3228
}
3229
3230
emitInsn (0x50d80000);
3231
emitField(0x30, 2, insn->subOp);
3232
if (r >= 0)
3233
emitGPR (0x00, insn->def(r));
3234
else
3235
emitGPR (0x00);
3236
if (p >= 0)
3237
emitPRED (0x2d, insn->def(p));
3238
else
3239
emitPRED (0x2d);
3240
3241
switch (insn->src(0).getFile()) {
3242
case FILE_PREDICATE:
3243
emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244
emitPRED (0x27, insn->src(0));
3245
break;
3246
case FILE_IMMEDIATE:
3247
imm = insn->getSrc(0)->asImm();
3248
assert(imm);
3249
u32 = imm->reg.data.u32;
3250
assert(u32 == 0 || u32 == 1);
3251
emitPRED(0x27);
3252
emitField(0x2a, 1, u32 == 0);
3253
break;
3254
default:
3255
assert(!"Unhandled src");
3256
break;
3257
}
3258
}
3259
3260
void
3261
CodeEmitterGM107::emitSUTarget()
3262
{
3263
const TexInstruction *insn = this->insn->asTex();
3264
int target = 0;
3265
3266
assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3267
3268
if (insn->tex.target == TEX_TARGET_BUFFER) {
3269
target = 2;
3270
} else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3271
target = 4;
3272
} else if (insn->tex.target == TEX_TARGET_2D ||
3273
insn->tex.target == TEX_TARGET_RECT) {
3274
target = 6;
3275
} else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276
insn->tex.target == TEX_TARGET_CUBE ||
3277
insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3278
target = 8;
3279
} else if (insn->tex.target == TEX_TARGET_3D) {
3280
target = 10;
3281
} else {
3282
assert(insn->tex.target == TEX_TARGET_1D);
3283
}
3284
emitField(0x20, 4, target);
3285
}
3286
3287
void
3288
CodeEmitterGM107::emitSUHandle(const int s)
3289
{
3290
const TexInstruction *insn = this->insn->asTex();
3291
3292
assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3293
3294
if (insn->src(s).getFile() == FILE_GPR) {
3295
emitGPR(0x27, insn->src(s));
3296
} else {
3297
ImmediateValue *imm = insn->getSrc(s)->asImm();
3298
assert(imm);
3299
emitField(0x33, 1, 1);
3300
emitField(0x24, 13, imm->reg.data.u32);
3301
}
3302
}
3303
3304
void
3305
CodeEmitterGM107::emitSUSTx()
3306
{
3307
const TexInstruction *insn = this->insn->asTex();
3308
3309
emitInsn(0xeb200000);
3310
if (insn->op == OP_SUSTB)
3311
emitField(0x34, 1, 1);
3312
emitSUTarget();
3313
3314
emitLDSTc(0x18);
3315
emitField(0x14, 4, 0xf); // rgba
3316
emitGPR (0x08, insn->src(0));
3317
emitGPR (0x00, insn->src(1));
3318
3319
emitSUHandle(2);
3320
}
3321
3322
void
3323
CodeEmitterGM107::emitSULDx()
3324
{
3325
const TexInstruction *insn = this->insn->asTex();
3326
int type = 0;
3327
3328
emitInsn(0xeb000000);
3329
if (insn->op == OP_SULDB)
3330
emitField(0x34, 1, 1);
3331
emitSUTarget();
3332
3333
switch (insn->dType) {
3334
case TYPE_S8: type = 1; break;
3335
case TYPE_U16: type = 2; break;
3336
case TYPE_S16: type = 3; break;
3337
case TYPE_U32: type = 4; break;
3338
case TYPE_U64: type = 5; break;
3339
case TYPE_B128: type = 6; break;
3340
default:
3341
assert(insn->dType == TYPE_U8);
3342
break;
3343
}
3344
emitLDSTc(0x18);
3345
emitField(0x14, 3, type);
3346
emitGPR (0x00, insn->def(0));
3347
emitGPR (0x08, insn->src(0));
3348
3349
emitSUHandle(1);
3350
}
3351
3352
void
3353
CodeEmitterGM107::emitSUREDx()
3354
{
3355
const TexInstruction *insn = this->insn->asTex();
3356
uint8_t type = 0, subOp;
3357
3358
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3359
emitInsn(0xeac00000);
3360
else
3361
emitInsn(0xea600000);
3362
3363
if (insn->op == OP_SUREDB)
3364
emitField(0x34, 1, 1);
3365
emitSUTarget();
3366
3367
// destination type
3368
switch (insn->dType) {
3369
case TYPE_S32: type = 1; break;
3370
case TYPE_U64: type = 2; break;
3371
case TYPE_F32: type = 3; break;
3372
case TYPE_S64: type = 5; break;
3373
default:
3374
assert(insn->dType == TYPE_U32);
3375
break;
3376
}
3377
3378
// atomic operation
3379
if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3380
subOp = 0;
3381
} else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3382
subOp = 8;
3383
} else {
3384
subOp = insn->subOp;
3385
}
3386
3387
emitField(0x24, 3, type);
3388
emitField(0x1d, 4, subOp);
3389
emitGPR (0x14, insn->src(1));
3390
emitGPR (0x08, insn->src(0));
3391
emitGPR (0x00, insn->def(0));
3392
3393
emitSUHandle(2);
3394
}
3395
3396
/*******************************************************************************
3397
* assembler front-end
3398
******************************************************************************/
3399
3400
bool
3401
CodeEmitterGM107::emitInstruction(Instruction *i)
3402
{
3403
const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3404
bool ret = true;
3405
3406
insn = i;
3407
3408
if (insn->encSize != 8) {
3409
ERROR("skipping undecodable instruction: "); insn->print();
3410
return false;
3411
} else
3412
if (codeSize + size > codeSizeLimit) {
3413
ERROR("code emitter output buffer too small\n");
3414
return false;
3415
}
3416
3417
if (writeIssueDelays) {
3418
int n = ((codeSize & 0x1f) / 8) - 1;
3419
if (n < 0) {
3420
data = code;
3421
data[0] = 0x00000000;
3422
data[1] = 0x00000000;
3423
code += 2;
3424
codeSize += 8;
3425
n++;
3426
}
3427
3428
emitField(data, n * 21, 21, insn->sched);
3429
}
3430
3431
switch (insn->op) {
3432
case OP_EXIT:
3433
emitEXIT();
3434
break;
3435
case OP_BRA:
3436
emitBRA();
3437
break;
3438
case OP_CALL:
3439
emitCAL();
3440
break;
3441
case OP_PRECONT:
3442
emitPCNT();
3443
break;
3444
case OP_CONT:
3445
emitCONT();
3446
break;
3447
case OP_PREBREAK:
3448
emitPBK();
3449
break;
3450
case OP_BREAK:
3451
emitBRK();
3452
break;
3453
case OP_PRERET:
3454
emitPRET();
3455
break;
3456
case OP_RET:
3457
emitRET();
3458
break;
3459
case OP_JOINAT:
3460
emitSSY();
3461
break;
3462
case OP_JOIN:
3463
emitSYNC();
3464
break;
3465
case OP_QUADON:
3466
emitSAM();
3467
break;
3468
case OP_QUADPOP:
3469
emitRAM();
3470
break;
3471
case OP_MOV:
3472
emitMOV();
3473
break;
3474
case OP_RDSV:
3475
if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3476
emitCS2R();
3477
else
3478
emitS2R();
3479
break;
3480
case OP_ABS:
3481
case OP_NEG:
3482
case OP_SAT:
3483
case OP_FLOOR:
3484
case OP_CEIL:
3485
case OP_TRUNC:
3486
case OP_CVT:
3487
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3488
insn->src(0).getFile() == FILE_PREDICATE)) {
3489
emitMOV();
3490
} else if (isFloatType(insn->dType)) {
3491
if (isFloatType(insn->sType))
3492
emitF2F();
3493
else
3494
emitI2F();
3495
} else {
3496
if (isFloatType(insn->sType))
3497
emitF2I();
3498
else
3499
emitI2I();
3500
}
3501
break;
3502
case OP_SHFL:
3503
emitSHFL();
3504
break;
3505
case OP_ADD:
3506
case OP_SUB:
3507
if (isFloatType(insn->dType)) {
3508
if (insn->dType == TYPE_F64)
3509
emitDADD();
3510
else
3511
emitFADD();
3512
} else {
3513
emitIADD();
3514
}
3515
break;
3516
case OP_MUL:
3517
if (isFloatType(insn->dType)) {
3518
if (insn->dType == TYPE_F64)
3519
emitDMUL();
3520
else
3521
emitFMUL();
3522
} else {
3523
emitIMUL();
3524
}
3525
break;
3526
case OP_MAD:
3527
case OP_FMA:
3528
if (isFloatType(insn->dType)) {
3529
if (insn->dType == TYPE_F64)
3530
emitDFMA();
3531
else
3532
emitFFMA();
3533
} else {
3534
emitIMAD();
3535
}
3536
break;
3537
case OP_SHLADD:
3538
emitISCADD();
3539
break;
3540
case OP_XMAD:
3541
emitXMAD();
3542
break;
3543
case OP_MIN:
3544
case OP_MAX:
3545
if (isFloatType(insn->dType)) {
3546
if (insn->dType == TYPE_F64)
3547
emitDMNMX();
3548
else
3549
emitFMNMX();
3550
} else {
3551
emitIMNMX();
3552
}
3553
break;
3554
case OP_SHL:
3555
if (typeSizeof(insn->sType) == 8)
3556
emitSHF();
3557
else
3558
emitSHL();
3559
break;
3560
case OP_SHR:
3561
if (typeSizeof(insn->sType) == 8)
3562
emitSHF();
3563
else
3564
emitSHR();
3565
break;
3566
case OP_POPCNT:
3567
emitPOPC();
3568
break;
3569
case OP_INSBF:
3570
emitBFI();
3571
break;
3572
case OP_EXTBF:
3573
emitBFE();
3574
break;
3575
case OP_BFIND:
3576
emitFLO();
3577
break;
3578
case OP_PERMT:
3579
emitPRMT();
3580
break;
3581
case OP_SLCT:
3582
if (isFloatType(insn->dType))
3583
emitFCMP();
3584
else
3585
emitICMP();
3586
break;
3587
case OP_SET:
3588
case OP_SET_AND:
3589
case OP_SET_OR:
3590
case OP_SET_XOR:
3591
if (insn->def(0).getFile() != FILE_PREDICATE) {
3592
if (isFloatType(insn->sType))
3593
if (insn->sType == TYPE_F64)
3594
emitDSET();
3595
else
3596
emitFSET();
3597
else
3598
emitISET();
3599
} else {
3600
if (isFloatType(insn->sType))
3601
if (insn->sType == TYPE_F64)
3602
emitDSETP();
3603
else
3604
emitFSETP();
3605
else
3606
emitISETP();
3607
}
3608
break;
3609
case OP_SELP:
3610
emitSEL();
3611
break;
3612
case OP_PRESIN:
3613
case OP_PREEX2:
3614
emitRRO();
3615
break;
3616
case OP_COS:
3617
case OP_SIN:
3618
case OP_EX2:
3619
case OP_LG2:
3620
case OP_RCP:
3621
case OP_RSQ:
3622
case OP_SQRT:
3623
emitMUFU();
3624
break;
3625
case OP_AND:
3626
case OP_OR:
3627
case OP_XOR:
3628
switch (insn->def(0).getFile()) {
3629
case FILE_GPR: emitLOP(); break;
3630
case FILE_PREDICATE: emitPSETP(); break;
3631
default:
3632
assert(!"invalid bool op");
3633
}
3634
break;
3635
case OP_NOT:
3636
emitNOT();
3637
break;
3638
case OP_LOAD:
3639
switch (insn->src(0).getFile()) {
3640
case FILE_MEMORY_CONST : emitLDC(); break;
3641
case FILE_MEMORY_LOCAL : emitLDL(); break;
3642
case FILE_MEMORY_SHARED: emitLDS(); break;
3643
case FILE_MEMORY_GLOBAL: emitLD(); break;
3644
default:
3645
assert(!"invalid load");
3646
emitNOP();
3647
break;
3648
}
3649
break;
3650
case OP_STORE:
3651
switch (insn->src(0).getFile()) {
3652
case FILE_MEMORY_LOCAL : emitSTL(); break;
3653
case FILE_MEMORY_SHARED: emitSTS(); break;
3654
case FILE_MEMORY_GLOBAL: emitST(); break;
3655
default:
3656
assert(!"invalid store");
3657
emitNOP();
3658
break;
3659
}
3660
break;
3661
case OP_ATOM:
3662
if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3663
emitATOMS();
3664
else
3665
if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3666
emitRED();
3667
else
3668
emitATOM();
3669
break;
3670
case OP_CCTL:
3671
emitCCTL();
3672
break;
3673
case OP_VFETCH:
3674
emitALD();
3675
break;
3676
case OP_EXPORT:
3677
emitAST();
3678
break;
3679
case OP_PFETCH:
3680
emitISBERD();
3681
break;
3682
case OP_AFETCH:
3683
emitAL2P();
3684
break;
3685
case OP_LINTERP:
3686
case OP_PINTERP:
3687
emitIPA();
3688
break;
3689
case OP_PIXLD:
3690
emitPIXLD();
3691
break;
3692
case OP_TEX:
3693
case OP_TXL:
3694
if (insn->asTex()->tex.scalar)
3695
emitTEXS();
3696
else
3697
emitTEX();
3698
break;
3699
case OP_TXB:
3700
emitTEX();
3701
break;
3702
case OP_TXF:
3703
if (insn->asTex()->tex.scalar)
3704
emitTEXS();
3705
else
3706
emitTLD();
3707
break;
3708
case OP_TXG:
3709
if (insn->asTex()->tex.scalar)
3710
emitTEXS();
3711
else
3712
emitTLD4();
3713
break;
3714
case OP_TXD:
3715
emitTXD();
3716
break;
3717
case OP_TXQ:
3718
emitTXQ();
3719
break;
3720
case OP_TXLQ:
3721
emitTMML();
3722
break;
3723
case OP_TEXBAR:
3724
emitDEPBAR();
3725
break;
3726
case OP_QUADOP:
3727
emitFSWZADD();
3728
break;
3729
case OP_NOP:
3730
emitNOP();
3731
break;
3732
case OP_DISCARD:
3733
emitKIL();
3734
break;
3735
case OP_EMIT:
3736
case OP_RESTART:
3737
emitOUT();
3738
break;
3739
case OP_BAR:
3740
emitBAR();
3741
break;
3742
case OP_MEMBAR:
3743
emitMEMBAR();
3744
break;
3745
case OP_VOTE:
3746
emitVOTE();
3747
break;
3748
case OP_SUSTB:
3749
case OP_SUSTP:
3750
emitSUSTx();
3751
break;
3752
case OP_SULDB:
3753
case OP_SULDP:
3754
emitSULDx();
3755
break;
3756
case OP_SUREDB:
3757
case OP_SUREDP:
3758
emitSUREDx();
3759
break;
3760
default:
3761
assert(!"invalid opcode");
3762
emitNOP();
3763
ret = false;
3764
break;
3765
}
3766
3767
if (insn->join) {
3768
/*XXX*/
3769
}
3770
3771
code += 2;
3772
codeSize += 8;
3773
return ret;
3774
}
3775
3776
uint32_t
3777
CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3778
{
3779
return 8;
3780
}
3781
3782
/*******************************************************************************
3783
* sched data calculator
3784
******************************************************************************/
3785
3786
inline void
3787
SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3788
{
3789
assert(cnt < 16);
3790
insn->sched |= cnt;
3791
}
3792
3793
inline void
3794
SchedDataCalculatorGM107::emitYield(Instruction *insn)
3795
{
3796
insn->sched |= 1 << 4;
3797
}
3798
3799
inline void
3800
SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3801
{
3802
assert(id < 6);
3803
if ((insn->sched & 0xe0) == 0xe0)
3804
insn->sched ^= 0xe0;
3805
insn->sched |= id << 5;
3806
}
3807
3808
inline void
3809
SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3810
{
3811
assert(id < 6);
3812
if ((insn->sched & 0x700) == 0x700)
3813
insn->sched ^= 0x700;
3814
insn->sched |= id << 8;
3815
}
3816
3817
inline void
3818
SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3819
{
3820
assert(id < 6);
3821
insn->sched |= 1 << (11 + id);
3822
}
3823
3824
inline void
3825
SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3826
{
3827
assert(id < 4);
3828
insn->sched |= 1 << (17 + id);
3829
}
3830
3831
inline void
3832
SchedDataCalculatorGM107::printSchedInfo(int cycle,
3833
const Instruction *insn) const
3834
{
3835
uint8_t st, yl, wr, rd, wt, ru;
3836
3837
st = (insn->sched & 0x00000f) >> 0;
3838
yl = (insn->sched & 0x000010) >> 4;
3839
wr = (insn->sched & 0x0000e0) >> 5;
3840
rd = (insn->sched & 0x000700) >> 8;
3841
wt = (insn->sched & 0x01f800) >> 11;
3842
ru = (insn->sched & 0x1e0000) >> 17;
3843
3844
INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3845
cycle, st, yl, wr, rd, wt, ru);
3846
}
3847
3848
inline int
3849
SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3850
{
3851
return insn->sched & 0xf;
3852
}
3853
3854
inline int
3855
SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3856
{
3857
return (insn->sched & 0x0000e0) >> 5;
3858
}
3859
3860
inline int
3861
SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3862
{
3863
return (insn->sched & 0x000700) >> 8;
3864
}
3865
3866
inline int
3867
SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3868
{
3869
return (insn->sched & 0x01f800) >> 11;
3870
}
3871
3872
// Emit the reuse flag which allows to make use of the new memory hierarchy
3873
// introduced since Maxwell, the operand reuse cache.
3874
//
3875
// It allows to reduce bank conflicts by caching operands. Each time you issue
3876
// an instruction, that flag can tell the hw which operands are going to be
3877
// re-used by the next instruction. Note that the next instruction has to use
3878
// the same GPR id in the same operand slot.
3879
void
3880
SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3881
{
3882
Instruction *next = insn->next;
3883
BitSet defs(255, 1);
3884
3885
if (!targ->isReuseSupported(insn))
3886
return;
3887
3888
for (int d = 0; insn->defExists(d); ++d) {
3889
const Value *def = insn->def(d).rep();
3890
if (insn->def(d).getFile() != FILE_GPR)
3891
continue;
3892
if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3893
continue;
3894
defs.set(def->reg.data.id);
3895
}
3896
3897
for (int s = 0; insn->srcExists(s); s++) {
3898
const Value *src = insn->src(s).rep();
3899
if (insn->src(s).getFile() != FILE_GPR)
3900
continue;
3901
if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3902
continue;
3903
if (defs.test(src->reg.data.id))
3904
continue;
3905
if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3906
continue;
3907
if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3908
continue;
3909
assert(s < 4);
3910
emitReuse(insn, s);
3911
}
3912
}
3913
3914
void
3915
SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3916
{
3917
int a = v->reg.data.id, b;
3918
3919
switch (v->reg.file) {
3920
case FILE_GPR:
3921
b = a + v->reg.size / 4;
3922
for (int r = a; r < b; ++r)
3923
score->rd.r[r] = ready;
3924
break;
3925
case FILE_PREDICATE:
3926
// To immediately use a predicate set by any instructions, the minimum
3927
// number of stall counts is 13.
3928
score->rd.p[a] = cycle + 13;
3929
break;
3930
case FILE_FLAGS:
3931
score->rd.c = ready;
3932
break;
3933
default:
3934
break;
3935
}
3936
}
3937
3938
void
3939
SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3940
{
3941
int a = v->reg.data.id, b;
3942
int ready = cycle;
3943
3944
switch (v->reg.file) {
3945
case FILE_GPR:
3946
b = a + v->reg.size / 4;
3947
for (int r = a; r < b; ++r)
3948
ready = MAX2(ready, score->rd.r[r]);
3949
break;
3950
case FILE_PREDICATE:
3951
ready = MAX2(ready, score->rd.p[a]);
3952
break;
3953
case FILE_FLAGS:
3954
ready = MAX2(ready, score->rd.c);
3955
break;
3956
default:
3957
break;
3958
}
3959
if (cycle < ready)
3960
delay = MAX2(delay, ready - cycle);
3961
}
3962
3963
void
3964
SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3965
{
3966
const int ready = cycle + targ->getLatency(insn);
3967
3968
for (int d = 0; insn->defExists(d); ++d)
3969
recordWr(insn->getDef(d), cycle, ready);
3970
3971
#ifdef GM107_DEBUG_SCHED_DATA
3972
score->print(cycle);
3973
#endif
3974
}
3975
3976
#define GM107_MIN_ISSUE_DELAY 0x1
3977
#define GM107_MAX_ISSUE_DELAY 0xf
3978
3979
int
3980
SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3981
{
3982
int delay = 0, ready = cycle;
3983
3984
for (int s = 0; insn->srcExists(s); ++s)
3985
checkRd(insn->getSrc(s), cycle, delay);
3986
3987
// TODO: make use of getReadLatency()!
3988
3989
return MAX2(delay, ready - cycle);
3990
}
3991
3992
void
3993
SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3994
const Instruction *next)
3995
{
3996
const OpClass cl = targ->getOpClass(insn->op);
3997
int wr, rd;
3998
3999
if (insn->op == OP_EXIT ||
4000
insn->op == OP_BAR ||
4001
insn->op == OP_MEMBAR) {
4002
delay = GM107_MAX_ISSUE_DELAY;
4003
} else
4004
if (insn->op == OP_QUADON ||
4005
insn->op == OP_QUADPOP) {
4006
delay = 0xd;
4007
} else
4008
if (cl == OPCLASS_FLOW || insn->join) {
4009
delay = 0xd;
4010
}
4011
4012
if (!next || !targ->canDualIssue(insn, next)) {
4013
delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4014
} else {
4015
delay = 0x0; // dual-issue
4016
}
4017
4018
wr = getWrDepBar(insn);
4019
rd = getRdDepBar(insn);
4020
4021
if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4022
// Barriers take one additional clock cycle to become active on top of
4023
// the clock consumed by the instruction producing it.
4024
if (!next || insn->bb != next->bb) {
4025
delay = 0x2;
4026
} else {
4027
int wt = getWtDepBar(next);
4028
if ((wt & (1 << wr)) | (wt & (1 << rd)))
4029
delay = 0x2;
4030
}
4031
}
4032
4033
emitStall(insn, delay);
4034
}
4035
4036
4037
// Return true when the given instruction needs to emit a read dependency
4038
// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4039
// setting the maximum number of stall counts is not enough.
4040
bool
4041
SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4042
{
4043
BitSet srcs(255, 1), defs(255, 1);
4044
int a, b;
4045
4046
if (!targ->isBarrierRequired(insn))
4047
return false;
4048
4049
// Do not emit a read dependency barrier when the instruction doesn't use
4050
// any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4051
for (int s = 0; insn->srcExists(s); ++s) {
4052
const Value *src = insn->src(s).rep();
4053
if (insn->src(s).getFile() != FILE_GPR)
4054
continue;
4055
if (src->reg.data.id == 255)
4056
continue;
4057
4058
a = src->reg.data.id;
4059
b = a + src->reg.size / 4;
4060
for (int r = a; r < b; ++r)
4061
srcs.set(r);
4062
}
4063
4064
if (!srcs.popCount())
4065
return false;
4066
4067
// Do not emit a read dependency barrier when the output GPRs are equal to
4068
// the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4069
// be produced and WaR hazards are prevented.
4070
for (int d = 0; insn->defExists(d); ++d) {
4071
const Value *def = insn->def(d).rep();
4072
if (insn->def(d).getFile() != FILE_GPR)
4073
continue;
4074
if (def->reg.data.id == 255)
4075
continue;
4076
4077
a = def->reg.data.id;
4078
b = a + def->reg.size / 4;
4079
for (int r = a; r < b; ++r)
4080
defs.set(r);
4081
}
4082
4083
srcs.andNot(defs);
4084
if (!srcs.popCount())
4085
return false;
4086
4087
return true;
4088
}
4089
4090
// Return true when the given instruction needs to emit a write dependency
4091
// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4092
// setting the maximum number of stall counts is not enough. This is only legal
4093
// if the instruction output something.
4094
bool
4095
SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4096
{
4097
if (!targ->isBarrierRequired(insn))
4098
return false;
4099
4100
for (int d = 0; insn->defExists(d); ++d) {
4101
if (insn->def(d).getFile() == FILE_GPR ||
4102
insn->def(d).getFile() == FILE_FLAGS ||
4103
insn->def(d).getFile() == FILE_PREDICATE)
4104
return true;
4105
}
4106
return false;
4107
}
4108
4109
// Helper function for findFirstUse() and findFirstDef()
4110
bool
4111
SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4112
const Value *val) const
4113
{
4114
if (val->reg.file != FILE_GPR &&
4115
val->reg.file != FILE_PREDICATE &&
4116
val->reg.file != FILE_FLAGS)
4117
return false;
4118
4119
for (int d = 0; insn->defExists(d); ++d) {
4120
const Value* def = insn->getDef(d);
4121
int minGPR = def->reg.data.id;
4122
int maxGPR = minGPR + def->reg.size / 4 - 1;
4123
4124
if (def->reg.file != val->reg.file)
4125
continue;
4126
4127
if (def->reg.file == FILE_GPR) {
4128
if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4129
val->reg.data.id > maxGPR)
4130
continue;
4131
return true;
4132
} else
4133
if (def->reg.file == FILE_PREDICATE) {
4134
if (val->reg.data.id != minGPR)
4135
continue;
4136
return true;
4137
} else
4138
if (def->reg.file == FILE_FLAGS) {
4139
if (val->reg.data.id != minGPR)
4140
continue;
4141
return true;
4142
}
4143
}
4144
4145
return false;
4146
}
4147
4148
// Find the next instruction inside the same basic block which uses (reads or
4149
// writes from) the output of the given instruction in order to avoid RaW and
4150
// WaW hazards.
4151
Instruction *
4152
SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4153
{
4154
Instruction *insn, *next;
4155
4156
if (!bari->defExists(0))
4157
return NULL;
4158
4159
for (insn = bari->next; insn != NULL; insn = next) {
4160
next = insn->next;
4161
4162
for (int s = 0; insn->srcExists(s); ++s)
4163
if (doesInsnWriteTo(bari, insn->getSrc(s)))
4164
return insn;
4165
4166
for (int d = 0; insn->defExists(d); ++d)
4167
if (doesInsnWriteTo(bari, insn->getDef(d)))
4168
return insn;
4169
}
4170
return NULL;
4171
}
4172
4173
// Find the next instruction inside the same basic block which overwrites, at
4174
// least, one source of the given instruction in order to avoid WaR hazards.
4175
Instruction *
4176
SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4177
{
4178
Instruction *insn, *next;
4179
4180
if (!bari->srcExists(0))
4181
return NULL;
4182
4183
for (insn = bari->next; insn != NULL; insn = next) {
4184
next = insn->next;
4185
4186
for (int s = 0; bari->srcExists(s); ++s)
4187
if (doesInsnWriteTo(insn, bari->getSrc(s)))
4188
return insn;
4189
}
4190
return NULL;
4191
}
4192
4193
// Dependency barriers:
4194
// This pass is a bit ugly and could probably be improved by performing a
4195
// better allocation.
4196
//
4197
// The main idea is to avoid WaR and RaW hazards by emitting read/write
4198
// dependency barriers using the control codes.
4199
bool
4200
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4201
{
4202
std::list<LiveBarUse> live_uses;
4203
std::list<LiveBarDef> live_defs;
4204
Instruction *insn, *next;
4205
BitSet bars(6, 1);
4206
int bar_id;
4207
4208
for (insn = bb->getEntry(); insn != NULL; insn = next) {
4209
Instruction *usei = NULL, *defi = NULL;
4210
bool need_wr_bar, need_rd_bar;
4211
4212
next = insn->next;
4213
4214
// Expire old barrier uses.
4215
for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4216
it != live_uses.end();) {
4217
if (insn->serial >= it->usei->serial) {
4218
int wr = getWrDepBar(it->insn);
4219
emitWtDepBar(insn, wr);
4220
bars.clr(wr); // free barrier
4221
it = live_uses.erase(it);
4222
continue;
4223
}
4224
++it;
4225
}
4226
4227
// Expire old barrier defs.
4228
for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4229
it != live_defs.end();) {
4230
if (insn->serial >= it->defi->serial) {
4231
int rd = getRdDepBar(it->insn);
4232
emitWtDepBar(insn, rd);
4233
bars.clr(rd); // free barrier
4234
it = live_defs.erase(it);
4235
continue;
4236
}
4237
++it;
4238
}
4239
4240
need_wr_bar = needWrDepBar(insn);
4241
need_rd_bar = needRdDepBar(insn);
4242
4243
if (need_wr_bar) {
4244
// When the instruction requires to emit a write dependency barrier
4245
// (all which write something at a variable latency), find the next
4246
// instruction which reads the outputs (or writes to them, potentially
4247
// completing before this insn.
4248
usei = findFirstUse(insn);
4249
4250
// Allocate and emit a new barrier.
4251
bar_id = bars.findFreeRange(1);
4252
if (bar_id == -1)
4253
bar_id = 5;
4254
bars.set(bar_id);
4255
emitWrDepBar(insn, bar_id);
4256
if (usei)
4257
live_uses.push_back(LiveBarUse(insn, usei));
4258
}
4259
4260
if (need_rd_bar) {
4261
// When the instruction requires to emit a read dependency barrier
4262
// (all which read something at a variable latency), find the next
4263
// instruction which will write the inputs.
4264
defi = findFirstDef(insn);
4265
4266
if (usei && defi && usei->serial <= defi->serial)
4267
continue;
4268
4269
// Allocate and emit a new barrier.
4270
bar_id = bars.findFreeRange(1);
4271
if (bar_id == -1)
4272
bar_id = 5;
4273
bars.set(bar_id);
4274
emitRdDepBar(insn, bar_id);
4275
if (defi)
4276
live_defs.push_back(LiveBarDef(insn, defi));
4277
}
4278
}
4279
4280
// Remove unnecessary barrier waits.
4281
BitSet alive_bars(6, 1);
4282
for (insn = bb->getEntry(); insn != NULL; insn = next) {
4283
int wr, rd, wt;
4284
4285
next = insn->next;
4286
4287
wr = getWrDepBar(insn);
4288
rd = getRdDepBar(insn);
4289
wt = getWtDepBar(insn);
4290
4291
for (int idx = 0; idx < 6; ++idx) {
4292
if (!(wt & (1 << idx)))
4293
continue;
4294
if (!alive_bars.test(idx)) {
4295
insn->sched &= ~(1 << (11 + idx));
4296
} else {
4297
alive_bars.clr(idx);
4298
}
4299
}
4300
4301
if (wr < 6)
4302
alive_bars.set(wr);
4303
if (rd < 6)
4304
alive_bars.set(rd);
4305
}
4306
4307
return true;
4308
}
4309
4310
bool
4311
SchedDataCalculatorGM107::visit(Function *func)
4312
{
4313
ArrayList insns;
4314
4315
func->orderInstructions(insns);
4316
4317
scoreBoards.resize(func->cfg.getSize());
4318
for (size_t i = 0; i < scoreBoards.size(); ++i)
4319
scoreBoards[i].wipe();
4320
return true;
4321
}
4322
4323
bool
4324
SchedDataCalculatorGM107::visit(BasicBlock *bb)
4325
{
4326
Instruction *insn, *next = NULL;
4327
int cycle = 0;
4328
4329
for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4330
/*XXX*/
4331
insn->sched = 0x7e0;
4332
}
4333
4334
if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4335
return true;
4336
4337
// Insert read/write dependency barriers for instructions which don't
4338
// operate at a fixed latency.
4339
insertBarriers(bb);
4340
4341
score = &scoreBoards.at(bb->getId());
4342
4343
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4344
// back branches will wait until all target dependencies are satisfied
4345
if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4346
continue;
4347
BasicBlock *in = BasicBlock::get(ei.getNode());
4348
score->setMax(&scoreBoards.at(in->getId()));
4349
}
4350
4351
#ifdef GM107_DEBUG_SCHED_DATA
4352
INFO("=== BB:%i initial scores\n", bb->getId());
4353
score->print(cycle);
4354
#endif
4355
4356
// Because barriers are allocated locally (intra-BB), we have to make sure
4357
// that all produced barriers have been consumed before entering inside a
4358
// new basic block. The best way is to do a global allocation pre RA but
4359
// it's really more difficult, especially because of the phi nodes. Anyways,
4360
// it seems like that waiting on a barrier which has already been consumed
4361
// doesn't add any additional cost, it's just not elegant!
4362
Instruction *start = bb->getEntry();
4363
if (start && bb->cfg.incidentCount() > 0) {
4364
for (int b = 0; b < 6; b++)
4365
emitWtDepBar(start, b);
4366
}
4367
4368
for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4369
next = insn->next;
4370
4371
commitInsn(insn, cycle);
4372
int delay = calcDelay(next, cycle);
4373
setDelay(insn, delay, next);
4374
cycle += getStall(insn);
4375
4376
setReuseFlag(insn);
4377
4378
// XXX: The yield flag seems to destroy a bunch of things when it is
4379
// set on every instruction, need investigation.
4380
//emitYield(insn);
4381
4382
#ifdef GM107_DEBUG_SCHED_DATA
4383
printSchedInfo(cycle, insn);
4384
insn->print();
4385
next->print();
4386
#endif
4387
}
4388
4389
if (!insn)
4390
return true;
4391
commitInsn(insn, cycle);
4392
4393
int bbDelay = -1;
4394
4395
#ifdef GM107_DEBUG_SCHED_DATA
4396
fprintf(stderr, "last instruction is : ");
4397
insn->print();
4398
fprintf(stderr, "cycle=%d\n", cycle);
4399
#endif
4400
4401
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4402
BasicBlock *out = BasicBlock::get(ei.getNode());
4403
4404
if (ei.getType() != Graph::Edge::BACK) {
4405
// Only test the first instruction of the outgoing block.
4406
next = out->getEntry();
4407
if (next) {
4408
bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4409
} else {
4410
// When the outgoing BB is empty, make sure to set the number of
4411
// stall counts needed by the instruction because we don't know the
4412
// next instruction.
4413
bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4414
}
4415
} else {
4416
// Wait until all dependencies are satisfied.
4417
const int regsFree = score->getLatest();
4418
next = out->getFirst();
4419
for (int c = cycle; next && c < regsFree; next = next->next) {
4420
bbDelay = MAX2(bbDelay, calcDelay(next, c));
4421
c += getStall(next);
4422
}
4423
next = NULL;
4424
}
4425
}
4426
if (bb->cfg.outgoingCount() != 1)
4427
next = NULL;
4428
setDelay(insn, bbDelay, next);
4429
cycle += getStall(insn);
4430
4431
score->rebase(cycle); // common base for initializing out blocks' scores
4432
return true;
4433
}
4434
4435
/*******************************************************************************
4436
* main
4437
******************************************************************************/
4438
4439
void
4440
CodeEmitterGM107::prepareEmission(Function *func)
4441
{
4442
SchedDataCalculatorGM107 sched(targGM107);
4443
CodeEmitter::prepareEmission(func);
4444
sched.run(func, true, true);
4445
}
4446
4447
// Return the number of 24-byte bundles needed to hold `size` bytes, i.e.
// size / 24 rounded up.
// NOTE(review): 24 presumably stands for three 8-byte instructions per
// scheduling word - confirm against the emitter.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
4451
4452
void
4453
CodeEmitterGM107::prepareEmission(Program *prog)
4454
{
4455
for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4456
!fi.end(); fi.next()) {
4457
Function *func = reinterpret_cast<Function *>(fi.get());
4458
func->binPos = prog->binSize;
4459
prepareEmission(func);
4460
4461
// adjust sizes & positions for schedulding info:
4462
if (prog->getTarget()->hasSWSched) {
4463
uint32_t adjPos = func->binPos;
4464
BasicBlock *bb = NULL;
4465
for (int i = 0; i < func->bbCount; ++i) {
4466
bb = func->bbArray[i];
4467
int32_t adjSize = bb->binSize;
4468
if (adjPos % 32) {
4469
adjSize -= 32 - adjPos % 32;
4470
if (adjSize < 0)
4471
adjSize = 0;
4472
}
4473
adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4474
bb->binPos = adjPos;
4475
bb->binSize = adjSize;
4476
adjPos += adjSize;
4477
}
4478
if (bb)
4479
func->binSize = adjPos - func->binPos;
4480
}
4481
4482
prog->binSize += func->binSize;
4483
}
4484
}
4485
4486
// Build an emitter for `target`. The program type defaults to
// Program::TYPE_VERTEX (see setProgramType()), and no code buffer or
// relocation info is attached yet.
CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
   : CodeEmitter(target),
     targGM107(target),
     progType(Program::TYPE_VERTEX),
     insn(NULL),
     writeIssueDelays(target->hasSWSched),
     data(NULL)
{
   // Start without any output buffer.
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
4498
4499
// Allocate a GM107 code emitter bound to this target and configured for the
// given program type. The emitter is created with `new` and returned as a
// raw pointer.
CodeEmitter *
TargetGM107::createCodeEmitterGM107(Program::Type type)
{
   CodeEmitterGM107 *const emitter = new CodeEmitterGM107(this);

   emitter->setProgramType(type);
   return emitter;
}
4506
4507
} // namespace nv50_ir
4508
4509