Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
4574 views
1
/*
2
* Copyright 2020 Red Hat Inc.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*/
22
#include "codegen/nv50_ir.h"
23
#include "codegen/nv50_ir_build_util.h"
24
25
#include "codegen/nv50_ir_target_nvc0.h"
26
#include "codegen/nv50_ir_lowering_gv100.h"
27
28
#include <limits>
29
30
namespace nv50_ir {
31
32
bool
33
GV100LegalizeSSA::handleCMP(Instruction *i)
34
{
35
Value *pred = bld.getSSA(1, FILE_PREDICATE);
36
37
bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
38
i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz;
39
bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
40
return true;
41
}
42
43
// NIR deals with most of these for us, but codegen generates more in pointer
44
// calculations from other lowering passes.
45
bool
46
GV100LegalizeSSA::handleIADD64(Instruction *i)
47
{
48
Value *carry = bld.getSSA(1, FILE_PREDICATE);
49
Value *def[2] = { bld.getSSA(), bld.getSSA() };
50
Value *src[2][2];
51
52
for (int s = 0; s < 2; s++) {
53
if (i->getSrc(s)->reg.size == 8) {
54
bld.mkSplit(src[s], 4, i->getSrc(s));
55
} else {
56
src[s][0] = i->getSrc(s);
57
src[s][1] = bld.mkImm(0);
58
}
59
}
60
61
bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
62
setFlagsDef(1, carry);
63
bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
64
setFlagsSrc(2, carry);
65
bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
66
return true;
67
}
68
69
bool
70
GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
71
{
72
Value *def = bld.getSSA(8), *defs[2];
73
Value *src2;
74
75
if (i->srcExists(2) &&
76
(!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
77
Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
78
bld.mkMov(src2s[0], bld.mkImm(0));
79
bld.mkMov(src2s[1], i->getSrc(2));
80
src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
81
} else {
82
src2 = bld.mkImm(0);
83
}
84
85
bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
86
i->getSrc(0), i->getSrc(1), src2);
87
88
bld.mkSplit(defs, 4, def);
89
i->def(0).replace(defs[1], false);
90
return true;
91
}
92
93
// XXX: We should be able to do this in GV100LoweringPass, but codegen messes
94
// up somehow and swaps the condcode without swapping the sources.
95
// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
96
bool
97
GV100LegalizeSSA::handleIMNMX(Instruction *i)
98
{
99
Value *pred = bld.getSSA(1, FILE_PREDICATE);
100
101
bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
102
i->sType, i->getSrc(0), i->getSrc(1));
103
bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
104
return true;
105
}
106
107
bool
108
GV100LegalizeSSA::handleIMUL(Instruction *i)
109
{
110
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
111
return handleIMAD_HIGH(i);
112
113
bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
114
bld.mkImm(0));
115
return true;
116
}
117
118
bool
119
GV100LegalizeSSA::handleLOP2(Instruction *i)
120
{
121
uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
122
uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
123
uint8_t subOp;
124
125
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
126
src0 = ~src0;
127
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
128
src1 = ~src1;
129
130
switch (i->op) {
131
case OP_AND: subOp = src0 & src1; break;
132
case OP_OR : subOp = src0 | src1; break;
133
case OP_XOR: subOp = src0 ^ src1; break;
134
default:
135
unreachable("invalid LOP2 opcode");
136
}
137
138
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
139
bld.mkImm(0))->subOp = subOp;
140
return true;
141
}
142
143
bool
144
GV100LegalizeSSA::handleNOT(Instruction *i)
145
{
146
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
147
bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1;
148
return true;
149
}
150
151
bool
152
GV100LegalizeSSA::handlePREEX2(Instruction *i)
153
{
154
i->def(0).replace(i->src(0), false);
155
return true;
156
}
157
158
bool
159
GV100LegalizeSSA::handleQUADON(Instruction *i)
160
{
161
bld.mkBMov(i->getDef(0), bld.mkTSVal(TS_MACTIVE));
162
Instruction *b = bld.mkBMov(bld.mkTSVal(TS_PQUAD_MACTIVE), i->getDef(0));
163
b->fixed = 1;
164
return true;
165
}
166
167
bool
168
GV100LegalizeSSA::handleQUADPOP(Instruction *i)
169
{
170
Instruction *b = bld.mkBMov(bld.mkTSVal(TS_MACTIVE), i->getSrc(0));
171
b->fixed = 1;
172
return true;
173
}
174
175
bool
176
GV100LegalizeSSA::handleSET(Instruction *i)
177
{
178
Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
179
Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
180
Instruction *xsetp;
181
182
if (isFloatType(i->dType)) {
183
if (i->sType == TYPE_F32)
184
return false; // HW has FSET.BF
185
met = bld.mkImm(0x3f800000);
186
} else {
187
met = bld.mkImm(0xffffffff);
188
}
189
190
xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
191
i->getSrc(0), i->getSrc(1));
192
xsetp->src(0).mod = i->src(0).mod;
193
xsetp->src(1).mod = i->src(1).mod;
194
xsetp->setSrc(2, src2);
195
xsetp->ftz = i->ftz;
196
197
i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
198
i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
199
return true;
200
}
201
202
bool
203
GV100LegalizeSSA::handleSHFL(Instruction *i)
204
{
205
Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
206
sync->fixed = 1;
207
sync->setSrc(0, bld.mkImm(0xffffffff));
208
i->bb->insertBefore(i, sync);
209
return false;
210
}
211
212
bool
213
GV100LegalizeSSA::handleShift(Instruction *i)
214
{
215
Value *zero = bld.mkImm(0);
216
Value *src1 = i->getSrc(1);
217
Value *src0, *src2;
218
uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R;
219
220
if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) {
221
src0 = i->getSrc(0);
222
src2 = zero;
223
} else {
224
src0 = zero;
225
src2 = i->getSrc(0);
226
subOp |= NV50_IR_SUBOP_SHF_HI;
227
}
228
if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
229
subOp |= NV50_IR_SUBOP_SHF_W;
230
231
bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp;
232
return true;
233
}
234
235
bool
236
GV100LegalizeSSA::handleSUB(Instruction *i)
237
{
238
Instruction *xadd =
239
bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
240
xadd->src(0).mod = i->src(0).mod;
241
xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
242
xadd->ftz = i->ftz;
243
return true;
244
}
245
246
bool
247
GV100LegalizeSSA::visit(Instruction *i)
248
{
249
bool lowered = false;
250
251
bld.setPosition(i, false);
252
if (i->sType == TYPE_F32 && i->dType != TYPE_F16 &&
253
prog->getType() != Program::TYPE_COMPUTE)
254
handleFTZ(i);
255
256
switch (i->op) {
257
case OP_AND:
258
case OP_OR:
259
case OP_XOR:
260
if (i->def(0).getFile() != FILE_PREDICATE)
261
lowered = handleLOP2(i);
262
break;
263
case OP_NOT:
264
lowered = handleNOT(i);
265
break;
266
case OP_SHL:
267
case OP_SHR:
268
lowered = handleShift(i);
269
break;
270
case OP_SET:
271
case OP_SET_AND:
272
case OP_SET_OR:
273
case OP_SET_XOR:
274
if (i->def(0).getFile() != FILE_PREDICATE)
275
lowered = handleSET(i);
276
break;
277
case OP_SLCT:
278
lowered = handleCMP(i);
279
break;
280
case OP_PREEX2:
281
lowered = handlePREEX2(i);
282
break;
283
case OP_MUL:
284
if (!isFloatType(i->dType))
285
lowered = handleIMUL(i);
286
break;
287
case OP_MAD:
288
if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
289
lowered = handleIMAD_HIGH(i);
290
break;
291
case OP_SHFL:
292
lowered = handleSHFL(i);
293
break;
294
case OP_QUADON:
295
lowered = handleQUADON(i);
296
break;
297
case OP_QUADPOP:
298
lowered = handleQUADPOP(i);
299
break;
300
case OP_SUB:
301
lowered = handleSUB(i);
302
break;
303
case OP_MAX:
304
case OP_MIN:
305
if (!isFloatType(i->dType))
306
lowered = handleIMNMX(i);
307
break;
308
case OP_ADD:
309
if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8)
310
lowered = handleIADD64(i);
311
break;
312
case OP_PFETCH:
313
handlePFETCH(i);
314
break;
315
case OP_LOAD:
316
handleLOAD(i);
317
break;
318
default:
319
break;
320
}
321
322
if (lowered)
323
delete_Instruction(prog, i);
324
325
return true;
326
}
327
328
bool
329
GV100LoweringPass::handleDMNMX(Instruction *i)
330
{
331
Value *pred = bld.getSSA(1, FILE_PREDICATE);
332
Value *src0[2], *src1[2], *dest[2];
333
334
bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
335
i->sType, i->getSrc(0), i->getSrc(1));
336
bld.mkSplit(src0, 4, i->getSrc(0));
337
bld.mkSplit(src1, 4, i->getSrc(1));
338
bld.mkSplit(dest, 4, i->getDef(0));
339
bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
340
bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
341
bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
342
return true;
343
}
344
345
bool
346
GV100LoweringPass::handleEXTBF(Instruction *i)
347
{
348
Value *bit = bld.getScratch();
349
Value *cnt = bld.getScratch();
350
Value *mask = bld.getScratch();
351
Value *zero = bld.mkImm(0);
352
353
bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
354
bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
355
bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt);
356
bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask);
357
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit);
358
if (isSignedType(i->dType))
359
bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
360
361
return true;
362
}
363
364
bool
365
GV100LoweringPass::handleFLOW(Instruction *i)
366
{
367
i->op = OP_BRA;
368
return false;
369
}
370
371
bool
372
GV100LoweringPass::handleI2I(Instruction *i)
373
{
374
bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
375
subOp = i->subOp;
376
bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0));
377
return true;
378
}
379
380
bool
381
GV100LoweringPass::handleINSBF(Instruction *i)
382
{
383
Value *bit = bld.getScratch();
384
Value *cnt = bld.getScratch();
385
Value *mask = bld.getScratch();
386
Value *src0 = bld.getScratch();
387
Value *zero = bld.mkImm(0);
388
389
bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
390
bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
391
bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt);
392
393
bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
394
bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
395
396
bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
397
bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
398
subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
399
400
return true;
401
}
402
403
bool
404
GV100LoweringPass::handlePINTERP(Instruction *i)
405
{
406
Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
407
Instruction *ipa, *mul;
408
409
ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
410
ipa->ipa = i->ipa;
411
mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
412
413
if (i->getInterpMode() == NV50_IR_INTERP_SC) {
414
ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
415
mul->setPredicate(CC_NOT_P, ipa->getDef(1));
416
}
417
418
return true;
419
}
420
421
bool
422
GV100LoweringPass::handlePREFLOW(Instruction *i)
423
{
424
return true;
425
}
426
427
bool
428
GV100LoweringPass::handlePRESIN(Instruction *i)
429
{
430
const float f = 1.0 / (2.0 * 3.14159265);
431
bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
432
return true;
433
}
434
435
bool
436
GV100LoweringPass::visit(Instruction *i)
437
{
438
bool lowered = false;
439
440
bld.setPosition(i, false);
441
442
switch (i->op) {
443
case OP_BREAK:
444
case OP_CONT:
445
lowered = handleFLOW(i);
446
break;
447
case OP_PREBREAK:
448
case OP_PRECONT:
449
lowered = handlePREFLOW(i);
450
break;
451
case OP_CVT:
452
if (i->src(0).getFile() != FILE_PREDICATE &&
453
i->def(0).getFile() != FILE_PREDICATE &&
454
!isFloatType(i->dType) && !isFloatType(i->sType))
455
lowered = handleI2I(i);
456
break;
457
case OP_EXTBF:
458
lowered = handleEXTBF(i);
459
break;
460
case OP_INSBF:
461
lowered = handleINSBF(i);
462
break;
463
case OP_MAX:
464
case OP_MIN:
465
if (i->dType == TYPE_F64)
466
lowered = handleDMNMX(i);
467
break;
468
case OP_PINTERP:
469
lowered = handlePINTERP(i);
470
break;
471
case OP_PRESIN:
472
lowered = handlePRESIN(i);
473
break;
474
default:
475
break;
476
}
477
478
if (lowered)
479
delete_Instruction(prog, i);
480
481
return true;
482
}
483
484
} // namespace nv50_ir
485
486