CoCalc -- nv50_ir_lowering

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
7444 views
1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *           2014 Red Hat Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be included in
13
 * all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
 * OTHER DEALINGS IN THE SOFTWARE.
22
 */
23

24
#include "codegen/nv50_ir.h"
25
#include "codegen/nv50_ir_build_util.h"
26

27
#include "codegen/nv50_ir_target_nvc0.h"
28
#include "codegen/nv50_ir_lowering_gm107.h"
29

30
#include <limits>
31

32
namespace nv50_ir {
33

34
// Per-lane operation selectors used to build an OP_QUADOP sub-opcode.
// Each selector fits in two bits.
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

// Packs four selectors into a single byte, two bits per lane. Arguments are
// the QOP_ suffixes (ADD, SUBR, SUB, MOV2), given in the lane order
//             UL UR LL LR
// so that the first argument lands in bits 7:6 and the last in bits 1:0,
// e.g. QUADOP(MOV2, ADD, MOV2, ADD) == 0xcc.
#define QUADOP(ul, ur, ll, lr)                    \
   ((QOP_##ul << 6) | (QOP_##ur << 4) |           \
    (QOP_##ll << 2) | (QOP_##lr << 0))

// Immediate passed as the last source of every OP_SHFL emitted in this file.
// NOTE(review): presumably the clamp/segment-mask encoding that restricts the
// shuffle to a quad -- confirm against the SHFL encoding.
#define SHFL_BOUND_QUAD 0x1c03
45

46
void
47
GM107LegalizeSSA::handlePFETCH(Instruction *i)
48
{
49
   Value *src0;
50

51
   if (i->src(0).getFile() == FILE_GPR && !i->srcExists(1))
52
      return;
53

54
   bld.setPosition(i, false);
55
   src0 = bld.getSSA();
56

57
   if (i->srcExists(1))
58
      bld.mkOp2(OP_ADD , TYPE_U32, src0, i->getSrc(0), i->getSrc(1));
59
   else
60
      bld.mkOp1(OP_MOV , TYPE_U32, src0, i->getSrc(0));
61

62
   i->setSrc(0, src0);
63
   i->setSrc(1, NULL);
64
}
65

66
void
67
GM107LegalizeSSA::handleLOAD(Instruction *i)
68
{
69
   if (i->src(0).getFile() != FILE_MEMORY_CONST)
70
      return;
71
   if (i->src(0).isIndirect(0))
72
      return;
73
   if (typeSizeof(i->dType) != 4)
74
      return;
75

76
   i->op = OP_MOV;
77
}
78

79
void
80
GM107LegalizeSSA::handleQUADON(Instruction *i)
81
{
82
   i->setDef(0, NULL);
83
}
84

85
void
86
GM107LegalizeSSA::handleQUADPOP(Instruction *i)
87
{
88
   i->setSrc(0, NULL);
89
}
90

91
bool
92
GM107LegalizeSSA::visit(Instruction *i)
93
{
94
   switch (i->op) {
95
   case OP_QUADON:
96
      handleQUADON(i);
97
      break;
98
   case OP_QUADPOP:
99
      handleQUADPOP(i);
100
      break;
101
   case OP_PFETCH:
102
      handlePFETCH(i);
103
      break;
104
   case OP_LOAD:
105
      handleLOAD(i);
106
      break;
107
   default:
108
      break;
109
   }
110
   return true;
111
}
112

113
bool
114
GM107LoweringPass::handleManualTXD(TexInstruction *i)
115
{
116
   // See NVC0LoweringPass::handleManualTXD for rationale. This function
117
   // implements the same logic, but using SM50-friendly primitives.
118
   static const uint8_t qOps[2] =
119
      { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) };
120
   Value *def[4][4];
121
   Value *crd[3], *arr, *shadow;
122
   Value *tmp;
123
   Instruction *tex, *add;
124
   Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
125
   int l, c;
126
   const int dim = i->tex.target.getDim() + i->tex.target.isCube();
127
   const int array = i->tex.target.isArray();
128
   const int indirect = i->tex.rIndirectSrc >= 0;
129

130
   i->op = OP_TEX; // no need to clone dPdx/dPdy later
131

132
   for (c = 0; c < dim; ++c)
133
      crd[c] = bld.getScratch();
134
   arr = bld.getScratch();
135
   shadow = bld.getScratch();
136
   tmp = bld.getScratch();
137

138
   for (l = 0; l < 4; ++l) {
139
      Value *bar = bld.getSSA(4, FILE_BARRIER);
140
      Value *src[3], *val;
141
      Value *lane = bld.mkImm(l);
142
      bld.mkOp(OP_QUADON, TYPE_U32, bar);
143
      // Make sure lane 0 has the appropriate array/depth compare values
144
      if (l != 0) {
145
         if (array)
146
            bld.mkOp3(OP_SHFL, TYPE_F32, arr, i->getSrc(0), lane, quad);
147
         if (i->tex.target.isShadow())
148
            bld.mkOp3(OP_SHFL, TYPE_F32, shadow, i->getSrc(array + dim + indirect), lane, quad);
149
      }
150

151
      // mov coordinates from lane l to all lanes
152
      for (c = 0; c < dim; ++c) {
153
         bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, quad);
154
      }
155

156
      // add dPdx from lane l to lanes dx
157
      for (c = 0; c < dim; ++c) {
158
         bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), lane, quad);
159
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
160
         add->subOp = qOps[0];
161
         add->lanes = 1; /* abused for .ndv */
162
      }
163

164
      // add dPdy from lane l to lanes dy
165
      for (c = 0; c < dim; ++c) {
166
         bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), lane, quad);
167
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
168
         add->subOp = qOps[1];
169
         add->lanes = 1; /* abused for .ndv */
170
      }
171

172
      // normalize cube coordinates if necessary
173
      if (i->tex.target.isCube()) {
174
         for (c = 0; c < 3; ++c)
175
            src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
176
         val = bld.getScratch();
177
         bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
178
         bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
179
         bld.mkOp1(OP_RCP, TYPE_F32, val, val);
180
         for (c = 0; c < 3; ++c)
181
            src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
182
      } else {
183
         for (c = 0; c < dim; ++c)
184
            src[c] = crd[c];
185
      }
186

187
      // texture
188
      bld.insert(tex = cloneForward(func, i));
189
      if (l != 0) {
190
         if (array)
191
            tex->setSrc(0, arr);
192
         if (i->tex.target.isShadow())
193
            tex->setSrc(array + dim + indirect, shadow);
194
      }
195
      for (c = 0; c < dim; ++c)
196
         tex->setSrc(c + array, src[c]);
197
      // broadcast results from lane 0 to all lanes
198
      if (l != 0)
199
         for (c = 0; i->defExists(c); ++c)
200
            bld.mkOp3(OP_SHFL, TYPE_F32, tex->getDef(c), tex->getDef(c), bld.mkImm(0), quad);
201
      bld.mkOp1(OP_QUADPOP, TYPE_U32, NULL, bar)->fixed = 1;
202

203
      // save results
204
      for (c = 0; i->defExists(c); ++c) {
205
         Instruction *mov;
206
         def[c][l] = bld.getSSA();
207
         mov = bld.mkMov(def[c][l], tex->getDef(c));
208
         mov->fixed = 1;
209
         mov->lanes = 1 << l;
210
      }
211
   }
212

213
   for (c = 0; i->defExists(c); ++c) {
214
      Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
215
      for (l = 0; l < 4; ++l)
216
         u->setSrc(l, def[c][l]);
217
   }
218

219
   i->bb->remove(i);
220
   return true;
221
}
222

223
bool
224
GM107LoweringPass::handleDFDX(Instruction *insn)
225
{
226
   Instruction *shfl;
227
   int qop = 0, xid = 0;
228

229
   switch (insn->op) {
230
   case OP_DFDX:
231
      qop = QUADOP(SUB, SUBR, SUB, SUBR);
232
      xid = 1;
233
      break;
234
   case OP_DFDY:
235
      qop = QUADOP(SUB, SUB, SUBR, SUBR);
236
      xid = 2;
237
      break;
238
   default:
239
      assert(!"invalid dfdx opcode");
240
      break;
241
   }
242

243
   shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0),
244
                    bld.mkImm(xid), bld.mkImm(SHFL_BOUND_QUAD));
245
   shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
246
   insn->op = OP_QUADOP;
247
   insn->subOp = qop;
248
   insn->lanes = 0; /* abused for !.ndv */
249
   insn->setSrc(1, insn->getSrc(0));
250
   insn->setSrc(0, shfl->getDef(0));
251
   return true;
252
}
253

254
bool
255
GM107LoweringPass::handlePFETCH(Instruction *i)
256
{
257
   Value *tmp0 = bld.getScratch();
258
   Value *tmp1 = bld.getScratch();
259
   Value *tmp2 = bld.getScratch();
260
   bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
261
   bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0));
262
   bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0));
263
   if (i->getSrc(1))
264
      bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
265
   else
266
      bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
267
   bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
268
   i->setSrc(0, tmp0);
269
   i->setSrc(1, NULL);
270
   return true;
271
}
272

273
bool
274
GM107LoweringPass::handlePOPCNT(Instruction *i)
275
{
276
   Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
277
                           i->getSrc(0), i->getSrc(1));
278
   i->setSrc(0, tmp);
279
   i->setSrc(1, NULL);
280
   return true;
281
}
282

283
bool
284
GM107LoweringPass::handleSUQ(TexInstruction *suq)
285
{
286
   Value *ind = suq->getIndirectR();
287
   Value *handle;
288
   const int slot = suq->tex.r;
289
   const int mask = suq->tex.mask;
290

291
   if (suq->tex.bindless)
292
      handle = ind;
293
   else
294
      handle = loadTexHandle(ind, slot + 32);
295

296
   suq->tex.r = 0xff;
297
   suq->tex.s = 0x1f;
298

299
   suq->setIndirectR(NULL);
300
   suq->setSrc(0, handle);
301
   suq->tex.rIndirectSrc = 0;
302
   suq->setSrc(1, bld.loadImm(NULL, 0));
303
   suq->tex.query = TXQ_DIMS;
304
   suq->op = OP_TXQ;
305

306
   // We store CUBE / CUBE_ARRAY as a 2D ARRAY. Make sure that depth gets
307
   // divided by 6.
308
   if (mask & 0x4 && suq->tex.target.isCube()) {
309
      int d = util_bitcount(mask & 0x3);
310
      bld.setPosition(suq, true);
311
      bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d), suq->getDef(d),
312
                bld.loadImm(NULL, 6));
313
   }
314

315
   // Samples come from a different query. If we want both samples and dims,
316
   // create a second suq.
317
   if (mask & 0x8) {
318
      int d = util_bitcount(mask & 0x7);
319
      Value *dst = suq->getDef(d);
320
      TexInstruction *samples = suq;
321
      assert(dst);
322

323
      if (mask != 0x8) {
324
         suq->setDef(d, NULL);
325
         suq->tex.mask &= 0x7;
326
         samples = cloneShallow(func, suq);
327
         for (int i = 0; i < d; i++)
328
            samples->setDef(d, NULL);
329
         samples->setDef(0, dst);
330
         suq->bb->insertAfter(suq, samples);
331
      }
332
      samples->tex.mask = 0x4;
333
      samples->tex.query = TXQ_TYPE;
334
   }
335

336
   if (suq->tex.target.isMS()) {
337
      bld.setPosition(suq, true);
338

339
      if (mask & 0x1)
340
         bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
341
                   loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
342
      if (mask & 0x2) {
343
         int d = util_bitcount(mask & 0x1);
344
         bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
345
                   loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
346
      }
347
   }
348

349
   return true;
350
}
351

352
//
353
// - add quadop dance for texturing
354
// - put FP outputs in GPRs
355
// - convert instruction sequences
356
//
357
bool
358
GM107LoweringPass::visit(Instruction *i)
359
{
360
   bld.setPosition(i, false);
361

362
   if (i->cc != CC_ALWAYS)
363
      checkPredicate(i);
364

365
   switch (i->op) {
366
   case OP_PFETCH:
367
      return handlePFETCH(i);
368
   case OP_DFDX:
369
   case OP_DFDY:
370
      return handleDFDX(i);
371
   case OP_POPCNT:
372
      return handlePOPCNT(i);
373
   case OP_SUQ:
374
      return handleSUQ(i->asTex());
375
   default:
376
      return NVC0LoweringPass::visit(i);
377
   }
378
}
379

380
} // namespace nv50_ir
381

382
Product

Resources

Company