Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
4574 views
1
/*
2
* Copyright 2011 Christoph Bumiller
3
* 2014 Red Hat Inc.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice shall be included in
13
* all copies or substantial portions of the Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
* OTHER DEALINGS IN THE SOFTWARE.
22
*/
23
24
#include "codegen/nv50_ir_target_gm107.h"
25
#include "codegen/nv50_ir_lowering_gm107.h"
26
27
namespace nv50_ir {
28
29
// Factory entry point for this target: allocates a TargetGM107 for the given
// chipset with new and returns it as a Target pointer.
Target *getTargetGM107(unsigned int chipset)
{
   Target *const target = new TargetGM107(chipset);

   return target;
}
33
34
// BUILTINS / LIBRARY FUNCTIONS:
35
36
// laziness -> will just hardcode everything for the time being
37
38
#include "lib/gm107.asm.h"
39
40
void
41
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42
{
43
*code = (const uint32_t *)&gm107_builtin_code[0];
44
*size = sizeof(gm107_builtin_code);
45
}
46
47
uint32_t
48
TargetGM107::getBuiltinOffset(int builtin) const
49
{
50
assert(builtin < NVC0_BUILTIN_COUNT);
51
return gm107_builtin_offsets[builtin];
52
}
53
54
bool
55
TargetGM107::isOpSupported(operation op, DataType ty) const
56
{
57
switch (op) {
58
case OP_SAD:
59
case OP_POW:
60
case OP_DIV:
61
case OP_MOD:
62
return false;
63
case OP_SQRT:
64
if (ty == TYPE_F64)
65
return false;
66
return chipset >= NVISA_GM200_CHIPSET;
67
case OP_XMAD:
68
if (isFloatType(ty))
69
return false;
70
break;
71
default:
72
break;
73
}
74
75
return true;
76
}
77
78
// Return true when an instruction supports the reuse flag. When supported, the
79
// hardware will use the operand reuse cache introduced since Maxwell, which
80
// should try to reduce bank conflicts by caching values for the subsequent
81
// instructions. Note that the next instructions have to use the same GPR id in
82
// the same operand slot.
83
bool
84
TargetGM107::isReuseSupported(const Instruction *insn) const
85
{
86
const OpClass cl = getOpClass(insn->op);
87
88
// TODO: double-check!
89
switch (cl) {
90
case OPCLASS_ARITH:
91
case OPCLASS_COMPARE:
92
case OPCLASS_LOGIC:
93
case OPCLASS_MOVE:
94
case OPCLASS_SHIFT:
95
return true;
96
case OPCLASS_BITFIELD:
97
if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
98
return true;
99
break;
100
default:
101
break;
102
}
103
return false;
104
}
105
106
// Return true when an instruction requires to set up a barrier because it
107
// doesn't operate at a fixed latency. Variable latency instructions are memory
108
// operations, double precision operations, special function unit operations
109
// and other low throughput instructions.
110
bool
111
TargetGM107::isBarrierRequired(const Instruction *insn) const
112
{
113
const OpClass cl = getOpClass(insn->op);
114
115
if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
116
return true;
117
118
switch (cl) {
119
case OPCLASS_ATOMIC:
120
case OPCLASS_LOAD:
121
case OPCLASS_STORE:
122
case OPCLASS_SURFACE:
123
case OPCLASS_TEXTURE:
124
return true;
125
case OPCLASS_SFU:
126
switch (insn->op) {
127
case OP_COS:
128
case OP_EX2:
129
case OP_LG2:
130
case OP_LINTERP:
131
case OP_PINTERP:
132
case OP_RCP:
133
case OP_RSQ:
134
case OP_SIN:
135
case OP_SQRT:
136
return true;
137
default:
138
break;
139
}
140
break;
141
case OPCLASS_BITFIELD:
142
switch (insn->op) {
143
case OP_BFIND:
144
case OP_POPCNT:
145
return true;
146
default:
147
break;
148
}
149
break;
150
case OPCLASS_CONTROL:
151
switch (insn->op) {
152
case OP_EMIT:
153
case OP_RESTART:
154
return true;
155
default:
156
break;
157
}
158
break;
159
case OPCLASS_OTHER:
160
switch (insn->op) {
161
case OP_AFETCH:
162
case OP_PFETCH:
163
case OP_PIXLD:
164
case OP_SHFL:
165
return true;
166
case OP_RDSV:
167
return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
168
default:
169
break;
170
}
171
break;
172
case OPCLASS_ARITH:
173
if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
174
!isFloatType(insn->dType))
175
return true;
176
break;
177
case OPCLASS_CONVERT:
178
if (insn->def(0).getFile() != FILE_PREDICATE &&
179
insn->src(0).getFile() != FILE_PREDICATE)
180
return true;
181
break;
182
default:
183
break;
184
}
185
return false;
186
}
187
188
bool
189
TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
190
{
191
// TODO
192
return false;
193
}
194
195
// Return the number of stall counts needed to complete a single instruction.
196
// On Maxwell GPUs, the pipeline depth is 6, but some instructions require
197
// different number of stall counts like memory operations.
198
int
199
TargetGM107::getLatency(const Instruction *insn) const
200
{
201
// TODO: better values! This should be good enough for now though.
202
switch (insn->op) {
203
case OP_EMIT:
204
case OP_EXPORT:
205
case OP_PIXLD:
206
case OP_RESTART:
207
case OP_STORE:
208
case OP_SUSTB:
209
case OP_SUSTP:
210
return 1;
211
case OP_SHFL:
212
return 2;
213
case OP_ADD:
214
case OP_AND:
215
case OP_EXTBF:
216
case OP_FMA:
217
case OP_INSBF:
218
case OP_MAD:
219
case OP_MAX:
220
case OP_MIN:
221
case OP_MOV:
222
case OP_MUL:
223
case OP_NOT:
224
case OP_OR:
225
case OP_PREEX2:
226
case OP_PRESIN:
227
case OP_QUADOP:
228
case OP_SELP:
229
case OP_SET:
230
case OP_SET_AND:
231
case OP_SET_OR:
232
case OP_SET_XOR:
233
case OP_SHL:
234
case OP_SHLADD:
235
case OP_SHR:
236
case OP_SLCT:
237
case OP_SUB:
238
case OP_VOTE:
239
case OP_XOR:
240
case OP_XMAD:
241
if (insn->dType != TYPE_F64)
242
return 6;
243
break;
244
case OP_RDSV:
245
return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
246
case OP_ABS:
247
case OP_CEIL:
248
case OP_CVT:
249
case OP_FLOOR:
250
case OP_NEG:
251
case OP_SAT:
252
case OP_TRUNC:
253
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
254
insn->src(0).getFile() == FILE_PREDICATE))
255
return 6;
256
break;
257
case OP_BFIND:
258
case OP_COS:
259
case OP_EX2:
260
case OP_LG2:
261
case OP_POPCNT:
262
case OP_QUADON:
263
case OP_QUADPOP:
264
case OP_RCP:
265
case OP_RSQ:
266
case OP_SIN:
267
case OP_SQRT:
268
return 13;
269
default:
270
break;
271
}
272
// Use the maximum number of stall counts for other instructions.
273
return 15;
274
}
275
276
// Return the operand read latency which is the number of stall counts before
277
// an instruction can read its sources. For memory operations like ATOM, LOAD
278
// and STORE, the memory access has to be indirect.
279
int
280
TargetGM107::getReadLatency(const Instruction *insn) const
281
{
282
switch (insn->op) {
283
case OP_ABS:
284
case OP_BFIND:
285
case OP_CEIL:
286
case OP_COS:
287
case OP_EX2:
288
case OP_FLOOR:
289
case OP_LG2:
290
case OP_NEG:
291
case OP_POPCNT:
292
case OP_RCP:
293
case OP_RSQ:
294
case OP_SAT:
295
case OP_SIN:
296
case OP_SQRT:
297
case OP_SULDB:
298
case OP_SULDP:
299
case OP_SUREDB:
300
case OP_SUREDP:
301
case OP_SUSTB:
302
case OP_SUSTP:
303
case OP_TRUNC:
304
return 4;
305
case OP_CVT:
306
if (insn->def(0).getFile() != FILE_PREDICATE &&
307
insn->src(0).getFile() != FILE_PREDICATE)
308
return 4;
309
break;
310
case OP_ATOM:
311
case OP_LOAD:
312
case OP_STORE:
313
if (insn->src(0).isIndirect(0)) {
314
switch (insn->src(0).getFile()) {
315
case FILE_MEMORY_SHARED:
316
case FILE_MEMORY_CONST:
317
return 2;
318
case FILE_MEMORY_GLOBAL:
319
case FILE_MEMORY_LOCAL:
320
return 4;
321
default:
322
break;
323
}
324
}
325
break;
326
case OP_EXPORT:
327
case OP_PFETCH:
328
case OP_SHFL:
329
case OP_VFETCH:
330
return 2;
331
default:
332
break;
333
}
334
return 0;
335
}
336
337
bool
338
TargetGM107::isCS2RSV(SVSemantic sv) const
339
{
340
return sv == SV_CLOCK;
341
}
342
343
bool
344
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
345
{
346
if (stage == CG_STAGE_PRE_SSA) {
347
GM107LoweringPass pass(prog);
348
return pass.run(prog, false, true);
349
} else
350
if (stage == CG_STAGE_POST_RA) {
351
NVC0LegalizePostRA pass(prog);
352
return pass.run(prog, false, true);
353
} else
354
if (stage == CG_STAGE_SSA) {
355
GM107LegalizeSSA pass;
356
return pass.run(prog, false, true);
357
}
358
return false;
359
}
360
361
// Return the code emitter for the given program type, as produced by
// createCodeEmitterGM107().
CodeEmitter *
TargetGM107::getCodeEmitter(Program::Type type)
{
   CodeEmitter *const emitter = createCodeEmitterGM107(type);

   return emitter;
}
366
367
} // namespace nv50_ir
368
369