GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_program.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_defines.h"

#include "compiler/nir/nir.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_program.h"

#include "codegen/nv50_ir_driver.h"

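/* Count the bits set in the low nibble of val (table-based popcount). */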
static inline unsigned
bitcount4(const uint32_t val)
{
   static const uint8_t cnt[16]
   = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
   return cnt[val & 0xf];
}

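/* Assign hardware input/output slots for vertex (and geometry) programs:
 * record each input/output's semantic, component mask and hardware index in
 * prog->in[]/prog->out[], build the vertex attribute enable mask in
 * prog->vp.attrs[], and reserve extra slots for system values such as
 * VertexID and InstanceID.
 */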
static int
nv50_vertprog_assign_slots(struct nv50_ir_prog_info_out *info)
{
   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
   unsigned i, n, c;

   n = 0;
   for (i = 0; i < info->numInputs; ++i) {
      prog->in[i].id = i;
      prog->in[i].sn = info->in[i].sn;
      prog->in[i].si = info->in[i].si;
      prog->in[i].hw = n;
      prog->in[i].mask = info->in[i].mask;

      prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);

      for (c = 0; c < 4; ++c)
         if (info->in[i].mask & (1 << c))
            info->in[i].slot[c] = n++;

      if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;
   }
   prog->in_nr = info->numInputs;

   for (i = 0; i < info->numSysVals; ++i) {
      switch (info->sv[i].sn) {
      case TGSI_SEMANTIC_INSTANCEID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
         continue;
      case TGSI_SEMANTIC_VERTEXID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
         continue;
      default:
         break;
      }
   }

   /*
    * Corner case: VP has no inputs, but we will still need to submit data to
    * draw it. HW will shout at us and won't draw anything if we don't enable
    * any input, so let's just pretend it's the first one.
    */
   if (prog->vp.attrs[0] == 0 &&
       prog->vp.attrs[1] == 0 &&
       prog->vp.attrs[2] == 0)
      prog->vp.attrs[0] |= 0xf;

   /* VertexID before InstanceID */
   if (info->io.vertexId < info->numSysVals)
      info->sv[info->io.vertexId].slot[0] = n++;
   if (info->io.instanceId < info->numSysVals)
      info->sv[info->io.instanceId].slot[0] = n++;

   n = 0;
   for (i = 0; i < info->numOutputs; ++i) {
      switch (info->out[i].sn) {
      case TGSI_SEMANTIC_PSIZE:
         prog->vp.psiz = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         prog->vp.clpd[info->out[i].si] = n;
         break;
      case TGSI_SEMANTIC_EDGEFLAG:
         prog->vp.edgeflag = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         prog->vp.bfc[info->out[i].si] = i;
         break;
      case TGSI_SEMANTIC_LAYER:
         prog->gp.has_layer = true;
         prog->gp.layerid = n;
         break;
      case TGSI_SEMANTIC_VIEWPORT_INDEX:
         prog->gp.has_viewport = true;
         prog->gp.viewportid = n;
         break;
      default:
         break;
      }
      prog->out[i].id = i;
      prog->out[i].sn = info->out[i].sn;
      prog->out[i].si = info->out[i].si;
      prog->out[i].hw = n;
      prog->out[i].mask = info->out[i].mask;

      for (c = 0; c < 4; ++c)
         if (info->out[i].mask & (1 << c))
            info->out[i].slot[c] = n++;
   }
   prog->out_nr = info->numOutputs;
   prog->max_out = n;
   if (!prog->max_out)
      prog->max_out = 1;

   if (prog->vp.psiz < info->numOutputs)
      prog->vp.psiz = prog->out[prog->vp.psiz].hw;

   return 0;
}

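/* Assign interpolant slots for fragment programs: position components come
 * first, then non-flat varyings, then flat inputs; the resulting counts are
 * packed into prog->fp.interp, and color outputs are mapped to hardware
 * result registers, with sample mask and depth appended at the end.
 */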
static int
nv50_fragprog_assign_slots(struct nv50_ir_prog_info_out *info)
{
   struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
   unsigned i, n, m, c;
   unsigned nvary;
   unsigned nflat;
   unsigned nintp = 0;

   /* count recorded non-flat inputs */
   for (m = 0, i = 0; i < info->numInputs; ++i) {
      switch (info->in[i].sn) {
      case TGSI_SEMANTIC_POSITION:
         continue;
      default:
         m += info->in[i].flat ? 0 : 1;
         break;
      }
   }
   /* careful: id may be != i in info->in[prog->in[i].id] */

   /* Fill prog->in[] so that non-flat inputs are first and
    * kick out special inputs that don't use the RESULT_MAP.
    */
   for (n = 0, i = 0; i < info->numInputs; ++i) {
      if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
         prog->fp.interp |= info->in[i].mask << 24;
         for (c = 0; c < 4; ++c)
            if (info->in[i].mask & (1 << c))
               info->in[i].slot[c] = nintp++;
      } else {
         unsigned j = info->in[i].flat ? m++ : n++;

         if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
            prog->vp.bfc[info->in[i].si] = j;
         else if (info->in[i].sn == TGSI_SEMANTIC_PRIMID)
            prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID;

         prog->in[j].id = i;
         prog->in[j].mask = info->in[i].mask;
         prog->in[j].sn = info->in[i].sn;
         prog->in[j].si = info->in[i].si;
         prog->in[j].linear = info->in[i].linear;

         prog->in_nr++;
      }
   }
   if (!(prog->fp.interp & (8 << 24))) {
      ++nintp;
      prog->fp.interp |= 8 << 24;
   }

   for (i = 0; i < prog->in_nr; ++i) {
      int j = prog->in[i].id;

      prog->in[i].hw = nintp;
      for (c = 0; c < 4; ++c)
         if (prog->in[i].mask & (1 << c))
            info->in[j].slot[c] = nintp++;
   }
   /* (n == m) if m never increased, i.e. no flat inputs */
   nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
   nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
   nvary = nintp - nflat;

   prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
   prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;

   /* put front/back colors right after HPOS */
   prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
   for (i = 0; i < 2; ++i)
      if (prog->vp.bfc[i] < 0xff)
         prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;

   /* FP outputs */

   if (info->prop.fp.numColourResults > 1)
      prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;

   for (i = 0; i < info->numOutputs; ++i) {
      prog->out[i].id = i;
      prog->out[i].sn = info->out[i].sn;
      prog->out[i].si = info->out[i].si;
      prog->out[i].mask = info->out[i].mask;

      if (i == info->io.fragDepth || i == info->io.sampleMask)
         continue;
      prog->out[i].hw = info->out[i].si * 4;

      for (c = 0; c < 4; ++c)
         info->out[i].slot[c] = prog->out[i].hw + c;

      prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
   }

   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
      info->out[info->io.sampleMask].slot[0] = prog->max_out++;
      prog->fp.has_samplemask = 1;
   }

   if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
      info->out[info->io.fragDepth].slot[2] = prog->max_out++;

   if (!prog->max_out)
      prog->max_out = 4;

   return 0;
}

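/* Callback installed as info->assignSlots by nv50_program_translate(): maps
 * shader inputs/outputs to hardware slots, dispatching on the shader type.
 */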
static int
nv50_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
{
   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      return nv50_vertprog_assign_slots(info);
   case PIPE_SHADER_GEOMETRY:
      return nv50_vertprog_assign_slots(info);
   case PIPE_SHADER_FRAGMENT:
      return nv50_fragprog_assign_slots(info);
   case PIPE_SHADER_COMPUTE:
      return 0;
   default:
      return -1;
   }
}

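/* Build the stream-output (transform feedback) state: derive per-buffer
 * strides and attribute counts from the pipe_stream_output_info and fill
 * so->map[] with the output slots assigned by the slot-assignment callbacks
 * above.
 */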
static struct nv50_stream_output_state *
nv50_program_create_strmout_state(const struct nv50_ir_prog_info_out *info,
                                  const struct pipe_stream_output_info *pso)
{
   struct nv50_stream_output_state *so;
   unsigned b, i, c;
   unsigned base[4];

   so = MALLOC_STRUCT(nv50_stream_output_state);
   if (!so)
      return NULL;
   memset(so->map, 0xff, sizeof(so->map));

   for (b = 0; b < 4; ++b)
      so->num_attribs[b] = 0;
   for (i = 0; i < pso->num_outputs; ++i) {
      unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
      b = pso->output[i].output_buffer;
      assert(b < 4);
      so->num_attribs[b] = MAX2(so->num_attribs[b], end);
   }

   so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;

   so->stride[0] = pso->stride[0] * 4;
   base[0] = 0;
   for (b = 1; b < 4; ++b) {
      assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
      so->stride[b] = so->num_attribs[b] * 4;
      if (so->num_attribs[b])
         so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
      base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
   }
   if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
      assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
      so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
   }

   so->map_size = base[3] + so->num_attribs[3];

   for (i = 0; i < pso->num_outputs; ++i) {
      const unsigned s = pso->output[i].start_component;
      const unsigned p = pso->output[i].dst_offset;
      const unsigned r = pso->output[i].register_index;
      b = pso->output[i].output_buffer;

      if (r >= info->numOutputs)
         continue;

      for (c = 0; c < pso->output[i].num_components; ++c)
         so->map[base[b] + p + c] = info->out[r].slot[s + c];
   }

   return so;
}

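/* Translate the program's TGSI or NIR source with the nv50_ir code generator
 * and record the resulting binary, relocation/fixup data, GPR and TLS usage,
 * and per-stage state (clip/cull masks, FP flags, GP primitive type, CP
 * globals) in the nv50_program.
 */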
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
                       struct pipe_debug_callback *debug)
{
   struct nv50_ir_prog_info *info;
   struct nv50_ir_prog_info_out info_out = {};
   int i, ret;
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;

   info->bin.sourceRep = prog->pipe.type;
   switch (prog->pipe.type) {
   case PIPE_SHADER_IR_TGSI:
      info->bin.source = (void *)prog->pipe.tokens;
      break;
   case PIPE_SHADER_IR_NIR:
      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
      break;
   default:
      assert(!"unsupported IR!");
      free(info);
      return false;
   }

   info->bin.smemSize = prog->cp.smem_size;
   info->io.auxCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   if (prog->fp.alphatest)
      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;

   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.bufInfoBase = NV50_CB_AUX_BUF_INFO(0);
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->io.membarOffset = NV50_CB_AUX_MEMBAR_OFFSET;
   info->io.gmemMembar = 15;

   info->assignSlots = nv50_program_assign_varying_slots;

   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   if (prog->type == PIPE_SHADER_COMPUTE)
      info->prop.cp.inputOffset = 0x14;

   info_out.driverPriv = prog;

#ifndef NDEBUG
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info, &info_out);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }

   prog->code = info_out.bin.code;
   prog->code_size = info_out.bin.codeSize;
   prog->fixups = info_out.bin.relocData;
   prog->interps = info_out.bin.fixupData;
   prog->max_gpr = MAX2(4, (info_out.bin.maxGPR >> 1) + 1);
   prog->tls_space = info_out.bin.tlsSpace;
   prog->cp.smem_size = info_out.bin.smemSize;
   prog->mul_zero_wins = info->io.mul_zero_wins;
   prog->vp.need_vertex_id = info_out.io.vertexId < PIPE_MAX_SHADER_INPUTS;

   prog->vp.clip_enable = (1 << info_out.io.clipDistances) - 1;
   prog->vp.cull_enable =
      ((1 << info_out.io.cullDistances) - 1) << info_out.io.clipDistances;
   prog->vp.clip_mode = 0;
   for (i = 0; i < info_out.io.cullDistances; ++i)
      prog->vp.clip_mode |= 1 << ((info_out.io.clipDistances + i) * 4);

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info_out.prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info_out.prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      switch (info_out.prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info_out.prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = CLAMP(info_out.prop.gp.maxVertices, 1, 1024);
   } else
   if (prog->type == PIPE_SHADER_COMPUTE) {
      for (i = 0; i < NV50_MAX_GLOBALS; i++) {
         prog->cp.gmem[i] = (struct nv50_gmem_state){
            .valid = info_out.prop.cp.gmem[i].valid,
            .image = info_out.prop.cp.gmem[i].image,
            .slot = info_out.prop.cp.gmem[i].slot
         };
      }
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(&info_out,
                                                   &prog->pipe.stream_output);

   pipe_debug_message(debug, SHADER_INFO,
                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d",
                      prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize,
                      prog->max_gpr, info_out.bin.instructions,
                      info_out.bin.codeSize);

out:
   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
      ralloc_free((void *)info->bin.source);
   FREE(info);
   return !ret;
}

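/* Upload the generated code into the per-stage code heap (evicting all
 * resident shaders if the heap is full), resolve relocations and
 * interpolation fixups, copy the code into VRAM and flush the code cache.
 */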
bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
   struct nouveau_heap *heap;
   int ret;
   uint32_t size = align(prog->code_size, 0x40);
   uint8_t prog_type;

   switch (prog->type) {
   case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
   case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break;
   default:
      assert(!"invalid program type");
      return false;
   }

   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
   if (ret) {
      /* Out of space: evict everything to compactify the code segment, hoping
       * the working set is much smaller and drifts slowly. Improve me !
       */
      while (heap->next) {
         struct nv50_program *evict = heap->next->priv;
         if (evict)
            nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
      if (ret) {
         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
         return false;
      }
   }

   if (prog->type == PIPE_SHADER_COMPUTE) {
      /* CP code must be uploaded in FP code segment. */
      prog_type = 1;
   } else {
      prog->code_base = prog->mem->start;
      prog_type = prog->type;
   }

   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
   if (ret < 0) {
      nouveau_heap_free(&prog->mem);
      return false;
   }
   if (ret > 0)
      nv50->state.new_tls_space = true;

   if (prog->fixups)
      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
   if (prog->interps)
      nv50_ir_apply_fixups(prog->interps, prog->code,
                           prog->fp.force_persample_interp,
                           false /* flatshade */,
                           prog->fp.alphatest - 1,
                           false /* msaa */);

   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                       (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);

   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
   PUSH_DATA (nv50->base.pushbuf, 0);

   return true;
}

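/* Release all resources owned by the program and reset it to a clean state,
 * preserving only the pipe_shader_state and the shader type.
 */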
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
   const struct pipe_shader_state pipe = p->pipe;
   const ubyte type = p->type;

   if (p->mem)
      nouveau_heap_free(&p->mem);

   FREE(p->code);

   FREE(p->fixups);
   FREE(p->interps);
   FREE(p->so);

   memset(p, 0, sizeof(*p));

   p->pipe = pipe;
   p->type = type;
}