/* PojavLauncherTeam/mesa, branch 21.2-virgl:
 * src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
 */
#include "util/format/u_format.h"
#include "util/u_framebuffer.h"
#include "util/u_math.h"
#include "util/u_viewport.h"

#include "nvc0/nvc0_context.h"

#if 0
static void
nvc0_validate_zcull(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct nv50_surface *sf = nv50_surface(fb->zsbuf);
   struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
   struct nouveau_bo *bo = mt->base.bo;
   uint32_t size;
   uint32_t offset = align(mt->total_size, 1 << 17);
   unsigned width, height;

   assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);

   size = mt->total_size * 2;

   height = align(fb->height, 32);
   width = fb->width % 224;
   if (width)
      width = fb->width + (224 - width);
   else
      width = fb->width;

   BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   offset += 1 << 17;
   BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
   PUSH_DATA (push, size);
   PUSH_DATA (push, size >> 16);
   BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
   PUSH_DATA (push, 2);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
   PUSH_DATA (push, width);
   PUSH_DATA (push, height);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
   PUSH_DATA (push, 0);
}
#endif

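/* Bind a "null" render target in RT slot i: zero address, zero height, a
 * 64-pixel linear width and the given layer count. Editor's note (assumption,
 * not stated in this file): the non-zero width apparently keeps the slot
 * valid for the hardware while guaranteeing nothing is ever written.
 */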
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
   BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 64);     // width
   PUSH_DATA (push, 0);      // height
   PUSH_DATA (push, 0);      // format
   PUSH_DATA (push, 0);      // tile mode
   PUSH_DATA (push, layers); // layers
   PUSH_DATA (push, 0);      // layer stride
   PUSH_DATA (push, 0);      // base layer

}
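/* Each sample position is a 4-bit fixed-point offset within the pixel. The
 * returned word packs the same (x, y) twice: LUT-remapped at bits 8/24 for
 * TGSI_OPCODE_INTERP_SAMPLE, and raw at bits 12/28 for SV_SAMPLE_POS. For
 * example, the pixel centre x = y = 8 has lut[8] = 0x0, giving
 * result = (0x0 << 8) | (0x0 << 24) | (8 << 12) | (8 << 28).
 */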
static uint32_t
gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
{
   static const uint8_t lut[] = {
      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
   uint32_t result = 0;
   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
   result |= lut[x] << 8 | lut[y] << 24;
   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
   result |= x << 12 | y << 28;
   return result;
}

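/* GM200+ programmable sample locations: build a 16-entry (x, y) table from
 * either the user-supplied locations (flipped to match the framebuffer's Y
 * orientation) or the hardware defaults repeated, upload a 64-dword lookup
 * table into the AUX constbuf for shader access, and pack the same positions
 * (4 bits per coordinate) into the four words at method 0x11e0.
 */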
static void
gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned grid_width, grid_height, hw_grid_width;
   uint8_t sample_locations[16][2];
   unsigned cb[64];
   unsigned i, pixel, pixel_y, pixel_x, sample;
   uint32_t packed_locations[4] = {};

   screen->base.base.get_sample_pixel_grid(
      &screen->base.base, ms, &grid_width, &grid_height);

   hw_grid_width = grid_width;
   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
      hw_grid_width = 4;

   if (nvc0->sample_locations_enabled) {
      uint8_t locations[2 * 4 * 8];
      memcpy(locations, nvc0->sample_locations, sizeof(locations));
      util_sample_locations_flip_y(
         &screen->base.base, nvc0->framebuffer.height, ms, locations);

      for (pixel = 0; pixel < hw_grid_width * grid_height; pixel++) {
         for (sample = 0; sample < ms; sample++) {
            unsigned pixel_x = pixel % hw_grid_width;
            unsigned pixel_y = pixel / hw_grid_width;
            unsigned wi = pixel * ms + sample;
            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
            ri = ri * ms + sample;
            sample_locations[wi][0] = locations[ri] & 0xf;
            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
         }
      }
   } else {
      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
      for (i = 0; i < 16; i++) {
         sample_locations[i][0] = ptr[i % ms][0];
         sample_locations[i][1] = ptr[i % ms][1];
      }
   }

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
         for (sample = 0; sample < ms; sample++) {
            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
            unsigned read_index = pixel_y % grid_height * hw_grid_width;
            uint8_t x, y;
            read_index += pixel_x % grid_width;
            read_index = read_index * ms + sample;
            x = sample_locations[read_index][0];
            y = sample_locations[read_index][1];
            cb[write_index] = gm200_encode_cb_sample_location(x, y);
         }
      }
   }
   PUSH_DATAp(push, cb, 64);

   for (i = 0; i < 16; i++) {
      packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
      packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
   }

   BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
   PUSH_DATAp(push, packed_locations, 4);
}

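/* Pre-GM200 fallback: sample locations are fixed, so only the float sample
 * positions reported by get_sample_position() are uploaded to the AUX
 * constbuf (presumably for shaders reading gl_SamplePosition).
 */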
static void
nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned i;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (i = 0; i < ms; i++) {
      float xy[2];
      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
      PUSH_DATAf(push, xy[0]);
      PUSH_DATAf(push, xy[1]);
   }
}

static void
validate_sample_locations(struct nvc0_context *nvc0)
{
   unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);

   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
      gm200_validate_sample_locations(nvc0, ms);
   else
      nvc0_validate_sample_locations(nvc0, ms);
}

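/* Framebuffer validation: bind each colour surface as a render target
 * (tiled miptrees and linear/buffer resources use different RT method
 * layouts), program ZETA for the depth/stencil surface, derive the
 * multisample mode from the bound surfaces, and emit a SERIALIZE when a
 * resource previously read by the GPU is rebound for writing.
 */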
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   unsigned i;
   unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
   unsigned nr_cbufs = fb->nr_cbufs;
   bool serialize = false;

   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);

   BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
   PUSH_DATA (push, fb->width << 16);
   PUSH_DATA (push, fb->height << 16);

   for (i = 0; i < fb->nr_cbufs; ++i) {
      struct nv50_surface *sf;
      struct nv04_resource *res;
      struct nouveau_bo *bo;

      if (!fb->cbufs[i]) {
         nvc0_fb_set_null_rt(push, i, 0);
         continue;
      }

      sf = nv50_surface(fb->cbufs[i]);
      res = nv04_resource(sf->base.texture);
      bo = res->bo;

      BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
      PUSH_DATAh(push, res->address + sf->offset);
      PUSH_DATA (push, res->address + sf->offset);
      if (likely(nouveau_bo_memtype(bo))) {
         struct nv50_miptree *mt = nv50_miptree(sf->base.texture);

         assert(sf->base.texture->target != PIPE_BUFFER);

         PUSH_DATA(push, sf->width);
         PUSH_DATA(push, sf->height);
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, (mt->layout_3d << 16) |
                   mt->level[sf->base.u.tex.level].tile_mode);
         PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
         PUSH_DATA(push, mt->layer_stride >> 2);
         PUSH_DATA(push, sf->base.u.tex.first_layer);

         ms_mode = mt->ms_mode;
      } else {
         if (res->base.target == PIPE_BUFFER) {
            PUSH_DATA(push, 262144);
            PUSH_DATA(push, 1);
         } else {
            PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
            PUSH_DATA(push, sf->height);
         }
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, 1 << 12);
         PUSH_DATA(push, 1);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);

         nvc0_resource_fence(res, NOUVEAU_BO_WR);

         assert(!fb->zsbuf);
      }

      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* only register for writing, otherwise we'd always serialize here */
      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
   }

   if (fb->zsbuf) {
      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
      int unk = mt->base.base.target == PIPE_TEXTURE_2D;

      BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
      PUSH_DATAh(push, mt->base.address + sf->offset);
      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 1);
      BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
      PUSH_DATA (push, sf->width);
      PUSH_DATA (push, sf->height);
      PUSH_DATA (push, (unk << 16) |
                 (sf->base.u.tex.first_layer + sf->depth));
      BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
      PUSH_DATA (push, sf->base.u.tex.first_layer);

      ms_mode = mt->ms_mode;

      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
   } else {
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 0);
   }

   if (nr_cbufs == 0 && !fb->zsbuf) {
      assert(util_is_power_of_two_or_zero(fb->samples));
      assert(fb->samples <= 8);

      nvc0_fb_set_null_rt(push, 0, fb->layers);

      if (fb->samples > 1)
         ms_mode = ffs(fb->samples) - 1;
      nr_cbufs = 1;
   }

   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
   PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);

   if (serialize)
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

   NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
}

static void
nvc0_validate_blend_colour(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
   PUSH_DATAf(push, nvc0->blend_colour.color[0]);
   PUSH_DATAf(push, nvc0->blend_colour.color[1]);
   PUSH_DATAf(push, nvc0->blend_colour.color[2]);
   PUSH_DATAf(push, nvc0->blend_colour.color[3]);
}

static void
nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const ubyte *ref = &nvc0->stencil_ref.ref_value[0];

   IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
   IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
}

static void
nvc0_validate_stipple(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;

   BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
   for (i = 0; i < 32; ++i)
      PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
}

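/* Scissors are tracked per viewport slot; scissors_dirty is a bitmask over
 * the NVC0_MAX_VIEWPORTS slots. Toggling the rasterizer's scissor enable
 * invalidates every slot, because disabled slots are reprogrammed to the
 * full 0..0xffff window instead of the user rectangle.
 */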
static void
nvc0_validate_scissor(struct nvc0_context *nvc0)
{
   int i;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
       nvc0->rast->pipe.scissor == nvc0->state.scissor)
      return;

   if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
      nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;

   nvc0->state.scissor = nvc0->rast->pipe.scissor;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_scissor_state *s = &nvc0->scissors[i];
      if (!(nvc0->scissors_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
      if (nvc0->rast->pipe.scissor) {
         PUSH_DATA(push, (s->maxx << 16) | s->minx);
         PUSH_DATA(push, (s->maxy << 16) | s->miny);
      } else {
         PUSH_DATA(push, (0xffff << 16) | 0);
         PUSH_DATA(push, (0xffff << 16) | 0);
      }
   }
   nvc0->scissors_dirty = 0;
}

static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint16_t class_3d = nvc0->screen->base.class_3d;
   int x, y, w, h, i;
   float zmin, zmax;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_viewport_state *vp = &nvc0->viewports[i];

      if (!(nvc0->viewports_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
      PUSH_DATAf(push, vp->translate[0]);
      PUSH_DATAf(push, vp->translate[1]);
      PUSH_DATAf(push, vp->translate[2]);

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
      PUSH_DATAf(push, vp->scale[0]);
      PUSH_DATAf(push, vp->scale[1]);
      PUSH_DATAf(push, vp->scale[2]);

      /* now set the viewport rectangle to viewport dimensions for clipping */

      x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
      y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
      w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
      h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

      /* If the halfz setting ever changes, the viewports will also get
       * updated. The rast will get updated before the validate function has a
       * chance to hit, so we can just use it directly without an atom
       * dependency.
       */
      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
      PUSH_DATAf(push, zmax);

      if (class_3d >= GM200_3D_CLASS) {
         BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SWIZZLE(i)), 1);
         PUSH_DATA (push, vp->swizzle_x << 0 |
                          vp->swizzle_y << 4 |
                          vp->swizzle_z << 8 |
                          vp->swizzle_w << 12);
      }
   }
   nvc0->viewports_dirty = 0;
}

static void
nvc0_validate_window_rects(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
   int i;

   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
   if (!enable)
      return;

   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
   BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
   for (i = 0; i < nvc0->window_rect.rects; i++) {
      struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
      PUSH_DATA(push, (s->maxx << 16) | s->minx);
      PUSH_DATA(push, (s->maxy << 16) | s->miny);
   }
   for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
   }
}

static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
   PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}

static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
                        struct nvc0_program *vp, uint8_t mask)
{
   const unsigned n = util_logbase2(mask) + 1;

   if (vp->vp.num_ucps >= n)
      return;
   nvc0_program_destroy(nvc0, vp);

   vp->vp.num_ucps = n;
   if (likely(vp == nvc0->vertprog))
      nvc0_vertprog_validate(nvc0);
   else
   if (likely(vp == nvc0->gmtyprog))
      nvc0_gmtyprog_validate(nvc0);
   else
      nvc0_tevlprog_validate(nvc0);
}

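/* User clip planes are handled by the last shader stage before the
 * rasterizer: geometry if bound, else tessellation evaluation, else vertex.
 * The stage index doubles as the shift applied to NVC0_NEW_3D_VERTPROG to
 * reach that stage's dirty bit, and as the AUX constbuf slot the plane
 * equations are uploaded into.
 */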
static void
nvc0_validate_clip(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp;
   unsigned stage;
   uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;

   if (nvc0->gmtyprog) {
      stage = 3;
      vp = nvc0->gmtyprog;
   } else
   if (nvc0->tevlprog) {
      stage = 2;
      vp = nvc0->tevlprog;
   } else {
      stage = 0;
      vp = nvc0->vertprog;
   }

   if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
      nvc0_check_program_ucps(nvc0, vp, clip_enable);

   if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
      if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
         nvc0_upload_uclip_planes(nvc0, stage);

   clip_enable &= vp->vp.clip_enable;
   clip_enable |= vp->vp.cull_enable;

   if (nvc0->state.clip_enable != clip_enable) {
      nvc0->state.clip_enable = clip_enable;
      IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
   }
   if (nvc0->state.clip_mode != vp->vp.clip_mode) {
      nvc0->state.clip_mode = vp->vp.clip_mode;
      BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
      PUSH_DATA (push, vp->vp.clip_mode);
   }
}

static void
nvc0_validate_blend(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->blend->size);
   PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
}

static void
nvc0_validate_zsa(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->zsa->size);
   PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
}

static void
nvc0_validate_rasterizer(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->rast->size);
   PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
}

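/* Constant buffer validation. Slot 0 with user data is the GL uniform
 * block: its contents are copied through the pushbuf into the screen's
 * uniform_bo. Real UBO resources are bound directly by GPU address, and
 * unbinding a non-zero slot is signalled with a size of -1.
 */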
static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
   unsigned s;

   bool can_serialize = true;

   for (s = 0; s < 5; ++s) {
      while (nvc0->constbuf_dirty[s]) {
         int i = ffs(nvc0->constbuf_dirty[s]) - 1;
         nvc0->constbuf_dirty[s] &= ~(1 << i);

         if (nvc0->constbuf[s][i].user) {
            struct nouveau_bo *bo = nvc0->screen->uniform_bo;
            const unsigned base = NVC0_CB_USR_INFO(s);
            const unsigned size = nvc0->constbuf[s][0].size;
            assert(i == 0); /* we really only want OpenGL uniforms here */
            assert(nvc0->constbuf[s][0].u.data);

            if (!nvc0->state.uniform_buffer_bound[s]) {
               nvc0->state.uniform_buffer_bound[s] = true;

               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
            }
            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                            base, NVC0_MAX_CONSTBUF_SIZE,
                            0, (size + 3) / 4,
                            nvc0->constbuf[s][0].u.data);
         } else {
            struct nv04_resource *res =
               nv04_resource(nvc0->constbuf[s][i].u.buf);
            if (res) {
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      nvc0->constbuf[s][i].size,
                                      res->address + nvc0->constbuf[s][i].offset);

               BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;

               if (i == 0)
                  nvc0->state.uniform_buffer_bound[s] = false;
            } else if (i != 0) {
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
            }
         }
      }
   }

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
      /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
      nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
      nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
      nvc0->state.uniform_buffer_bound[5] = false;
   }
}

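/* Shader buffers (SSBOs) are not a dedicated hardware binding on nvc0;
 * each stage instead gets a table of (address_lo, address_hi, size, 0)
 * records in its AUX constbuf, from which shader code derives its global
 * memory accesses.
 */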
static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, s;

   for (s = 0; s < 5; s++) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
      PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
      for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
         if (nvc0->buffers[s][i].buffer) {
            struct nv04_resource *res =
               nv04_resource(nvc0->buffers[s][i].buffer);
            PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
            PUSH_DATA (push, 0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
            util_range_add(&res->base, &res->valid_buffer_range,
                           nvc0->buffers[s][i].buffer_offset,
                           nvc0->buffers[s][i].buffer_offset +
                           nvc0->buffers[s][i].buffer_size);
         } else {
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
         }
      }
   }
}

static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   unsigned mask[4] =
   {
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff
   };

   BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
   PUSH_DATA (push, mask[0]);
   PUSH_DATA (push, mask[1]);
   PUSH_DATA (push, mask[2]);
   PUSH_DATA (push, mask[3]);
}

static void
nvc0_validate_min_samples(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int samples;

   samples = util_next_power_of_two(nvc0->min_samples);
   if (samples > 1) {
      // If we're using the incoming sample mask and doing sample shading, we
      // have to do sample shading "to the max", otherwise there's no way to
      // tell which sets of samples are covered by the current invocation.
      // Similarly for reading the framebuffer.
      if (nvc0->fragprog && (
            nvc0->fragprog->fp.sample_mask_in ||
            nvc0->fragprog->fp.reads_framebuffer))
         samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
      samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
   }

   IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}

static void
nvc0_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   int i;

   for (i = 0; i < 5; ++i)
      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));

   nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
}

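/* Rasterizer discard can be requested explicitly, but it is also implied
 * when neither depth nor stencil tests are enabled and the fragment program
 * produces no outputs. Editor's note (assumption): hdr[18] is the FP shader
 * header word holding the colour-output mask, so !hdr[18] means the FP
 * writes no colour results.
 */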
static void
nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool rasterizer_discard;

   if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
      rasterizer_discard = true;
   } else {
      bool zs = nvc0->zsa &&
         (nvc0->zsa->pipe.depth_enabled || nvc0->zsa->pipe.stencil[0].enabled);
      rasterizer_discard = !zs &&
         (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
   }

   if (rasterizer_discard != nvc0->state.rasterizer_discard) {
      nvc0->state.rasterizer_discard = rasterizer_discard;
      IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
   }
}

/* Alpha test is disabled if there are no color RTs, so make sure we have at
 * least one if alpha test is enabled. Note that this must run after
 * nvc0_validate_fb, otherwise that will override the RT count setting.
 */
static void
nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (nvc0->zsa && nvc0->zsa->pipe.alpha_enabled &&
       nvc0->framebuffer.zsbuf &&
       nvc0->framebuffer.nr_cbufs == 0) {
      nvc0_fb_set_null_rt(push, 0, 0);
      BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
      PUSH_DATA (push, (076543210 << 4) | 1);
   }
}

static void
nvc0_validate_rast_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct pipe_rasterizer_state *rast;

   /* Check the state object before taking its address; the original
    * "if (!rast)" test ran after the dereference and could never trigger.
    */
   if (!nvc0->rast)
      return;
   rast = &nvc0->rast->pipe;

   if (rast->offset_units_unscaled) {
      BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
      if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
         PUSH_DATAf(push, rast->offset_units * (1 << 16));
      else
         PUSH_DATAf(push, rast->offset_units * (1 << 24));
   }
}

static void
nvc0_validate_tess_state(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}

/* If we have a frag shader bound which tries to read from the framebuffer, we
 * have to make sure that the fb is bound as a texture in the expected
 * location. For Fermi, that's in the special driver slot 16, while for Kepler
 * it's a regular binding stored in the driver constbuf.
 */
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct pipe_sampler_view *old_view = nvc0->fbtexture;
   struct pipe_sampler_view *new_view = NULL;

   if (nvc0->fragprog &&
       nvc0->fragprog->fp.reads_framebuffer &&
       nvc0->framebuffer.nr_cbufs &&
       nvc0->framebuffer.cbufs[0]) {
      struct pipe_sampler_view tmpl;
      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];

      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.format = sf->format;
      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
      tmpl.swizzle_r = PIPE_SWIZZLE_X;
      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
      tmpl.swizzle_a = PIPE_SWIZZLE_W;

      /* Bail if it's the same parameters */
      if (old_view && old_view->texture == sf->texture &&
          old_view->format == sf->format &&
          old_view->u.tex.first_level == sf->u.tex.level &&
          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
          old_view->u.tex.last_layer == sf->u.tex.last_layer)
         return;

      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
   } else if (old_view == NULL) {
      return;
   }

   if (old_view)
      pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
   nvc0->fbtexture = new_view;

   if (new_view) {
      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
      assert(tic->id < 0);
      tic->id = nvc0_screen_tic_alloc(screen, tic);
      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
         PUSH_DATA (push, (0 << 20) | tic->id);
      } else {
         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
         PUSH_DATA (push, (tic->id << 9) | 1);
      }

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }
}

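/* Multiple pipe contexts share one hardware channel per screen, so on a
 * context switch the incoming context inherits the current hardware state
 * snapshot, marks everything dirty, and then clears the dirty bits for
 * state objects it does not actually have bound.
 */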
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
   struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
   unsigned s;

   if (ctx_from)
      ctx_to->state = ctx_from->state;
   else
      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty_3d = ~0;
   ctx_to->dirty_cp = ~0;
   ctx_to->viewports_dirty = ~0;
   ctx_to->scissors_dirty = ~0;

   for (s = 0; s < 6; ++s) {
      ctx_to->samplers_dirty[s] = ~0;
      ctx_to->textures_dirty[s] = ~0;
      ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
      ctx_to->buffers_dirty[s] = ~0;
      ctx_to->images_dirty[s] = ~0;
   }

   /* Reset tfb as the shader that owns it may have been deleted. */
   ctx_to->state.tfb = NULL;

   if (!ctx_to->vertex)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);

   if (!ctx_to->vertprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
   if (!ctx_to->fragprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;

   if (!ctx_to->blend)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
   if (!ctx_to->rast)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
   if (!ctx_to->zsa)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;

   ctx_to->screen->cur_ctx = ctx_to;
}

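/* The 3D validation table: each entry pairs a validate function with the
 * NVC0_NEW_3D_* dirty bits that trigger it. Entries run in table order,
 * which encodes the ordering dependencies noted above (e.g.
 * nvc0_validate_zsa_fb after nvc0_validate_fb).
 */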
static struct nvc0_state_validate
validate_list_3d[] = {
    { nvc0_validate_fb,            NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_blend,         NVC0_NEW_3D_BLEND },
    { nvc0_validate_zsa,           NVC0_NEW_3D_ZSA },
    { nvc0_validate_sample_mask,   NVC0_NEW_3D_SAMPLE_MASK },
    { nvc0_validate_rasterizer,    NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_blend_colour,  NVC0_NEW_3D_BLEND_COLOUR },
    { nvc0_validate_stencil_ref,   NVC0_NEW_3D_STENCIL_REF },
    { nvc0_validate_stipple,       NVC0_NEW_3D_STIPPLE },
    { nvc0_validate_scissor,       NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_viewport,      NVC0_NEW_3D_VIEWPORT },
    { nvc0_validate_window_rects,  NVC0_NEW_3D_WINDOW_RECTS },
    { nvc0_vertprog_validate,      NVC0_NEW_3D_VERTPROG },
    { nvc0_tctlprog_validate,      NVC0_NEW_3D_TCTLPROG },
    { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
    { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
    { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES |
                                   NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_fp_zsa_rast,   NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                   NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_zsa_fb,        NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_rast_fb,       NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
                                   NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_constbufs_validate,     NVC0_NEW_3D_CONSTBUF },
    { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
    { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
    { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
    { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
    { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
    { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
    { nvc0_layer_validate,         NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
                                   NVC0_NEW_3D_FRAMEBUFFER },
};

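/* Generic validation driver, shared with the compute path: run every entry
 * of validate_list whose dirty bits intersect *dirty & mask, clear those
 * bits, fence the touched buffers and revalidate the pushbuf's bufctx.
 * Returns false on pushbuf validation failure. (Assumption, not shown in
 * this file: callers such as the draw path pass ~0 as the mask.)
 */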
bool
nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
                    struct nvc0_state_validate *validate_list, int size,
                    uint32_t *dirty, struct nouveau_bufctx *bufctx)
{
   uint32_t state_mask;
   int ret;
   unsigned i;

   if (nvc0->screen->cur_ctx != nvc0)
      nvc0_switch_pipe_context(nvc0);

   state_mask = *dirty & mask;

   if (state_mask) {
      for (i = 0; i < size; ++i) {
         struct nvc0_state_validate *validate = &validate_list[i];

         if (state_mask & validate->states)
            validate->func(nvc0);
      }
      *dirty &= ~state_mask;

      nvc0_bufctx_fence(nvc0, bufctx, false);
   }

   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
   ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);

   return !ret;
}

bool
nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
                             ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
                             nvc0->bufctx_3d);

   if (unlikely(nvc0->state.flushed)) {
      nvc0->state.flushed = false;
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
   }
   return ret;
}