GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_compute.c
/*
 * Copyright 2012 Francisco Jerez
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/format/u_format.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_compute.xml.h"

#include "codegen/nv50_ir_driver.h"

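/* One-time initialization of the COMPUTE object: pick the object class for
 * the chipset, bind it on the channel, and emit the static state it needs
 * (stack and local-memory storage, DMA objects, the TIC/TSC tables shared
 * with the 3D engine, the 16 GLOBAL slots, the driver constbuf and the
 * fence/query buffer). */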
int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

   BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

   BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_CP(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_CP(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

   BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->fence.bo->offset + 16);
   PUSH_DATA (push, screen->fence.bo->offset + 16);

   return 0;
}

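/* Re-validate compute sampler state; if any TSC entries were (re)written,
 * flush the sampler cache on the compute engine. */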
static void
nv50_compute_validate_samplers(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}

static void
nv50_compute_validate_textures(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D textures because they are aliased. */
   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
   nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}

static inline void
nv50_compute_invalidate_constbufs(struct nv50_context *nv50)
{
   int s;

   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) {
      nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s];
      nv50->state.uniform_buffer_bound[s] = false;
   }
   nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}

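/* Upload/bind the dirty compute constant buffers. User constbufs (slot 0
 * only) are streamed inline through CB_ADDR/CB_DATA, while buffer-backed
 * constbufs are bound via CB_DEF_ADDRESS + SET_PROGRAM_CB. */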
static void
nv50_compute_validate_constbufs(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   const int s = NV50_SHADER_STAGE_COMPUTE;

   while (nv50->constbuf_dirty[s]) {
      int i = ffs(nv50->constbuf_dirty[s]) - 1;
      nv50->constbuf_dirty[s] &= ~(1 << i);

      if (nv50->constbuf[s][i].user) {
         const unsigned b = NV50_CB_PVP + s;
         unsigned start = 0;
         unsigned words = nv50->constbuf[s][0].size / 4;
         if (i) {
            NOUVEAU_ERR("user constbufs only supported in slot 0\n");
            continue;
         }
         if (!nv50->state.uniform_buffer_bound[s]) {
            nv50->state.uniform_buffer_bound[s] = true;
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
         }
         while (words) {
            unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

            PUSH_SPACE(push, nr + 3);
            BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
            PUSH_DATA (push, (start << 8) | b);
            BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr);
            PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

            start += nr;
            words -= nr;
         }
      } else {
         struct nv04_resource *res =
            nv04_resource(nv50->constbuf[s][i].u.buf);
         if (res) {
            /* TODO: allocate persistent bindings */
            const unsigned b = s * 16 + i;

            assert(nouveau_resource_mapped_by_gpu(&res->base));

            BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
            PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, (b << 16) |
                       (nv50->constbuf[s][i].size & 0xffff));
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);

            BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD);

            nv50->cb_dirty = 1; /* Force cache flush for UBO. */
            res->cb_bindings[s] |= 1 << i;
         } else {
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nv50->state.uniform_buffer_bound[s] = false;
      }
   }

   // TODO: Check if having orthogonal slots means the two don't trample over
   // each other.
   nv50_compute_invalidate_constbufs(nv50);
}

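/* Compute the effective width/height/depth of an image view: element count
 * for buffers, minified mip dimensions otherwise, with depth replaced by the
 * layer count for array and cube targets. */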
static void
nv50_get_surface_dims(const struct pipe_image_view *view,
                      int *width, int *height, int *depth)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   int level;

   *width = *height = *depth = 1;
   if (res->base.target == PIPE_BUFFER) {
      *width = view->u.buf.size / util_format_get_blocksize(view->format);
      return;
   }

   level = view->u.tex.level;
   *width = u_minify(view->resource->width0, level);
   *height = u_minify(view->resource->height0, level);
   *depth = u_minify(view->resource->depth0, level);

   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
      break;
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_3D:
      break;
   default:
      assert(!"unexpected texture target");
      break;
   }
}

static void
nv50_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct nv04_resource *res = (struct nv04_resource *)view->resource;

   assert(view->resource->target == PIPE_BUFFER);

   util_range_add(&res->base, &res->valid_buffer_range,
                  view->u.buf.offset,
                  view->u.buf.offset + view->u.buf.size);
}

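/* Fill the 12-dword per-image descriptor that the shader reads from the aux
 * constbuf: dimensions, bytes per block and (for miptrees) tiling/MS
 * parameters. An all-zero descriptor marks the slot as unbound. */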
static inline void
nv50_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      int width, int height, int depth)
{
   struct nv04_resource *res;
   uint32_t *const info = push->cur;

   push->cur += 12;

   /* Make sure to always initialize the surface information area because it's
    * used to check if the given image is bound or not. */
   memset(info, 0, 12 * sizeof(*info));

   if (!view || !view->resource)
      return;
   res = nv04_resource(view->resource);

   /* Store the image dimensions for the imageSize() builtin. */
   info[0] = width;
   info[1] = height;
   info[2] = depth;

   /* Store the blockwidth (i.e. number of bytes per pixel), used to compute
    * the pixel offset and to check for format mismatches. */
   info[3] = util_format_get_blocksize(view->format);

   if (res->base.target != PIPE_BUFFER) {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      unsigned nby = align(util_format_get_nblocksy(view->format, height),
                           NV50_TILE_SIZE_Y(lvl->tile_mode));

      if (mt->layout_3d) {
         info[4] = nby;
         info[11] = view->u.tex.first_layer;
      } else {
         info[4] = mt->layer_stride / lvl->pitch;
      }
      info[6] = mt->ms_x;
      info[7] = mt->ms_y;
      info[8] = NV50_TILE_SHIFT_X(lvl->tile_mode);
      info[9] = NV50_TILE_SHIFT_Y(lvl->tile_mode);
      info[10] = NV50_TILE_SHIFT_Z(lvl->tile_mode);
   }
}

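/* Bind shader buffers and images to the GLOBAL slots referenced by the
 * compute program and write their auxiliary descriptors. Tiled 3D images are
 * remapped so they stay addressable within the 2D GLOBAL limits (see the
 * comment in the layout_3d branch below). */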
static void
nv50_compute_validate_surfaces(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   int i;

   for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
      struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
      int width, height, depth;
      uint64_t address = 0;

      BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);

      if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
         struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
         struct nv04_resource *res = nv04_resource(buffer->buffer);
         PUSH_DATAh(push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, 0); /* pitch? */
         PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
         PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
         util_range_add(&res->base, &res->valid_buffer_range,
                        buffer->buffer_offset,
                        buffer->buffer_offset +
                        buffer->buffer_size);

         PUSH_SPACE(push, 1 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
         PUSH_DATA (push, buffer->buffer_size);
      } else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
         struct pipe_image_view *view = &nv50->images[gmem->slot];
         struct nv04_resource *res = nv04_resource(view->resource);

         /* get surface dimensions based on the target. */
         nv50_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            address += view->u.buf.offset;
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nv50_mark_image_range_valid(view);

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, 0); /* pitch? */
            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
            PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            const unsigned z = view->u.tex.first_layer;
            unsigned max_size;

            if (mt->layout_3d) {
               address += nv50_mt_zslice_offset(mt, view->u.tex.level, 0);
               max_size = mt->total_size;
            } else {
               address += mt->layer_stride * z;
               max_size = mt->layer_stride * (view->u.tex.last_layer - view->u.tex.first_layer + 1);
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            if (mt->layout_3d) {
               // We have to adjust the size of the 3d surface to be
               // accessible within 2d limits. The size of each z tile goes
               // into the x direction, while the number of z tiles goes into
               // the y direction.
               const unsigned nby = util_format_get_nblocksy(view->format, height);
               const unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
               const unsigned tsz = NV50_TILE_SIZE_Z(lvl->tile_mode);
               const unsigned pitch = lvl->pitch * tsz;
               const unsigned maxy = align(nby, tsy) * align(depth, tsz) >> NV50_TILE_SHIFT_Z(lvl->tile_mode);
               PUSH_DATA (push, pitch * tsy);
               PUSH_DATA (push, (maxy - 1) << 16 | (pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else if (nouveau_bo_memtype(res->bo)) {
               PUSH_DATA (push, lvl->pitch * NV50_TILE_SIZE_Y(lvl->tile_mode));
               PUSH_DATA (push, (max_size / lvl->pitch - 1) << 16 | (lvl->pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else {
               PUSH_DATA (push, lvl->pitch);
               PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1);
               PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
            }
         }

         BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);

         PUSH_SPACE(push, 12 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
         nv50_set_surface_info(push, view, width, height, depth);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}

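/* Re-add every resource in the global_residents list to the compute bufctx
 * so it stays pinned while the grid runs. */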
static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
   unsigned i;

   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nv50->global_residents, struct pipe_resource *, i);
      if (res)
         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
                                  nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}

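/* Table mapping dirty bits to validation callbacks, processed by
 * nv50_state_validate() before each compute launch. */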
static struct nv50_state_validate
validate_list_cp[] = {
   { nv50_compprog_validate,          NV50_NEW_CP_PROGRAM },
   { nv50_compute_validate_constbufs, NV50_NEW_CP_CONSTBUF },
   { nv50_compute_validate_surfaces,  NV50_NEW_CP_SURFACES |
                                      NV50_NEW_CP_BUFFERS |
                                      NV50_NEW_CP_PROGRAM },
   { nv50_compute_validate_textures,  NV50_NEW_CP_TEXTURES },
   { nv50_compute_validate_samplers,  NV50_NEW_CP_SAMPLERS },
   { nv50_compute_validate_globals,   NV50_NEW_CP_GLOBALS },
};

static bool
nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
   bool ret;

   /* TODO: validate textures, samplers, surfaces */
   ret = nv50_state_validate(nv50, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
                             nv50->bufctx_cp);

   if (unlikely(nv50->state.flushed))
      nv50_bufctx_fence(nv50->bufctx_cp, true);
   return ret;
}

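/* Upload the kernel input parameters: USER_PARAM_COUNT reserves one extra
 * slot (USER_PARAM(0) is written at launch time with the grid z index), then
 * the user data is copied into a temporary GART buffer and streamed into
 * USER_PARAM(1) onwards through the pushbuf. */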
static void
nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   unsigned size = align(nv50->compprog->parm_size, 0x4);

   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, (1 + (size / 4)) << 8);

   if (size) {
      struct nouveau_mm_allocation *mm;
      struct nouveau_bo *bo = NULL;
      unsigned offset;

      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
      assert(mm);

      nouveau_bo_map(bo, 0, screen->base.client);
      memcpy(bo->map + offset, input, size);

      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
      nouveau_pushbuf_bufctx(push, nv50->bufctx);
      nouveau_pushbuf_validate(push);

      nouveau_pushbuf_space(push, 0, 0, 1);

      BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
      nouveau_pushbuf_data(push, bo, offset, size);

      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
      nouveau_bo_ref(NULL, &bo);
      nouveau_bufctx_reset(nv50->bufctx, 0);
   }
}

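/* Launch a compute grid. GRIDDIM only encodes the x/y dimensions, so the z
 * dimension is handled by looping over z slices and passing the slice index
 * through USER_PARAM(0); indirect launches are emulated by reading the grid
 * size back on the CPU. */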
void
nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
   struct nv50_program *cp = nv50->compprog;
   bool ret;

   ret = !nv50_state_validate_cp(nv50, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid!\n");
      return;
   }

   nv50_compute_upload_input(nv50, info->input);

   BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

   BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x14, 0x40));
   BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, cp->max_gpr);

   /* no indirect support - just read the parameters out */
   uint32_t grid[3];
   if (unlikely(info->indirect)) {
      pipe_buffer_read(pipe, info->indirect, info->indirect_offset,
                       sizeof(grid), grid);
   } else {
      memcpy(grid, info->grid, sizeof(grid));
   }

   /* grid/block setup */
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
   PUSH_DATA (push, info->block[2]);
   BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
   PUSH_DATA (push, 1 << 16 | block_size);
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
   PUSH_DATA (push, grid[1] << 16 | grid[0]);
   BEGIN_NV04(push, NV50_CP(GRIDID), 1);
   PUSH_DATA (push, 1);

   for (int i = 0; i < grid[2]; i++) {
      BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
      PUSH_DATA (push, grid[2] | i << 16);

      /* kernel launching */
      BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* binding a compute shader clobbers fragment shader state */
   nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;

   nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] *
                                grid[0] * grid[1] * grid[2];
}