Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
4574 views
1
/*
2
* Copyright 2013 Nouveau Project
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*
22
* Authors: Christoph Bumiller, Samuel Pitoiset
23
*/
24
25
#include "nvc0/nvc0_context.h"
26
27
#include "nvc0/nvc0_compute.xml.h"
28
29
int
30
nvc0_screen_compute_setup(struct nvc0_screen *screen,
31
struct nouveau_pushbuf *push)
32
{
33
struct nouveau_object *chan = screen->base.channel;
34
struct nouveau_device *dev = screen->base.device;
35
uint32_t obj_class;
36
int ret;
37
int i;
38
39
switch (dev->chipset & ~0xf) {
40
case 0xc0:
41
case 0xd0:
42
/* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
43
* in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
44
obj_class = NVC0_COMPUTE_CLASS;
45
break;
46
default:
47
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
48
return -1;
49
}
50
51
ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
52
&screen->compute);
53
if (ret) {
54
NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
55
return ret;
56
}
57
58
BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
59
PUSH_DATA (push, screen->compute->oclass);
60
61
/* hardware limit */
62
BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
63
PUSH_DATA (push, screen->mp_count);
64
BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
65
PUSH_DATA (push, 0xf);
66
67
BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
68
PUSH_DATA (push, 0x8000);
69
70
/* global memory setup */
71
BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
72
PUSH_DATA (push, 0);
73
BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
74
for (i = 0; i <= 0xff; i++)
75
PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
76
BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
77
PUSH_DATA (push, 1);
78
79
/* local memory and cstack setup */
80
BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
81
PUSH_DATAh(push, screen->tls->offset);
82
PUSH_DATA (push, screen->tls->offset);
83
BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
84
PUSH_DATAh(push, screen->tls->size);
85
PUSH_DATA (push, screen->tls->size);
86
BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
87
PUSH_DATA (push, 0);
88
BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
89
PUSH_DATA (push, 0xff << 24);
90
91
/* shared memory setup */
92
BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
93
PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
94
BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
95
PUSH_DATA (push, 0xfe << 24);
96
BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
97
PUSH_DATA (push, 0);
98
99
/* code segment setup */
100
BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
101
PUSH_DATAh(push, screen->text->offset);
102
PUSH_DATA (push, screen->text->offset);
103
104
/* textures */
105
BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
106
PUSH_DATAh(push, screen->txc->offset);
107
PUSH_DATA (push, screen->txc->offset);
108
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
109
110
/* samplers */
111
BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
112
PUSH_DATAh(push, screen->txc->offset + 65536);
113
PUSH_DATA (push, screen->txc->offset + 65536);
114
PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
115
116
/* MS sample coordinate offsets */
117
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
118
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
119
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
120
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
121
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
122
PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
123
PUSH_DATA (push, 0); /* 0 */
124
PUSH_DATA (push, 0);
125
PUSH_DATA (push, 1); /* 1 */
126
PUSH_DATA (push, 0);
127
PUSH_DATA (push, 0); /* 2 */
128
PUSH_DATA (push, 1);
129
PUSH_DATA (push, 1); /* 3 */
130
PUSH_DATA (push, 1);
131
PUSH_DATA (push, 2); /* 4 */
132
PUSH_DATA (push, 0);
133
PUSH_DATA (push, 3); /* 5 */
134
PUSH_DATA (push, 0);
135
PUSH_DATA (push, 2); /* 6 */
136
PUSH_DATA (push, 1);
137
PUSH_DATA (push, 3); /* 7 */
138
PUSH_DATA (push, 1);
139
140
return 0;
141
}
142
143
static void
144
nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
145
{
146
bool need_flush = nvc0_validate_tsc(nvc0, 5);
147
if (need_flush) {
148
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
149
PUSH_DATA (nvc0->base.pushbuf, 0);
150
}
151
152
/* Invalidate all 3D samplers because they are aliased. */
153
for (int s = 0; s < 5; s++)
154
nvc0->samplers_dirty[s] = ~0;
155
nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
156
}
157
158
static void
159
nvc0_compute_validate_textures(struct nvc0_context *nvc0)
160
{
161
bool need_flush = nvc0_validate_tic(nvc0, 5);
162
if (need_flush) {
163
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
164
PUSH_DATA (nvc0->base.pushbuf, 0);
165
}
166
167
/* Invalidate all 3D textures because they are aliased. */
168
for (int s = 0; s < 5; s++) {
169
for (int i = 0; i < nvc0->num_textures[s]; i++)
170
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
171
nvc0->textures_dirty[s] = ~0;
172
}
173
nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
174
}
175
176
static inline void
177
nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
178
{
179
int s;
180
181
/* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
182
for (s = 0; s < 5; s++) {
183
nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
184
nvc0->state.uniform_buffer_bound[s] = false;
185
}
186
nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
187
}
188
189
static void
190
nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
191
{
192
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
193
const int s = 5;
194
195
while (nvc0->constbuf_dirty[s]) {
196
int i = ffs(nvc0->constbuf_dirty[s]) - 1;
197
nvc0->constbuf_dirty[s] &= ~(1 << i);
198
199
if (nvc0->constbuf[s][i].user) {
200
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
201
const unsigned base = NVC0_CB_USR_INFO(s);
202
const unsigned size = nvc0->constbuf[s][0].size;
203
assert(i == 0); /* we really only want OpenGL uniforms here */
204
assert(nvc0->constbuf[s][0].u.data);
205
206
if (!nvc0->state.uniform_buffer_bound[s]) {
207
nvc0->state.uniform_buffer_bound[s] = true;
208
209
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
210
PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
211
PUSH_DATAh(push, bo->offset + base);
212
PUSH_DATA (push, bo->offset + base);
213
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
214
PUSH_DATA (push, (0 << 8) | 1);
215
}
216
nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
217
base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
218
nvc0->constbuf[s][0].u.data);
219
} else {
220
struct nv04_resource *res =
221
nv04_resource(nvc0->constbuf[s][i].u.buf);
222
if (res) {
223
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
224
PUSH_DATA (push, nvc0->constbuf[s][i].size);
225
PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
226
PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
227
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
228
PUSH_DATA (push, (i << 8) | 1);
229
230
BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
231
232
res->cb_bindings[s] |= 1 << i;
233
} else {
234
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
235
PUSH_DATA (push, (i << 8) | 0);
236
}
237
if (i == 0)
238
nvc0->state.uniform_buffer_bound[s] = false;
239
}
240
}
241
242
nvc0_compute_invalidate_constbufs(nvc0);
243
244
BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
245
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
246
}
247
248
static void
249
nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
250
{
251
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
252
struct nvc0_screen *screen = nvc0->screen;
253
254
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
255
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
256
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
257
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
258
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
259
PUSH_DATA (push, (15 << 8) | 1);
260
261
nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
262
}
263
264
static void
265
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
266
{
267
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
268
struct nvc0_screen *screen = nvc0->screen;
269
const int s = 5;
270
int i;
271
272
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
273
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
274
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
275
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
276
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
277
PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
278
279
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
280
if (nvc0->buffers[s][i].buffer) {
281
struct nv04_resource *res =
282
nv04_resource(nvc0->buffers[s][i].buffer);
283
PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
284
PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
285
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
286
PUSH_DATA (push, 0);
287
BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
288
util_range_add(&res->base, &res->valid_buffer_range,
289
nvc0->buffers[s][i].buffer_offset,
290
nvc0->buffers[s][i].buffer_offset +
291
nvc0->buffers[s][i].buffer_size);
292
} else {
293
PUSH_DATA (push, 0);
294
PUSH_DATA (push, 0);
295
PUSH_DATA (push, 0);
296
PUSH_DATA (push, 0);
297
}
298
}
299
}
300
301
void
302
nvc0_compute_validate_globals(struct nvc0_context *nvc0)
303
{
304
unsigned i;
305
306
for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
307
++i) {
308
struct pipe_resource *res = *util_dynarray_element(
309
&nvc0->global_residents, struct pipe_resource *, i);
310
if (res)
311
nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
312
nv04_resource(res), NOUVEAU_BO_RDWR);
313
}
314
}
315
316
static inline void
317
nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
318
{
319
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
320
int i;
321
322
for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
323
if (s == 5)
324
BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
325
else
326
BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
327
PUSH_DATA(push, 0);
328
PUSH_DATA(push, 0);
329
PUSH_DATA(push, 0);
330
PUSH_DATA(push, 0);
331
PUSH_DATA(push, 0x14000);
332
PUSH_DATA(push, 0);
333
}
334
}
335
336
static void
337
nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
338
{
339
/* TODO: Invalidating both 3D and CP surfaces before validating surfaces for
340
* compute is probably not really necessary, but we didn't find any better
341
* solutions for now. This fixes some invalidation issues when compute and
342
* fragment shaders are used inside the same context. Anyway, we definitely
343
* have invalidation issues between 3D and CP for other resources like SSBO
344
* and atomic counters. */
345
nvc0_compute_invalidate_surfaces(nvc0, 4);
346
nvc0_compute_invalidate_surfaces(nvc0, 5);
347
348
nvc0_validate_suf(nvc0, 5);
349
350
/* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
351
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
352
nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
353
nvc0->images_dirty[4] |= nvc0->images_valid[4];
354
}
355
356
static struct nvc0_state_validate
357
validate_list_cp[] = {
358
{ nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM },
359
{ nvc0_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF },
360
{ nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST },
361
{ nvc0_compute_validate_buffers, NVC0_NEW_CP_BUFFERS },
362
{ nvc0_compute_validate_textures, NVC0_NEW_CP_TEXTURES },
363
{ nvc0_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS },
364
{ nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS },
365
{ nvc0_compute_validate_surfaces, NVC0_NEW_CP_SURFACES },
366
};
367
368
static bool
369
nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
370
{
371
bool ret;
372
373
ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
374
ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
375
nvc0->bufctx_cp);
376
377
if (unlikely(nvc0->state.flushed))
378
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
379
return ret;
380
}
381
382
static void
383
nvc0_compute_upload_input(struct nvc0_context *nvc0,
384
const struct pipe_grid_info *info)
385
{
386
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
387
struct nvc0_screen *screen = nvc0->screen;
388
struct nvc0_program *cp = nvc0->compprog;
389
390
if (cp->parm_size) {
391
struct nouveau_bo *bo = screen->uniform_bo;
392
const unsigned base = NVC0_CB_USR_INFO(5);
393
394
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
395
PUSH_DATA (push, align(cp->parm_size, 0x100));
396
PUSH_DATAh(push, bo->offset + base);
397
PUSH_DATA (push, bo->offset + base);
398
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
399
PUSH_DATA (push, (0 << 8) | 1);
400
/* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
401
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
402
PUSH_DATA (push, 0);
403
PUSH_DATAp(push, info->input, cp->parm_size / 4);
404
405
nvc0_compute_invalidate_constbufs(nvc0);
406
}
407
408
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
409
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
410
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
411
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
412
413
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
414
/* (7) as we only upload work_dim on nvc0, the rest uses special regs */
415
PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
416
PUSH_DATA (push, info->work_dim);
417
418
BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
419
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
420
}
421
422
void
423
nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
424
{
425
struct nvc0_context *nvc0 = nvc0_context(pipe);
426
struct nvc0_screen *screen = nvc0->screen;
427
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
428
struct nvc0_program *cp = nvc0->compprog;
429
int ret;
430
431
ret = !nvc0_state_validate_cp(nvc0, ~0);
432
if (ret) {
433
NOUVEAU_ERR("Failed to launch grid !\n");
434
return;
435
}
436
437
nvc0_compute_upload_input(nvc0, info);
438
439
BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
440
PUSH_DATA (push, cp->code_base);
441
442
BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
443
PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));
444
PUSH_DATA (push, 0);
445
PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
446
447
BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
448
PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
449
PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
450
PUSH_DATA (push, cp->num_barriers);
451
BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
452
PUSH_DATA (push, cp->num_gprs);
453
454
/* launch preliminary setup */
455
BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
456
PUSH_DATA (push, 0x1);
457
BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
458
PUSH_DATA (push, 0);
459
BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
460
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
461
462
/* block setup */
463
BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
464
PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
465
PUSH_DATA (push, info->block[2]);
466
467
nouveau_pushbuf_space(push, 32, 2, 1);
468
PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
469
470
if (unlikely(info->indirect)) {
471
struct nv04_resource *res = nv04_resource(info->indirect);
472
uint32_t offset = res->offset + info->indirect_offset;
473
unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
474
475
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
476
PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
477
nouveau_pushbuf_data(push, res->bo, offset,
478
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
479
} else {
480
/* grid setup */
481
BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
482
PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
483
PUSH_DATA (push, info->grid[2]);
484
485
/* kernel launching */
486
BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
487
PUSH_DATA (push, 0);
488
BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
489
PUSH_DATA (push, 0);
490
BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
491
PUSH_DATA (push, 0x1000);
492
BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
493
PUSH_DATA (push, 0);
494
BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
495
PUSH_DATA (push, 0x1);
496
}
497
498
/* TODO: Not sure if this is really necessary. */
499
nvc0_compute_invalidate_surfaces(nvc0, 5);
500
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
501
nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
502
nvc0->images_dirty[5] |= nvc0->images_valid[5];
503
504
nvc0_update_compute_invocations_counter(nvc0, info);
505
}
506
507
static void
508
nvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0,
509
const struct pipe_grid_info *info) {
510
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
511
struct nv04_resource *res = nv04_resource(info->indirect);
512
uint32_t offset = res->offset + info->indirect_offset;
513
514
nouveau_pushbuf_space(push, 16, 0, 8);
515
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
516
BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7);
517
PUSH_DATA(push, 6);
518
PUSH_DATA(push, info->block[0]);
519
PUSH_DATA(push, info->block[1]);
520
PUSH_DATA(push, info->block[2]);
521
nouveau_pushbuf_data(push, res->bo, offset,
522
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
523
}
524
525
void
526
nvc0_update_compute_invocations_counter(struct nvc0_context *nvc0,
527
const struct pipe_grid_info *info) {
528
if (unlikely(info->indirect)) {
529
nvc0_compute_update_indirect_invocations(nvc0, info);
530
} else {
531
uint64_t invocations = info->block[0] * info->block[1] * info->block[2];
532
invocations *= info->grid[0] * info->grid[1] * info->grid[2];
533
nvc0->compute_invocations += invocations;
534
}
535
}
536
537