Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
4574 views
/*1* Copyright 2013 Nouveau Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Christoph Bumiller, Samuel Pitoiset22*/2324#include "nvc0/nvc0_context.h"2526#include "nvc0/nvc0_compute.xml.h"2728int29nvc0_screen_compute_setup(struct nvc0_screen *screen,30struct nouveau_pushbuf *push)31{32struct nouveau_object *chan = screen->base.channel;33struct nouveau_device *dev = screen->base.device;34uint32_t obj_class;35int ret;36int i;3738switch (dev->chipset & ~0xf) {39case 0xc0:40case 0xd0:41/* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,42* in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */43obj_class = NVC0_COMPUTE_CLASS;44break;45default:46NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);47return -1;48}4950ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,51&screen->compute);52if (ret) {53NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);54return ret;55}5657BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);58PUSH_DATA (push, screen->compute->oclass);5960/* hardware limit */61BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);62PUSH_DATA (push, screen->mp_count);63BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);64PUSH_DATA (push, 0xf);6566BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);67PUSH_DATA (push, 0x8000);6869/* global memory setup */70BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);71PUSH_DATA (push, 0);72BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);73for (i = 0; i <= 0xff; i++)74PUSH_DATA (push, (0xc << 28) | (i << 16) | i);75BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);76PUSH_DATA (push, 1);7778/* local memory and cstack setup */79BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);80PUSH_DATAh(push, screen->tls->offset);81PUSH_DATA (push, screen->tls->offset);82BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);83PUSH_DATAh(push, screen->tls->size);84PUSH_DATA (push, screen->tls->size);85BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);86PUSH_DATA (push, 0);87BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);88PUSH_DATA (push, 0xff << 24);8990/* shared memory setup */91BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);92PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);93BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);94PUSH_DATA (push, 0xfe << 24);95BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);96PUSH_DATA (push, 0);9798/* code segment setup */99BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);100PUSH_DATAh(push, screen->text->offset);101PUSH_DATA (push, screen->text->offset);102103/* textures */104BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);105PUSH_DATAh(push, screen->txc->offset);106PUSH_DATA (push, screen->txc->offset);107PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);108109/* samplers */110BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);111PUSH_DATAh(push, screen->txc->offset + 65536);112PUSH_DATA (push, screen->txc->offset + 65536);113PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);114115/* MS sample coordinate offsets */116BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);117PUSH_DATA (push, NVC0_CB_AUX_SIZE);118PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));119PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));120BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);121PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);122PUSH_DATA (push, 0); /* 0 */123PUSH_DATA (push, 0);124PUSH_DATA (push, 1); /* 1 */125PUSH_DATA (push, 0);126PUSH_DATA (push, 0); /* 2 */127PUSH_DATA (push, 1);128PUSH_DATA (push, 1); /* 3 */129PUSH_DATA (push, 1);130PUSH_DATA (push, 2); /* 4 */131PUSH_DATA (push, 0);132PUSH_DATA (push, 3); /* 5 */133PUSH_DATA (push, 0);134PUSH_DATA (push, 2); /* 6 */135PUSH_DATA (push, 1);136PUSH_DATA (push, 3); /* 7 */137PUSH_DATA (push, 1);138139return 0;140}141142static void143nvc0_compute_validate_samplers(struct nvc0_context *nvc0)144{145bool need_flush = nvc0_validate_tsc(nvc0, 5);146if (need_flush) {147BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);148PUSH_DATA (nvc0->base.pushbuf, 0);149}150151/* Invalidate all 3D samplers because they are aliased. */152for (int s = 0; s < 5; s++)153nvc0->samplers_dirty[s] = ~0;154nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;155}156157static void158nvc0_compute_validate_textures(struct nvc0_context *nvc0)159{160bool need_flush = nvc0_validate_tic(nvc0, 5);161if (need_flush) {162BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);163PUSH_DATA (nvc0->base.pushbuf, 0);164}165166/* Invalidate all 3D textures because they are aliased. */167for (int s = 0; s < 5; s++) {168for (int i = 0; i < nvc0->num_textures[s]; i++)169nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));170nvc0->textures_dirty[s] = ~0;171}172nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;173}174175static inline void176nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)177{178int s;179180/* Invalidate all 3D constbufs because they are aliased with COMPUTE. */181for (s = 0; s < 5; s++) {182nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];183nvc0->state.uniform_buffer_bound[s] = false;184}185nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;186}187188static void189nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)190{191struct nouveau_pushbuf *push = nvc0->base.pushbuf;192const int s = 5;193194while (nvc0->constbuf_dirty[s]) {195int i = ffs(nvc0->constbuf_dirty[s]) - 1;196nvc0->constbuf_dirty[s] &= ~(1 << i);197198if (nvc0->constbuf[s][i].user) {199struct nouveau_bo *bo = nvc0->screen->uniform_bo;200const unsigned base = NVC0_CB_USR_INFO(s);201const unsigned size = nvc0->constbuf[s][0].size;202assert(i == 0); /* we really only want OpenGL uniforms here */203assert(nvc0->constbuf[s][0].u.data);204205if (!nvc0->state.uniform_buffer_bound[s]) {206nvc0->state.uniform_buffer_bound[s] = true;207208BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);209PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);210PUSH_DATAh(push, bo->offset + base);211PUSH_DATA (push, bo->offset + base);212BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);213PUSH_DATA (push, (0 << 8) | 1);214}215nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),216base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,217nvc0->constbuf[s][0].u.data);218} else {219struct nv04_resource *res =220nv04_resource(nvc0->constbuf[s][i].u.buf);221if (res) {222BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);223PUSH_DATA (push, nvc0->constbuf[s][i].size);224PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);225PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);226BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);227PUSH_DATA (push, (i << 8) | 1);228229BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);230231res->cb_bindings[s] |= 1 << i;232} else {233BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);234PUSH_DATA (push, (i << 8) | 0);235}236if (i == 0)237nvc0->state.uniform_buffer_bound[s] = false;238}239}240241nvc0_compute_invalidate_constbufs(nvc0);242243BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);244PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);245}246247static void248nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)249{250struct nouveau_pushbuf *push = nvc0->base.pushbuf;251struct nvc0_screen *screen = nvc0->screen;252253BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);254PUSH_DATA (push, NVC0_CB_AUX_SIZE);255PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));256PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));257BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);258PUSH_DATA (push, (15 << 8) | 1);259260nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;261}262263static void264nvc0_compute_validate_buffers(struct nvc0_context *nvc0)265{266struct nouveau_pushbuf *push = nvc0->base.pushbuf;267struct nvc0_screen *screen = nvc0->screen;268const int s = 5;269int i;270271BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);272PUSH_DATA (push, NVC0_CB_AUX_SIZE);273PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));274PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));275BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);276PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));277278for (i = 0; i < NVC0_MAX_BUFFERS; i++) {279if (nvc0->buffers[s][i].buffer) {280struct nv04_resource *res =281nv04_resource(nvc0->buffers[s][i].buffer);282PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);283PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);284PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);285PUSH_DATA (push, 0);286BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);287util_range_add(&res->base, &res->valid_buffer_range,288nvc0->buffers[s][i].buffer_offset,289nvc0->buffers[s][i].buffer_offset +290nvc0->buffers[s][i].buffer_size);291} else {292PUSH_DATA (push, 0);293PUSH_DATA (push, 0);294PUSH_DATA (push, 0);295PUSH_DATA (push, 0);296}297}298}299300void301nvc0_compute_validate_globals(struct nvc0_context *nvc0)302{303unsigned i;304305for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);306++i) {307struct pipe_resource *res = *util_dynarray_element(308&nvc0->global_residents, struct pipe_resource *, i);309if (res)310nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,311nv04_resource(res), NOUVEAU_BO_RDWR);312}313}314315static inline void316nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)317{318struct nouveau_pushbuf *push = nvc0->base.pushbuf;319int i;320321for (i = 0; i < NVC0_MAX_IMAGES; ++i) {322if (s == 5)323BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);324else325BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);326PUSH_DATA(push, 0);327PUSH_DATA(push, 0);328PUSH_DATA(push, 0);329PUSH_DATA(push, 0);330PUSH_DATA(push, 0x14000);331PUSH_DATA(push, 0);332}333}334335static void336nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)337{338/* TODO: Invalidating both 3D and CP surfaces before validating surfaces for339* compute is probably not really necessary, but we didn't find any better340* solutions for now. This fixes some invalidation issues when compute and341* fragment shaders are used inside the same context. Anyway, we definitely342* have invalidation issues between 3D and CP for other resources like SSBO343* and atomic counters. */344nvc0_compute_invalidate_surfaces(nvc0, 4);345nvc0_compute_invalidate_surfaces(nvc0, 5);346347nvc0_validate_suf(nvc0, 5);348349/* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */350nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);351nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;352nvc0->images_dirty[4] |= nvc0->images_valid[4];353}354355static struct nvc0_state_validate356validate_list_cp[] = {357{ nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM },358{ nvc0_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF },359{ nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST },360{ nvc0_compute_validate_buffers, NVC0_NEW_CP_BUFFERS },361{ nvc0_compute_validate_textures, NVC0_NEW_CP_TEXTURES },362{ nvc0_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS },363{ nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS },364{ nvc0_compute_validate_surfaces, NVC0_NEW_CP_SURFACES },365};366367static bool368nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)369{370bool ret;371372ret = nvc0_state_validate(nvc0, mask, validate_list_cp,373ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,374nvc0->bufctx_cp);375376if (unlikely(nvc0->state.flushed))377nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);378return ret;379}380381static void382nvc0_compute_upload_input(struct nvc0_context *nvc0,383const struct pipe_grid_info *info)384{385struct nouveau_pushbuf *push = nvc0->base.pushbuf;386struct nvc0_screen *screen = nvc0->screen;387struct nvc0_program *cp = nvc0->compprog;388389if (cp->parm_size) {390struct nouveau_bo *bo = screen->uniform_bo;391const unsigned base = NVC0_CB_USR_INFO(5);392393BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);394PUSH_DATA (push, align(cp->parm_size, 0x100));395PUSH_DATAh(push, bo->offset + base);396PUSH_DATA (push, bo->offset + base);397BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);398PUSH_DATA (push, (0 << 8) | 1);399/* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */400BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);401PUSH_DATA (push, 0);402PUSH_DATAp(push, info->input, cp->parm_size / 4);403404nvc0_compute_invalidate_constbufs(nvc0);405}406407BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);408PUSH_DATA (push, NVC0_CB_AUX_SIZE);409PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));410PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));411412BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);413/* (7) as we only upload work_dim on nvc0, the rest uses special regs */414PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));415PUSH_DATA (push, info->work_dim);416417BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);418PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);419}420421void422nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)423{424struct nvc0_context *nvc0 = nvc0_context(pipe);425struct nvc0_screen *screen = nvc0->screen;426struct nouveau_pushbuf *push = nvc0->base.pushbuf;427struct nvc0_program *cp = nvc0->compprog;428int ret;429430ret = !nvc0_state_validate_cp(nvc0, ~0);431if (ret) {432NOUVEAU_ERR("Failed to launch grid !\n");433return;434}435436nvc0_compute_upload_input(nvc0, info);437438BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);439PUSH_DATA (push, cp->code_base);440441BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);442PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));443PUSH_DATA (push, 0);444PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */445446BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);447PUSH_DATA (push, align(cp->cp.smem_size, 0x100));448PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);449PUSH_DATA (push, cp->num_barriers);450BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);451PUSH_DATA (push, cp->num_gprs);452453/* launch preliminary setup */454BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);455PUSH_DATA (push, 0x1);456BEGIN_NVC0(push, SUBC_CP(0x036c), 1);457PUSH_DATA (push, 0);458BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);459PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);460461/* block setup */462BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);463PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);464PUSH_DATA (push, info->block[2]);465466nouveau_pushbuf_space(push, 32, 2, 1);467PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);468469if (unlikely(info->indirect)) {470struct nv04_resource *res = nv04_resource(info->indirect);471uint32_t offset = res->offset + info->indirect_offset;472unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;473474PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);475PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));476nouveau_pushbuf_data(push, res->bo, offset,477NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);478} else {479/* grid setup */480BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);481PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);482PUSH_DATA (push, info->grid[2]);483484/* kernel launching */485BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);486PUSH_DATA (push, 0);487BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);488PUSH_DATA (push, 0);489BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);490PUSH_DATA (push, 0x1000);491BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);492PUSH_DATA (push, 0);493BEGIN_NVC0(push, SUBC_CP(0x0360), 1);494PUSH_DATA (push, 0x1);495}496497/* TODO: Not sure if this is really necessary. */498nvc0_compute_invalidate_surfaces(nvc0, 5);499nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);500nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;501nvc0->images_dirty[5] |= nvc0->images_valid[5];502503nvc0_update_compute_invocations_counter(nvc0, info);504}505506static void507nvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0,508const struct pipe_grid_info *info) {509struct nouveau_pushbuf *push = nvc0->base.pushbuf;510struct nv04_resource *res = nv04_resource(info->indirect);511uint32_t offset = res->offset + info->indirect_offset;512513nouveau_pushbuf_space(push, 16, 0, 8);514PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);515BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7);516PUSH_DATA(push, 6);517PUSH_DATA(push, info->block[0]);518PUSH_DATA(push, info->block[1]);519PUSH_DATA(push, info->block[2]);520nouveau_pushbuf_data(push, res->bo, offset,521NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);522}523524void525nvc0_update_compute_invocations_counter(struct nvc0_context *nvc0,526const struct pipe_grid_info *info) {527if (unlikely(info->indirect)) {528nvc0_compute_update_indirect_invocations(nvc0, info);529} else {530uint64_t invocations = info->block[0] * info->block[1] * info->block[2];531invocations *= info->grid[0] * info->grid[1] * info->grid[2];532nvc0->compute_invocations += invocations;533}534}535536537