Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
4574 views
/*1* Copyright 2012 Nouveau Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Christoph Bumiller22*/2324#include "nvc0/nvc0_context.h"25#include "nvc0/nve4_compute.h"2627#include "codegen/nv50_ir_driver.h"2829#include "drf.h"30#include "qmd.h"31#include "cla0c0qmd.h"32#include "clc0c0qmd.h"33#include "clc3c0qmd.h"3435#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)36#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)37#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)38#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)39#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)40#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)4142int43nve4_screen_compute_setup(struct nvc0_screen *screen,44struct nouveau_pushbuf *push)45{46struct nouveau_device *dev = screen->base.device;47struct nouveau_object *chan = screen->base.channel;48int i;49int ret;50uint32_t obj_class;51uint64_t address;5253switch (dev->chipset & ~0xf) {54case 0x160:55obj_class = TU102_COMPUTE_CLASS;56break;57case 0x140:58obj_class = GV100_COMPUTE_CLASS;59break;60case 0x100:61case 0xf0:62obj_class = NVF0_COMPUTE_CLASS; /* GK110 */63break;64case 0xe0:65obj_class = NVE4_COMPUTE_CLASS; /* GK104 */66break;67case 0x110:68obj_class = GM107_COMPUTE_CLASS;69break;70case 0x120:71obj_class = GM200_COMPUTE_CLASS;72break;73case 0x130:74obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ?75GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;76break;77default:78NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);79return -1;80}8182ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,83&screen->compute);84if (ret) {85NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);86return ret;87}8889BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);90PUSH_DATA (push, screen->compute->oclass);9192BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2);93PUSH_DATAh(push, screen->tls->offset);94PUSH_DATA (push, screen->tls->offset);95/* No idea why there are 2. Divide size by 2 to be safe.96* Actually this might be per-MP TEMP size and looks like I'm only using97* 2 MPs instead of all 8.98*/99BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3);100PUSH_DATAh(push, screen->tls->size / screen->mp_count);101PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);102PUSH_DATA (push, 0xff);103if (obj_class < GV100_COMPUTE_CLASS) {104BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);105PUSH_DATAh(push, screen->tls->size / screen->mp_count);106PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);107PUSH_DATA (push, 0xff);108}109110/* Unified address space ? Who needs that ? Certainly not OpenCL.111*112* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be113* accessible. We cannot prevent that at the moment, so expect failure.114*/115if (obj_class < GV100_COMPUTE_CLASS) {116BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);117PUSH_DATA (push, 0xff << 24);118BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);119PUSH_DATA (push, 0xfe << 24);120121BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);122PUSH_DATAh(push, screen->text->offset);123PUSH_DATA (push, screen->text->offset);124} else {125BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);126PUSH_DATAh(push, 0xfeULL << 24);127PUSH_DATA (push, 0xfeULL << 24);128BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);129PUSH_DATAh(push, 0xffULL << 24);130PUSH_DATA (push, 0xffULL << 24);131}132133BEGIN_NVC0(push, SUBC_CP(0x0310), 1);134PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);135136/* NOTE: these do not affect the state used by the 3D object */137BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3);138PUSH_DATAh(push, screen->txc->offset);139PUSH_DATA (push, screen->txc->offset);140PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);141BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3);142PUSH_DATAh(push, screen->txc->offset + 65536);143PUSH_DATA (push, screen->txc->offset + 65536);144PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);145146if (obj_class >= NVF0_COMPUTE_CLASS) {147/* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1)148* passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently149* disabled because our firmware doesn't support these commands and the150* GPU hangs if they are used. */151BEGIN_NIC0(push, SUBC_CP(0x0248), 64);152for (i = 63; i >= 0; i--)153PUSH_DATA(push, 0x38000 | i);154IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);155}156157BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);158PUSH_DATA (push, 7); /* does not interfere with 3D */159160/* Disabling this UNK command avoid a read fault when using texelFetch()161* from a compute shader for weird reasons.162if (obj_class == NVF0_COMPUTE_CLASS)163IMMED_NVC0(push, SUBC_CP(0x02c4), 1);164*/165166address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);167168/* MS sample coordinate offsets: these do not work with _ALT modes ! */169BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);170PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);171PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);172BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);173PUSH_DATA (push, 64);174PUSH_DATA (push, 1);175BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);176PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));177PUSH_DATA (push, 0); /* 0 */178PUSH_DATA (push, 0);179PUSH_DATA (push, 1); /* 1 */180PUSH_DATA (push, 0);181PUSH_DATA (push, 0); /* 2 */182PUSH_DATA (push, 1);183PUSH_DATA (push, 1); /* 3 */184PUSH_DATA (push, 1);185PUSH_DATA (push, 2); /* 4 */186PUSH_DATA (push, 0);187PUSH_DATA (push, 3); /* 5 */188PUSH_DATA (push, 0);189PUSH_DATA (push, 2); /* 6 */190PUSH_DATA (push, 1);191PUSH_DATA (push, 3); /* 7 */192PUSH_DATA (push, 1);193194#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER195BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);196PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);197PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);198BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);199PUSH_DATA (push, 28);200PUSH_DATA (push, 1);201BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8);202PUSH_DATA (push, 1);203PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);204PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);205PUSH_DATA (push, screen->tls->offset);206PUSH_DATAh(push, screen->tls->offset);207PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */208PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */209PUSH_DATA (push, 0); /* warp cfstack size */210#endif211212BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);213PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);214215return 0;216}217218static void219gm107_compute_validate_surfaces(struct nvc0_context *nvc0,220struct pipe_image_view *view, int slot)221{222struct nv04_resource *res = nv04_resource(view->resource);223struct nouveau_pushbuf *push = nvc0->base.pushbuf;224struct nvc0_screen *screen = nvc0->screen;225struct nouveau_bo *txc = nvc0->screen->txc;226struct nv50_tic_entry *tic;227uint64_t address;228const int s = 5;229230tic = nv50_tic_entry(nvc0->images_tic[s][slot]);231232res = nv04_resource(tic->pipe.texture);233nvc0_update_tic(nvc0, tic, res);234235if (tic->id < 0) {236tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);237238/* upload the texture view */239PUSH_SPACE(push, 16);240BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);241PUSH_DATAh(push, txc->offset + (tic->id * 32));242PUSH_DATA (push, txc->offset + (tic->id * 32));243BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);244PUSH_DATA (push, 32);245PUSH_DATA (push, 1);246BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);247PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));248PUSH_DATAp(push, &tic->tic[0], 8);249250BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), 1);251PUSH_DATA (push, (tic->id << 4) | 1);252} else253if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {254BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), 1);255PUSH_DATA (push, (tic->id << 4) | 1);256}257nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);258259res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;260res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;261262BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);263264address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);265266/* upload the texture handle */267BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);268PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));269PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));270BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);271PUSH_DATA (push, 4);272PUSH_DATA (push, 0x1);273BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 2);274PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));275PUSH_DATA (push, tic->id);276277BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);278PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);279}280281static void282nve4_compute_validate_surfaces(struct nvc0_context *nvc0)283{284struct nouveau_pushbuf *push = nvc0->base.pushbuf;285uint64_t address;286const int s = 5;287int i, j;288289if (!nvc0->images_dirty[s])290return;291292address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);293294for (i = 0; i < NVC0_MAX_IMAGES; ++i) {295struct pipe_image_view *view = &nvc0->images[s][i];296297BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);298PUSH_DATAh(push, address + NVC0_CB_AUX_SU_INFO(i));299PUSH_DATA (push, address + NVC0_CB_AUX_SU_INFO(i));300BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);301PUSH_DATA (push, 16 * 4);302PUSH_DATA (push, 0x1);303BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 16);304PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));305306if (view->resource) {307struct nv04_resource *res = nv04_resource(view->resource);308309if (res->base.target == PIPE_BUFFER) {310if (view->access & PIPE_IMAGE_ACCESS_WRITE)311nvc0_mark_image_range_valid(view);312}313314nve4_set_surface_info(push, view, nvc0);315BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);316317if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)318gm107_compute_validate_surfaces(nvc0, view, i);319} else {320for (j = 0; j < 16; j++)321PUSH_DATA(push, 0);322}323}324}325326/* Thankfully, textures with samplers follow the normal rules. */327static void328nve4_compute_validate_samplers(struct nvc0_context *nvc0)329{330bool need_flush = nve4_validate_tsc(nvc0, 5);331if (need_flush) {332BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);333PUSH_DATA (nvc0->base.pushbuf, 0);334}335336/* Invalidate all 3D samplers because they are aliased. */337for (int s = 0; s < 5; s++)338nvc0->samplers_dirty[s] = ~0;339nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;340}341342/* (Code duplicated at bottom for various non-convincing reasons.343* E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC344* entries to avoid a subchannel switch.345* Same for texture cache flushes.346* Also, the bufctx differs, and more IFs in the 3D version looks ugly.)347*/348static void nve4_compute_validate_textures(struct nvc0_context *);349350static void351nve4_compute_set_tex_handles(struct nvc0_context *nvc0)352{353struct nouveau_pushbuf *push = nvc0->base.pushbuf;354struct nvc0_screen *screen = nvc0->screen;355uint64_t address;356const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);357unsigned i, n;358uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];359360if (!dirty)361return;362i = ffs(dirty) - 1;363n = util_logbase2(dirty) + 1 - i;364assert(n);365366address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);367368BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);369PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));370PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));371BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);372PUSH_DATA (push, n * 4);373PUSH_DATA (push, 0x1);374BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n);375PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));376PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);377378BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);379PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);380381nvc0->textures_dirty[s] = 0;382nvc0->samplers_dirty[s] = 0;383}384385static void386nve4_compute_validate_constbufs(struct nvc0_context *nvc0)387{388struct nouveau_pushbuf *push = nvc0->base.pushbuf;389const int s = 5;390391while (nvc0->constbuf_dirty[s]) {392int i = ffs(nvc0->constbuf_dirty[s]) - 1;393nvc0->constbuf_dirty[s] &= ~(1 << i);394395if (nvc0->constbuf[s][i].user) {396struct nouveau_bo *bo = nvc0->screen->uniform_bo;397const unsigned base = NVC0_CB_USR_INFO(s);398const unsigned size = nvc0->constbuf[s][0].size;399assert(i == 0); /* we really only want OpenGL uniforms here */400assert(nvc0->constbuf[s][0].u.data);401402BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);403PUSH_DATAh(push, bo->offset + base);404PUSH_DATA (push, bo->offset + base);405BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);406PUSH_DATA (push, size);407PUSH_DATA (push, 0x1);408BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4));409PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));410PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4);411}412else {413struct nv04_resource *res =414nv04_resource(nvc0->constbuf[s][i].u.buf);415if (res) {416uint64_t address417= nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);418419/* constbufs above 0 will are fetched via ubo info in the shader */420if (i > 0) {421BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);422PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));423PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));424BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);425PUSH_DATA (push, 4 * 4);426PUSH_DATA (push, 0x1);427BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);428PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));429430PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);431PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);432PUSH_DATA (push, nvc0->constbuf[s][i].size);433PUSH_DATA (push, 0);434}435436BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);437res->cb_bindings[s] |= 1 << i;438}439}440}441442BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);443PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);444}445446static void447nve4_compute_validate_buffers(struct nvc0_context *nvc0)448{449struct nouveau_pushbuf *push = nvc0->base.pushbuf;450uint64_t address;451const int s = 5;452int i;453454address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);455456BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);457PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(0));458PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(0));459BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);460PUSH_DATA (push, 4 * NVC0_MAX_BUFFERS * 4);461PUSH_DATA (push, 0x1);462BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4 * NVC0_MAX_BUFFERS);463PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));464465for (i = 0; i < NVC0_MAX_BUFFERS; i++) {466if (nvc0->buffers[s][i].buffer) {467struct nv04_resource *res =468nv04_resource(nvc0->buffers[s][i].buffer);469PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);470PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);471PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);472PUSH_DATA (push, 0);473BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);474util_range_add(&res->base, &res->valid_buffer_range,475nvc0->buffers[s][i].buffer_offset,476nvc0->buffers[s][i].buffer_offset +477nvc0->buffers[s][i].buffer_size);478} else {479PUSH_DATA (push, 0);480PUSH_DATA (push, 0);481PUSH_DATA (push, 0);482PUSH_DATA (push, 0);483}484}485}486487static struct nvc0_state_validate488validate_list_cp[] = {489{ nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM },490{ nve4_compute_validate_textures, NVC0_NEW_CP_TEXTURES },491{ nve4_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS },492{ nve4_compute_set_tex_handles, NVC0_NEW_CP_TEXTURES |493NVC0_NEW_CP_SAMPLERS },494{ nve4_compute_validate_surfaces, NVC0_NEW_CP_SURFACES },495{ nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS },496{ nve4_compute_validate_buffers, NVC0_NEW_CP_BUFFERS },497{ nve4_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF },498};499500static bool501nve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)502{503bool ret;504505ret = nvc0_state_validate(nvc0, mask, validate_list_cp,506ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,507nvc0->bufctx_cp);508509if (unlikely(nvc0->state.flushed))510nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);511return ret;512}513514static void515nve4_compute_upload_input(struct nvc0_context *nvc0,516const struct pipe_grid_info *info)517{518struct nvc0_screen *screen = nvc0->screen;519struct nouveau_pushbuf *push = nvc0->base.pushbuf;520struct nvc0_program *cp = nvc0->compprog;521uint64_t address;522523address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);524525if (cp->parm_size) {526BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);527PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));528PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));529BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);530PUSH_DATA (push, cp->parm_size);531PUSH_DATA (push, 0x1);532BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + DIV_ROUND_UP(cp->parm_size, 4));533PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));534PUSH_DATAb(push, info->input, cp->parm_size);535}536BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);537PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));538PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));539BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);540PUSH_DATA (push, 8 * 4);541PUSH_DATA (push, 0x1);542543if (unlikely(info->indirect)) {544struct nv04_resource *res = nv04_resource(info->indirect);545uint32_t offset = res->offset + info->indirect_offset;546547nouveau_pushbuf_space(push, 32, 0, 1);548PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);549550BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);551PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));552PUSH_DATAp(push, info->block, 3);553nouveau_pushbuf_data(push, res->bo, offset,554NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);555} else {556BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);557PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));558PUSH_DATAp(push, info->block, 3);559PUSH_DATAp(push, info->grid, 3);560}561PUSH_DATA (push, 0);562PUSH_DATA (push, info->work_dim);563564BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);565PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);566}567568static inline void569gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,570struct nouveau_bo *bo, uint32_t base, uint32_t size)571{572uint64_t address = bo->offset + base;573574assert(index < 8);575assert(!(base & 0xff));576577NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);578NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);579NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,580DIV_ROUND_UP(size, 16));581NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);582}583584static inline void585nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo,586uint32_t base, uint32_t size)587{588uint64_t address = bo->offset + base;589590assert(index < 8);591assert(!(base & 0xff));592593NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);594NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);595NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);596NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);597}598599static void600nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)601{602// only user constant buffers 0-6 can be put in the descriptor, the rest are603// loaded through global memory604for (int i = 0; i <= 6; i++) {605if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf)606continue;607608struct nv04_resource *res =609nv04_resource(nvc0->constbuf[5][i].u.buf);610611uint32_t base = res->offset + nvc0->constbuf[5][i].offset;612uint32_t size = nvc0->constbuf[5][i].size;613if (gp100)614gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size);615else616nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size);617}618619// there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because620// nve4_compute_upload_input() does it later621}622623static void624nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,625const struct pipe_grid_info *info)626{627const struct nvc0_screen *screen = nvc0->screen;628const struct nvc0_program *cp = nvc0->compprog;629630NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);631NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);632NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);633NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);634NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);635NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);636NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);637NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);638NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);639640NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base);641642NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);643NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);644NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);645NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);646NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);647NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);648649NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,650align(cp->cp.smem_size, 0x100));651NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,652(cp->hdr[1] & 0xfffff0) +653align(cp->cp.lmem_size, 0x10));654NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);655NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);656657if (cp->cp.smem_size > (32 << 10))658NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,659DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);660else661if (cp->cp.smem_size > (16 << 10))662NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,663DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);664else665NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,666DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);667668NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);669NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);670671// Only bind user uniforms and the driver constant buffer through the672// launch descriptor because UBOs are sticked to the driver cb to avoid the673// limitation of 8 CBs.674if (nvc0->constbuf[5][0].user || cp->parm_size) {675nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,676NVC0_CB_USR_INFO(5), 1 << 16);677678// Later logic will attempt to bind a real buffer at position 0. That679// should not happen if we've bound a user buffer.680assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);681}682nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,683NVC0_CB_AUX_INFO(5), 1 << 11);684685nve4_compute_setup_buf_cb(nvc0, false, qmd);686}687688static void689gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,690const struct pipe_grid_info *info)691{692const struct nvc0_screen *screen = nvc0->screen;693const struct nvc0_program *cp = nvc0->compprog;694695NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);696NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);697NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);698NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);699700NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, cp->code_base);701702NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);703NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);704NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);705NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);706NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);707NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);708709NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,710align(cp->cp.smem_size, 0x100));711NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,712(cp->hdr[1] & 0xfffff0) +713align(cp->cp.lmem_size, 0x10));714NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);715NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);716717NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);718NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);719720// Only bind user uniforms and the driver constant buffer through the721// launch descriptor because UBOs are sticked to the driver cb to avoid the722// limitation of 8 CBs.723if (nvc0->constbuf[5][0].user || cp->parm_size) {724gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,725NVC0_CB_USR_INFO(5), 1 << 16);726727// Later logic will attempt to bind a real buffer at position 0. That728// should not happen if we've bound a user buffer.729assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);730}731gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,732NVC0_CB_AUX_INFO(5), 1 << 11);733734nve4_compute_setup_buf_cb(nvc0, true, qmd);735}736737static int738gv100_sm_config_smem_size(u32 size)739{740if (size > 64 * 1024) size = 96 * 1024;741else if (size > 32 * 1024) size = 64 * 1024;742else if (size > 16 * 1024) size = 32 * 1024;743else if (size > 8 * 1024) size = 16 * 1024;744else size = 8 * 1024;745return (size / 4096) + 1;746}747748static void749gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,750const struct pipe_grid_info *info)751{752struct nvc0_program *cp = nvc0->compprog;753struct nvc0_screen *screen = nvc0->screen;754uint64_t entry = screen->text->offset + cp->code_base;755756NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);757NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);758NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);759NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,760align(cp->cp.smem_size, 0x100));761NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,762(cp->hdr[1] & 0xfffff0) +763align(cp->cp.lmem_size, 0x10));764NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);765NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,766gv100_sm_config_smem_size(8 * 1024));767NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,768gv100_sm_config_smem_size(96 * 1024));769NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);770NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);771NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,772gv100_sm_config_smem_size(cp->cp.smem_size));773774NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);775NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);776NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);777NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);778NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);779NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);780NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);781NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);782783// Only bind user uniforms and the driver constant buffer through the784// launch descriptor because UBOs are sticked to the driver cb to avoid the785// limitation of 8 CBs.786if (nvc0->constbuf[5][0].user || cp->parm_size) {787gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,788NVC0_CB_USR_INFO(5), 1 << 16);789790// Later logic will attempt to bind a real buffer at position 0. That791// should not happen if we've bound a user buffer.792assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);793}794gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,795NVC0_CB_AUX_INFO(5), 1 << 11);796797nve4_compute_setup_buf_cb(nvc0, true, qmd);798799NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);800NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);801}802803static inline void *804nve4_compute_alloc_launch_desc(struct nouveau_context *nv,805struct nouveau_bo **pbo, uint64_t *pgpuaddr)806{807uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);808if (!ptr)809return NULL;810if (*pgpuaddr & 255) {811unsigned adj = 256 - (*pgpuaddr & 255);812ptr += adj;813*pgpuaddr += adj;814}815memset(ptr, 0x00, 256);816return ptr;817}818819static void820nve4_upload_indirect_desc(struct nouveau_pushbuf *push,821struct nv04_resource *res, uint64_t gpuaddr,822uint32_t length, uint32_t bo_offset)823{824BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);825PUSH_DATAh(push, gpuaddr);826PUSH_DATA (push, gpuaddr);827BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);828PUSH_DATA (push, length);829PUSH_DATA (push, 1);830831nouveau_pushbuf_space(push, 32, 0, 1);832PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);833834BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (length / 4));835PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));836nouveau_pushbuf_data(push, res->bo, bo_offset,837NVC0_IB_ENTRY_1_NO_PREFETCH | length);838}839840void841nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)842{843struct nvc0_context *nvc0 = nvc0_context(pipe);844struct nvc0_screen *screen = nvc0->screen;845struct nouveau_pushbuf *push = nvc0->base.pushbuf;846void *desc;847uint64_t desc_gpuaddr;848struct nouveau_bo *desc_bo;849int ret;850851desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);852if (!desc) {853ret = -1;854goto out;855}856BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,857desc_bo);858859list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {860nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,861resident->flags);862}863864list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {865nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,866resident->flags);867}868869ret = !nve4_state_validate_cp(nvc0, ~0);870if (ret)871goto out;872873if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)874gv100_compute_setup_launch_desc(nvc0, desc, info);875else876if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)877gp100_compute_setup_launch_desc(nvc0, desc, info);878else879nve4_compute_setup_launch_desc(nvc0, desc, info);880881nve4_compute_upload_input(nvc0, info);882883#ifndef NDEBUG884if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {885debug_printf("Queue Meta Data:\n");886if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)887NVC3C0QmdDump_V02_02(desc);888else889if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)890NVC0C0QmdDump_V02_01(desc);891else892NVA0C0QmdDump_V00_06(desc);893}894#endif895896if (unlikely(info->indirect)) {897struct nv04_resource *res = nv04_resource(info->indirect);898uint32_t offset = res->offset + info->indirect_offset;899900/* upload the descriptor */901BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);902PUSH_DATAh(push, desc_gpuaddr);903PUSH_DATA (push, desc_gpuaddr);904BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);905PUSH_DATA (push, 256);906PUSH_DATA (push, 1);907BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));908PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));909PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);910911if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) {912nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 12, offset);913} else {914/* overwrite griddim_x and griddim_y as two 32-bits integers even915* if griddim_y must be a 16-bits integer */916nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 8, offset);917918/* overwrite the 16 high bits of griddim_y with griddim_z because919* we need (z << 16) | x */920nve4_upload_indirect_desc(push, res, desc_gpuaddr + 54, 4, offset + 8);921}922}923924/* upload descriptor and flush */925nouveau_pushbuf_space(push, 32, 1, 0);926PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);927BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);928PUSH_DATA (push, desc_gpuaddr >> 8);929BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);930PUSH_DATA (push, 0x3);931BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);932PUSH_DATA (push, 0);933934nvc0_update_compute_invocations_counter(nvc0, info);935936out:937if (ret)938NOUVEAU_ERR("Failed to launch grid !\n");939nouveau_scratch_done(&nvc0->base);940nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);941nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS);942}943944945#define NVE4_TIC_ENTRY_INVALID 0x000fffff946947static void948nve4_compute_validate_textures(struct nvc0_context *nvc0)949{950struct nouveau_bo *txc = nvc0->screen->txc;951struct nouveau_pushbuf *push = nvc0->base.pushbuf;952const unsigned s = 5;953unsigned i;954uint32_t commands[2][32];955unsigned n[2] = { 0, 0 };956957for (i = 0; i < nvc0->num_textures[s]; ++i) {958struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);959struct nv04_resource *res;960const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));961962if (!tic) {963nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;964continue;965}966res = nv04_resource(tic->pipe.texture);967nvc0_update_tic(nvc0, tic, res);968969if (tic->id < 0) {970tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);971972PUSH_SPACE(push, 16);973BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);974PUSH_DATAh(push, txc->offset + (tic->id * 32));975PUSH_DATA (push, txc->offset + (tic->id * 32));976BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);977PUSH_DATA (push, 32);978PUSH_DATA (push, 1);979BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);980PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));981PUSH_DATAp(push, &tic->tic[0], 8);982983commands[0][n[0]++] = (tic->id << 4) | 1;984} else985if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {986commands[1][n[1]++] = (tic->id << 4) | 1;987}988nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);989990res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;991res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;992993nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;994nvc0->tex_handles[s][i] |= tic->id;995if (dirty)996BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);997}998for (; i < nvc0->state.num_textures[s]; ++i) {999nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;1000nvc0->textures_dirty[s] |= 1 << i;1001}10021003if (n[0]) {1004BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);1005PUSH_DATAp(push, commands[0], n[0]);1006}1007if (n[1]) {1008BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);1009PUSH_DATAp(push, commands[1], n[1]);1010}10111012nvc0->state.num_textures[s] = nvc0->num_textures[s];10131014/* Invalidate all 3D textures because they are aliased. */1015for (int s = 0; s < 5; s++) {1016for (int i = 0; i < nvc0->num_textures[s]; i++)1017nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));1018nvc0->textures_dirty[s] = ~0;1019}1020nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;1021}10221023#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER1024static void1025nve4_compute_trap_info(struct nvc0_context *nvc0)1026{1027struct nvc0_screen *screen = nvc0->screen;1028struct nouveau_bo *bo = screen->parm;1029int ret, i;1030volatile struct nve4_mp_trap_info *info;1031uint8_t *map;10321033ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);1034if (ret)1035return;1036map = (uint8_t *)bo->map;1037info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);10381039if (info->lock) {1040debug_printf("trapstat = %08x\n", info->trapstat);1041debug_printf("warperr = %08x\n", info->warperr);1042debug_printf("PC = %x\n", info->pc);1043debug_printf("tid = %u %u %u\n",1044info->tid[0], info->tid[1], info->tid[2]);1045debug_printf("ctaid = %u %u %u\n",1046info->ctaid[0], info->ctaid[1], info->ctaid[2]);1047for (i = 0; i <= 63; ++i)1048debug_printf("$r%i = %08x\n", i, info->r[i]);1049for (i = 0; i <= 6; ++i)1050debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);1051debug_printf("$c = %x\n", info->flags >> 12);1052}1053info->lock = 0;1054}1055#endif105610571058