/* Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c (4574 views) */
/*1* Copyright 2010 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "pipe/p_context.h"23#include "pipe/p_defines.h"24#include "pipe/p_state.h"25#include "util/u_inlines.h"2627#include "nvc0/nvc0_context.h"28#include "nvc0/nvc0_query_hw.h"2930#include "nvc0/nvc0_compute.xml.h"3132static inline void33nvc0_program_update_context_state(struct nvc0_context *nvc0,34struct nvc0_program *prog, int stage)35{36if (prog && prog->need_tls) {37const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;38if (!nvc0->state.tls_required)39BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);40nvc0->state.tls_required |= 1 << stage;41} else {42if (nvc0->state.tls_required == (1 << stage))43nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);44nvc0->state.tls_required &= ~(1 << stage);45}46}4748static inline bool49nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)50{51if (prog->mem)52return 
true;5354if (!prog->translated) {55prog->translated = nvc0_program_translate(56prog, nvc0->screen->base.device->chipset,57nvc0->screen->base.disk_shader_cache, &nvc0->base.debug);58if (!prog->translated)59return false;60}6162if (likely(prog->code_size))63return nvc0_program_upload(nvc0, prog);64return true; /* stream output info only */65}6667void68nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,69struct nvc0_program *prog)70{71struct nouveau_pushbuf *push = nvc0->base.pushbuf;7273if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {74BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);75PUSH_DATA (push, prog->code_base);76} else {77BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);78PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);79PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);80}81}8283void84nvc0_vertprog_validate(struct nvc0_context *nvc0)85{86struct nouveau_pushbuf *push = nvc0->base.pushbuf;87struct nvc0_program *vp = nvc0->vertprog;8889if (!nvc0_program_validate(nvc0, vp))90return;91nvc0_program_update_context_state(nvc0, vp, 0);9293BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);94PUSH_DATA (push, 0x11);95nvc0_program_sp_start_id(nvc0, 1, vp);96BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);97PUSH_DATA (push, vp->num_gprs);9899// BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);100// PUSH_DATA (push, 0);101}102103void104nvc0_fragprog_validate(struct nvc0_context *nvc0)105{106struct nouveau_pushbuf *push = nvc0->base.pushbuf;107struct nvc0_program *fp = nvc0->fragprog;108struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;109110if (fp->fp.force_persample_interp != rast->force_persample_interp) {111/* Force the program to be reuploaded, which will trigger interp fixups112* to get applied113*/114if (fp->mem)115nouveau_heap_free(&fp->mem);116117fp->fp.force_persample_interp = rast->force_persample_interp;118}119120if (fp->fp.msaa != rast->multisample) {121/* Force the program to be reuploaded, which will trigger interp 
fixups122* to get applied123*/124if (fp->mem)125nouveau_heap_free(&fp->mem);126127fp->fp.msaa = rast->multisample;128}129130/* Shade model works well enough when both colors follow it. However if one131* (or both) is explicitly set, then we have to go the patching route.132*/133bool has_explicit_color = fp->fp.colors &&134(((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||135((fp->fp.colors & 2) && !fp->fp.color_interp[1]));136bool hwflatshade = false;137if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {138/* Force re-upload */139if (fp->mem)140nouveau_heap_free(&fp->mem);141142fp->fp.flatshade = rast->flatshade;143144/* Always smooth-shade in this mode, the shader will decide on its own145* when to flat-shade.146*/147} else if (!has_explicit_color) {148hwflatshade = rast->flatshade;149150/* No need to binary-patch the shader each time, make sure that it's set151* up for the default behaviour.152*/153fp->fp.flatshade = 0;154}155156if (hwflatshade != nvc0->state.flatshade) {157nvc0->state.flatshade = hwflatshade;158BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);159PUSH_DATA (push, hwflatshade ? 
NVC0_3D_SHADE_MODEL_FLAT :160NVC0_3D_SHADE_MODEL_SMOOTH);161}162163if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {164return;165}166167if (!nvc0_program_validate(nvc0, fp))168return;169nvc0_program_update_context_state(nvc0, fp, 4);170171if (fp->fp.early_z != nvc0->state.early_z_forced) {172nvc0->state.early_z_forced = fp->fp.early_z;173IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);174}175if (fp->fp.post_depth_coverage != nvc0->state.post_depth_coverage) {176nvc0->state.post_depth_coverage = fp->fp.post_depth_coverage;177IMMED_NVC0(push, NVC0_3D(POST_DEPTH_COVERAGE),178fp->fp.post_depth_coverage);179}180181BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);182PUSH_DATA (push, 0x51);183nvc0_program_sp_start_id(nvc0, 5, fp);184BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);185PUSH_DATA (push, fp->num_gprs);186187BEGIN_NVC0(push, SUBC_3D(0x0360), 2);188PUSH_DATA (push, 0x20164010);189PUSH_DATA (push, 0x20);190BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);191PUSH_DATA (push, fp->flags[0]);192}193194void195nvc0_tctlprog_validate(struct nvc0_context *nvc0)196{197struct nouveau_pushbuf *push = nvc0->base.pushbuf;198struct nvc0_program *tp = nvc0->tctlprog;199200if (tp && nvc0_program_validate(nvc0, tp)) {201if (tp->tp.tess_mode != ~0) {202BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);203PUSH_DATA (push, tp->tp.tess_mode);204}205BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);206PUSH_DATA (push, 0x21);207nvc0_program_sp_start_id(nvc0, 2, tp);208BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);209PUSH_DATA (push, tp->num_gprs);210} else {211tp = nvc0->tcp_empty;212/* not a whole lot we can do to handle this failure */213if (!nvc0_program_validate(nvc0, tp))214assert(!"unable to validate empty tcp");215BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);216PUSH_DATA (push, 0x20);217nvc0_program_sp_start_id(nvc0, 2, tp);218}219nvc0_program_update_context_state(nvc0, tp, 1);220}221222void223nvc0_tevlprog_validate(struct nvc0_context *nvc0)224{225struct nouveau_pushbuf *push 
= nvc0->base.pushbuf;226struct nvc0_program *tp = nvc0->tevlprog;227228if (tp && nvc0_program_validate(nvc0, tp)) {229if (tp->tp.tess_mode != ~0) {230BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);231PUSH_DATA (push, tp->tp.tess_mode);232}233BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);234PUSH_DATA (push, 0x31);235nvc0_program_sp_start_id(nvc0, 3, tp);236BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);237PUSH_DATA (push, tp->num_gprs);238} else {239BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);240PUSH_DATA (push, 0x30);241}242nvc0_program_update_context_state(nvc0, tp, 2);243}244245void246nvc0_gmtyprog_validate(struct nvc0_context *nvc0)247{248struct nouveau_pushbuf *push = nvc0->base.pushbuf;249struct nvc0_program *gp = nvc0->gmtyprog;250251/* we allow GPs with no code for specifying stream output state only */252if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {253BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);254PUSH_DATA (push, 0x41);255nvc0_program_sp_start_id(nvc0, 4, gp);256BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);257PUSH_DATA (push, gp->num_gprs);258} else {259BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);260PUSH_DATA (push, 0x40);261}262nvc0_program_update_context_state(nvc0, gp, 3);263}264265void266nvc0_compprog_validate(struct nvc0_context *nvc0)267{268struct nouveau_pushbuf *push = nvc0->base.pushbuf;269struct nvc0_program *cp = nvc0->compprog;270271if (cp && !nvc0_program_validate(nvc0, cp))272return;273274BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);275PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);276}277278void279nvc0_layer_validate(struct nvc0_context *nvc0)280{281struct nouveau_pushbuf *push = nvc0->base.pushbuf;282struct nvc0_program *last;283bool prog_selects_layer = false;284bool layer_viewport_relative = false;285286if (nvc0->gmtyprog)287last = nvc0->gmtyprog;288else if (nvc0->tevlprog)289last = nvc0->tevlprog;290else291last = nvc0->vertprog;292293if (last) {294prog_selects_layer = !!(last->hdr[13] & (1 << 9));295layer_viewport_relative = 
last->vp.layer_viewport_relative;296}297298BEGIN_NVC0(push, NVC0_3D(LAYER), 1);299PUSH_DATA (push, prog_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);300if (nvc0->screen->eng3d->oclass >= GM200_3D_CLASS) {301IMMED_NVC0(push, NVC0_3D(LAYER_VIEWPORT_RELATIVE),302layer_viewport_relative);303}304}305306void307nvc0_tfb_validate(struct nvc0_context *nvc0)308{309struct nouveau_pushbuf *push = nvc0->base.pushbuf;310struct nvc0_transform_feedback_state *tfb;311unsigned b;312313if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;314else315if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;316else317tfb = nvc0->vertprog->tfb;318319IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);320321if (tfb && tfb != nvc0->state.tfb) {322for (b = 0; b < 4; ++b) {323if (tfb->varying_count[b]) {324unsigned n = (tfb->varying_count[b] + 3) / 4;325326BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);327PUSH_DATA (push, tfb->stream[b]);328PUSH_DATA (push, tfb->varying_count[b]);329PUSH_DATA (push, tfb->stride[b]);330BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);331PUSH_DATAp(push, tfb->varying_index[b], n);332333if (nvc0->tfbbuf[b])334nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];335} else {336IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);337}338}339}340nvc0->state.tfb = tfb;341342if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))343return;344345for (b = 0; b < nvc0->num_tfbbufs; ++b) {346struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);347struct nv04_resource *buf;348349if (targ && tfb)350targ->stride = tfb->stride[b];351352if (!targ || !targ->stride) {353IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);354continue;355}356357buf = nv04_resource(targ->pipe.buffer);358359BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);360361if (!(nvc0->tfbbuf_dirty & (1 << b)))362continue;363364if (!targ->clean)365nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));366nouveau_pushbuf_space(push, 0, 0, 1);367BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);368PUSH_DATA (push, 
1);369PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);370PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);371PUSH_DATA (push, targ->pipe.buffer_size);372if (!targ->clean) {373nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);374} else {375PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */376targ->clean = false;377}378}379for (; b < 4; ++b)380IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);381}382383384