/* Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
 * (4574 views)
 */
/*1* Copyright 2008 Ben Skeggs2* Copyright 2010 Christoph Bumiller3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*/2223#include "pipe/p_context.h"24#include "pipe/p_defines.h"25#include "pipe/p_state.h"26#include "util/u_inlines.h"2728#include "nv50/nv50_context.h"29#include "nv50/nv50_query_hw.h"3031#include "nv50/nv50_compute.xml.h"3233void34nv50_constbufs_validate(struct nv50_context *nv50)35{36struct nouveau_pushbuf *push = nv50->base.pushbuf;37unsigned s;3839for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) {40unsigned p;4142if (s == NV50_SHADER_STAGE_FRAGMENT)43p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;44else45if (s == NV50_SHADER_STAGE_GEOMETRY)46p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;47else48p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;4950while (nv50->constbuf_dirty[s]) {51const unsigned i = (unsigned)ffs(nv50->constbuf_dirty[s]) - 1;5253assert(i < NV50_MAX_PIPE_CONSTBUFS);54nv50->constbuf_dirty[s] &= ~(1 << i);5556if (nv50->constbuf[s][i].user) 
{57const unsigned b = NV50_CB_PVP + s;58unsigned start = 0;59unsigned words = nv50->constbuf[s][0].size / 4;60if (i) {61NOUVEAU_ERR("user constbufs only supported in slot 0\n");62continue;63}64if (!nv50->state.uniform_buffer_bound[s]) {65nv50->state.uniform_buffer_bound[s] = true;66BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);67PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);68}69while (words) {70unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);7172PUSH_SPACE(push, nr + 3);73BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);74PUSH_DATA (push, (start << 8) | b);75BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);76PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);7778start += nr;79words -= nr;80}81} else {82struct nv04_resource *res =83nv04_resource(nv50->constbuf[s][i].u.buf);84if (res) {85/* TODO: allocate persistent bindings */86const unsigned b = s * 16 + i;8788assert(nouveau_resource_mapped_by_gpu(&res->base));8990BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);91PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);92PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);93PUSH_DATA (push, (b << 16) |94(nv50->constbuf[s][i].size & 0xffff));95BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);96PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);9798BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD);99100nv50->cb_dirty = 1; /* Force cache flush for UBO. */101res->cb_bindings[s] |= 1 << i;102} else {103BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);104PUSH_DATA (push, (i << 8) | p | 0);105}106if (i == 0)107nv50->state.uniform_buffer_bound[s] = false;108}109}110}111112/* Invalidate all COMPUTE constbufs because they are aliased with 3D. 
*/113nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF;114nv50->constbuf_dirty[NV50_SHADER_STAGE_COMPUTE] |= nv50->constbuf_valid[NV50_SHADER_STAGE_COMPUTE];115nv50->state.uniform_buffer_bound[NV50_SHADER_STAGE_COMPUTE] = false;116}117118static bool119nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)120{121if (!prog->translated) {122prog->translated = nv50_program_translate(123prog, nv50->screen->base.device->chipset, &nv50->base.debug);124if (!prog->translated)125return false;126} else127if (prog->mem)128return true;129130return nv50_program_upload_code(nv50, prog);131}132133static inline void134nv50_program_update_context_state(struct nv50_context *nv50,135struct nv50_program *prog, int stage)136{137const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;138139if (prog && prog->tls_space) {140if (nv50->state.new_tls_space)141nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);142if (!nv50->state.tls_required || nv50->state.new_tls_space)143BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo);144nv50->state.new_tls_space = false;145nv50->state.tls_required |= 1 << stage;146} else {147if (nv50->state.tls_required == (1 << stage))148nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);149nv50->state.tls_required &= ~(1 << stage);150}151}152153void154nv50_vertprog_validate(struct nv50_context *nv50)155{156struct nouveau_pushbuf *push = nv50->base.pushbuf;157struct nv50_program *vp = nv50->vertprog;158159if (!nv50_program_validate(nv50, vp))160return;161nv50_program_update_context_state(nv50, vp, 0);162163BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);164PUSH_DATA (push, vp->vp.attrs[0]);165PUSH_DATA (push, vp->vp.attrs[1]);166BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);167PUSH_DATA (push, vp->max_out);168BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);169PUSH_DATA (push, vp->max_gpr);170BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);171PUSH_DATA (push, vp->code_base);172}173174void175nv50_fragprog_validate(struct nv50_context 
*nv50)176{177struct nouveau_pushbuf *push = nv50->base.pushbuf;178struct nv50_program *fp = nv50->fragprog;179struct pipe_rasterizer_state *rast = &nv50->rast->pipe;180181if (!fp || !rast)182return;183184if (nv50->zsa && nv50->zsa->pipe.alpha_enabled) {185struct pipe_framebuffer_state *fb = &nv50->framebuffer;186bool blendable = fb->nr_cbufs == 0 || !fb->cbufs[0] ||187nv50->screen->base.base.is_format_supported(188&nv50->screen->base.base,189fb->cbufs[0]->format,190fb->cbufs[0]->texture->target,191fb->cbufs[0]->texture->nr_samples,192fb->cbufs[0]->texture->nr_storage_samples,193PIPE_BIND_BLENDABLE);194/* If we already have alphatest code, we have to keep updating195* it. However we only have to have different code if the current RT0 is196* non-blendable. Otherwise we just set it to always pass and use the197* hardware alpha test.198*/199if (fp->fp.alphatest || !blendable) {200uint8_t alphatest = PIPE_FUNC_ALWAYS + 1;201if (!blendable)202alphatest = nv50->zsa->pipe.alpha_func + 1;203if (!fp->fp.alphatest)204nv50_program_destroy(nv50, fp);205else if (fp->mem && fp->fp.alphatest != alphatest)206nouveau_heap_free(&fp->mem);207208fp->fp.alphatest = alphatest;209}210} else if (fp->fp.alphatest && fp->fp.alphatest != PIPE_FUNC_ALWAYS + 1) {211/* Alpha test is disabled but we have a shader where it's filled212* in. 
Make sure to reset the function to 'always', otherwise it'll end213* up discarding fragments incorrectly.214*/215if (fp->mem)216nouveau_heap_free(&fp->mem);217218fp->fp.alphatest = PIPE_FUNC_ALWAYS + 1;219}220221if (fp->fp.force_persample_interp != rast->force_persample_interp) {222/* Force the program to be reuploaded, which will trigger interp fixups223* to get applied224*/225if (fp->mem)226nouveau_heap_free(&fp->mem);227228fp->fp.force_persample_interp = rast->force_persample_interp;229}230231if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))232return;233234if (!nv50_program_validate(nv50, fp))235return;236nv50_program_update_context_state(nv50, fp, 1);237238BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);239PUSH_DATA (push, fp->max_gpr);240BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);241PUSH_DATA (push, fp->max_out);242BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);243PUSH_DATA (push, fp->fp.flags[0]);244BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);245PUSH_DATA (push, fp->fp.flags[1]);246BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);247PUSH_DATA (push, fp->code_base);248249if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {250BEGIN_NV04(push, SUBC_3D(NVA3_3D_FP_MULTISAMPLE), 1);251if (nv50->min_samples > 1 || fp->fp.has_samplemask)252PUSH_DATA(push,253NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE |254(NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK *255fp->fp.has_samplemask));256else257PUSH_DATA(push, 0);258}259}260261void262nv50_gmtyprog_validate(struct nv50_context *nv50)263{264struct nouveau_pushbuf *push = nv50->base.pushbuf;265struct nv50_program *gp = nv50->gmtyprog;266267if (gp) {268if (!nv50_program_validate(nv50, gp))269return;270BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);271PUSH_DATA (push, gp->max_gpr);272BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);273PUSH_DATA (push, gp->max_out);274BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);275PUSH_DATA (push, gp->gp.prim_type);276BEGIN_NV04(push, 
NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);277PUSH_DATA (push, gp->gp.vert_count);278BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);279PUSH_DATA (push, gp->code_base);280281nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */282}283nv50_program_update_context_state(nv50, gp, 2);284285/* GP_ENABLE is updated in linkage validation */286}287288void289nv50_compprog_validate(struct nv50_context *nv50)290{291struct nouveau_pushbuf *push = nv50->base.pushbuf;292struct nv50_program *cp = nv50->compprog;293294if (cp && !nv50_program_validate(nv50, cp))295return;296297BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1);298PUSH_DATA (push, 0);299}300301static void302nv50_sprite_coords_validate(struct nv50_context *nv50)303{304struct nouveau_pushbuf *push = nv50->base.pushbuf;305uint32_t pntc[8], mode;306struct nv50_program *fp = nv50->fragprog;307unsigned i, c;308unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;309310if (!nv50->rast->pipe.point_quad_rasterization) {311if (nv50->state.point_sprite) {312BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);313for (i = 0; i < 8; ++i)314PUSH_DATA(push, 0);315316nv50->state.point_sprite = false;317}318return;319} else {320nv50->state.point_sprite = true;321}322323memset(pntc, 0, sizeof(pntc));324325for (i = 0; i < fp->in_nr; i++) {326unsigned n = util_bitcount(fp->in[i].mask);327328if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {329m += n;330continue;331}332if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {333m += n;334continue;335}336337for (c = 0; c < 4; ++c) {338if (fp->in[i].mask & (1 << c)) {339pntc[m / 8] |= (c + 1) << ((m % 8) * 4);340++m;341}342}343}344345if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)346mode = 0x00;347else348mode = 0x10;349350BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);351PUSH_DATA (push, mode);352353BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);354PUSH_DATAp(push, pntc, 8);355}356357/* Validate state derived from shaders and the rasterizer 
cso. */358void359nv50_validate_derived_rs(struct nv50_context *nv50)360{361struct nouveau_pushbuf *push = nv50->base.pushbuf;362uint32_t color, psize;363364nv50_sprite_coords_validate(nv50);365366if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {367nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;368BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);369PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);370}371372if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG)373return;374psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;375color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;376377if (nv50->rast->pipe.clamp_vertex_color)378color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;379380if (color != nv50->state.semantic_color) {381nv50->state.semantic_color = color;382BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);383PUSH_DATA (push, color);384}385386if (nv50->rast->pipe.point_size_per_vertex)387psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;388389if (psize != nv50->state.semantic_psize) {390nv50->state.semantic_psize = psize;391BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);392PUSH_DATA (push, psize);393}394}395396static int397nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],398struct nv50_varying *in, struct nv50_varying *out)399{400int c;401uint8_t mv = out->mask, mf = in->mask, oid = out->hw;402403for (c = 0; c < 4; ++c) {404if (mf & 1) {405if (in->linear)406lin[mid / 32] |= 1 << (mid % 32);407if (mv & 1)408map[mid] = oid;409else410if (c == 3)411map[mid] |= 1;412++mid;413}414415oid += mv & 1;416mf >>= 1;417mv >>= 1;418}419420return mid;421}422423void424nv50_fp_linkage_validate(struct nv50_context *nv50)425{426struct nouveau_pushbuf *push = nv50->base.pushbuf;427struct nv50_program *vp = nv50->gmtyprog ? 
nv50->gmtyprog : nv50->vertprog;428struct nv50_program *fp = nv50->fragprog;429struct nv50_varying dummy;430int i, n, c, m;431uint32_t primid = 0;432uint32_t layerid = 0;433uint32_t viewportid = 0;434uint32_t psiz = 0x000;435uint32_t interp = fp->fp.interp;436uint32_t colors = fp->fp.colors;437uint32_t clpd_nr = util_last_bit(vp->vp.clip_enable | vp->vp.cull_enable);438uint32_t lin[4];439uint8_t map[64];440uint8_t so_map[64];441442if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |443NV50_NEW_3D_FRAGPROG |444NV50_NEW_3D_GMTYPROG))) {445uint8_t bfc, ffc;446ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);447bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)448>> 8;449if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))450return;451}452453memset(lin, 0x00, sizeof(lin));454455/* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx456* or is it the first byte ?457*/458memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));459460dummy.mask = 0xf; /* map all components of HPOS */461dummy.linear = 0;462m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);463464for (c = 0; c < clpd_nr; ++c)465map[m++] = vp->vp.clpd[c / 4] + (c % 4);466467colors |= m << 8; /* adjust BFC0 id */468469dummy.mask = 0x0;470471/* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */472if (nv50->rast->pipe.light_twoside) {473for (i = 0; i < 2; ++i) {474n = vp->vp.bfc[i];475if (fp->vp.bfc[i] >= fp->in_nr)476continue;477m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],478(n < vp->out_nr) ? 
&vp->out[n] : &dummy);479}480}481colors += m - 4; /* adjust FFC0 id */482interp |= m << 8; /* set map id where 'normal' FP inputs start */483484for (i = 0; i < fp->in_nr; ++i) {485for (n = 0; n < vp->out_nr; ++n)486if (vp->out[n].sn == fp->in[i].sn &&487vp->out[n].si == fp->in[i].si)488break;489switch (fp->in[i].sn) {490case TGSI_SEMANTIC_PRIMID:491primid = m;492break;493case TGSI_SEMANTIC_LAYER:494layerid = m;495break;496case TGSI_SEMANTIC_VIEWPORT_INDEX:497viewportid = m;498break;499}500m = nv50_vec4_map(map, m, lin,501&fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);502}503504if (vp->gp.has_layer && !layerid) {505layerid = m;506map[m++] = vp->gp.layerid;507}508509if (vp->gp.has_viewport && !viewportid) {510viewportid = m;511map[m++] = vp->gp.viewportid;512}513514if (nv50->rast->pipe.point_size_per_vertex) {515psiz = (m << 4) | 1;516map[m++] = vp->vp.psiz;517}518519if (nv50->rast->pipe.clamp_vertex_color)520colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;521522if (unlikely(vp->so)) {523/* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP524* gets written.525*526* TODO:527* Inverting vp->so->map (output -> offset) would probably speed this up.528*/529memset(so_map, 0, sizeof(so_map));530for (i = 0; i < vp->so->map_size; ++i) {531if (vp->so->map[i] == 0xff)532continue;533for (c = 0; c < m; ++c)534if (map[c] == vp->so->map[i] && !so_map[c])535break;536if (c == m) {537c = m;538map[m++] = vp->so->map[i];539}540so_map[c] = 0x80 | i;541}542for (c = m; c & 3; ++c)543so_map[c] = 0;544}545546n = (m + 3) / 4;547assert(m <= 64);548549if (unlikely(nv50->gmtyprog)) {550BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);551PUSH_DATA (push, m);552BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);553PUSH_DATAp(push, map, n);554} else {555BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);556PUSH_DATA (push, vp->vp.attrs[2] | fp->vp.attrs[2]);557558BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);559PUSH_DATA (push, primid);560561assert(m > 0);562BEGIN_NV04(push, 
NV50_3D(VP_RESULT_MAP_SIZE), 1);563PUSH_DATA (push, m);564BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);565PUSH_DATAp(push, map, n);566}567568BEGIN_NV04(push, NV50_3D(GP_VIEWPORT_ID_ENABLE), 5);569PUSH_DATA (push, vp->gp.has_viewport);570PUSH_DATA (push, colors);571PUSH_DATA (push, (clpd_nr << 8) | 4);572PUSH_DATA (push, layerid);573PUSH_DATA (push, psiz);574575BEGIN_NV04(push, NV50_3D(SEMANTIC_VIEWPORT), 1);576PUSH_DATA (push, viewportid);577578BEGIN_NV04(push, NV50_3D(LAYER), 1);579PUSH_DATA (push, vp->gp.has_layer << 16);580581BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);582PUSH_DATA (push, interp);583584nv50->state.interpolant_ctrl = interp;585586nv50->state.semantic_color = colors;587nv50->state.semantic_psize = psiz;588589BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);590PUSH_DATAp(push, lin, 4);591592BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);593PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);594595if (vp->so) {596BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);597PUSH_DATAp(push, so_map, n);598}599}600601static int602nv50_vp_gp_mapping(uint8_t *map, int m,603struct nv50_program *vp, struct nv50_program *gp)604{605int i, j, c;606607for (i = 0; i < gp->in_nr; ++i) {608uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;609610for (j = 0; j < vp->out_nr; ++j) {611if (vp->out[j].sn == gp->in[i].sn &&612vp->out[j].si == gp->in[i].si) {613mv = vp->out[j].mask;614oid = vp->out[j].hw;615break;616}617}618619for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {620if (mg & mv & 1)621map[m++] = oid;622else623if (mg & 1)624map[m++] = (c == 3) ? 
0x41 : 0x40;625oid += mv & 1;626}627}628if (!m)629map[m++] = 0;630return m;631}632633void634nv50_gp_linkage_validate(struct nv50_context *nv50)635{636struct nouveau_pushbuf *push = nv50->base.pushbuf;637struct nv50_program *vp = nv50->vertprog;638struct nv50_program *gp = nv50->gmtyprog;639int m = 0;640int n;641uint8_t map[64];642643if (!gp)644return;645memset(map, 0, sizeof(map));646647m = nv50_vp_gp_mapping(map, m, vp, gp);648649n = (m + 3) / 4;650651BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);652PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);653654assert(m > 0);655BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);656PUSH_DATA (push, m);657BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);658PUSH_DATAp(push, map, n);659}660661void662nv50_stream_output_validate(struct nv50_context *nv50)663{664struct nouveau_pushbuf *push = nv50->base.pushbuf;665struct nv50_stream_output_state *so;666uint32_t ctrl;667unsigned i;668unsigned prims = ~0;669670so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;671672BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);673PUSH_DATA (push, 0);674if (!so || !nv50->num_so_targets) {675if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {676BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);677PUSH_DATA (push, 0);678}679BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);680PUSH_DATA (push, 1);681return;682}683684/* previous TFB needs to complete */685if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {686BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);687PUSH_DATA (push, 0);688}689690ctrl = so->ctrl;691if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)692ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;693694BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);695PUSH_DATA (push, ctrl);696697for (i = 0; i < nv50->num_so_targets; ++i) {698struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);699struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);700701const unsigned n = nv50->screen->base.class_3d >= 
NVA0_3D_CLASS ? 4 : 3;702703uint32_t so_used = 0;704705if (!targ->clean) {706if (n == 4)707nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));708else709so_used = nv50->so_used[i];710}711BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);712PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset + so_used);713PUSH_DATA (push, buf->address + targ->pipe.buffer_offset + so_used);714PUSH_DATA (push, so->num_attribs[i]);715if (n == 4) {716PUSH_DATA(push, targ->pipe.buffer_size);717if (!targ->clean) {718assert(targ->pq);719nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),720nv50_query(targ->pq), 0x4);721} else {722BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);723PUSH_DATA(push, 0);724targ->clean = false;725}726} else {727const unsigned limit = (targ->pipe.buffer_size - so_used) /728(so->stride[i] * nv50->state.prim_size);729prims = MIN2(prims, limit);730targ->clean = false;731}732targ->stride = so->stride[i];733BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);734}735if (prims != ~0) {736BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);737PUSH_DATA (push, prims);738}739BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);740PUSH_DATA (push, 1);741BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);742PUSH_DATA (push, 1);743}744745746