Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
4574 views
/*1* Copyright 2012 Red Hat Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Ben Skeggs22*23*/2425#include "util/format/u_format.h"26#include "util/u_math.h"27#include "util/half_float.h"2829#include "nv_object.xml.h"30#include "nv30/nv30-40_3d.xml.h"31#include "nv30/nv30_context.h"32#include "nv30/nv30_format.h"3334static void35nv30_validate_fb(struct nv30_context *nv30)36{37struct pipe_screen *pscreen = &nv30->screen->base.base;38struct pipe_framebuffer_state *fb = &nv30->framebuffer;39struct nouveau_pushbuf *push = nv30->base.pushbuf;40struct nouveau_object *eng3d = nv30->screen->eng3d;41uint32_t rt_format;42int h = fb->height;43int w = fb->width;44int x = 0;45int y = 0;4647nv30->state.rt_enable = (NV30_3D_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;48if (nv30->state.rt_enable > 1)49nv30->state.rt_enable |= NV30_3D_RT_ENABLE_MRT;5051rt_format = 0;52if (fb->nr_cbufs > 0) {53struct nv30_miptree *mt = nv30_miptree(fb->cbufs[0]->texture);54rt_format |= nv30_format(pscreen, fb->cbufs[0]->format)->hw;55rt_format |= mt->ms_mode;56if (mt->swizzled)57rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;58else59rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;60} else {61if (fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) > 2)62rt_format |= NV30_3D_RT_FORMAT_COLOR_A8R8G8B8;63else64rt_format |= NV30_3D_RT_FORMAT_COLOR_R5G6B5;65}6667if (fb->zsbuf) {68rt_format |= nv30_format(pscreen, fb->zsbuf->format)->hw;69if (nv30_miptree(fb->zsbuf->texture)->swizzled)70rt_format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;71else72rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;73} else {74if (fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) > 2)75rt_format |= NV30_3D_RT_FORMAT_ZETA_Z24S8;76else77rt_format |= NV30_3D_RT_FORMAT_ZETA_Z16;78}7980/* hardware rounds down render target offset to 64 bytes, but surfaces81* with a size of 2x2 pixel (16bpp) or 1x1 pixel (32bpp) have an82* unaligned start address. For these two important square formats83* we can hack around this limitation by adjusting the viewport origin84*/85if (nv30->state.rt_enable) {86int off = nv30_surface(fb->cbufs[0])->offset & 63;87if (off) {88x += off / (util_format_get_blocksize(fb->cbufs[0]->format) * 2);89w = 16;90h = 2;91}92}9394if (rt_format & NV30_3D_RT_FORMAT_TYPE_SWIZZLED) {95rt_format |= util_logbase2(w) << 16;96rt_format |= util_logbase2(h) << 24;97}9899if (!PUSH_SPACE(push, 64))100return;101PUSH_RESET(push, BUFCTX_FB);102103BEGIN_NV04(push, SUBC_3D(0x1da4), 1);104PUSH_DATA (push, 0);105BEGIN_NV04(push, NV30_3D(RT_HORIZ), 3);106PUSH_DATA (push, w << 16);107PUSH_DATA (push, h << 16);108PUSH_DATA (push, rt_format);109BEGIN_NV04(push, NV30_3D(VIEWPORT_TX_ORIGIN), 4);110PUSH_DATA (push, (y << 16) | x);111PUSH_DATA (push, 0);112PUSH_DATA (push, ((w - 1) << 16) | 0);113PUSH_DATA (push, ((h - 1) << 16) | 0);114115if ((nv30->state.rt_enable & NV30_3D_RT_ENABLE_COLOR0) || fb->zsbuf) {116struct nv30_surface *rsf = nv30_surface(fb->cbufs[0]);117struct nv30_surface *zsf = nv30_surface(fb->zsbuf);118struct nouveau_bo *rbo, *zbo;119120if (!rsf) rsf = zsf;121else if (!zsf) zsf = rsf;122rbo = nv30_miptree(rsf->base.texture)->base.bo;123zbo = nv30_miptree(zsf->base.texture)->base.bo;124125if (eng3d->oclass >= NV40_3D_CLASS) {126BEGIN_NV04(push, NV40_3D(ZETA_PITCH), 1);127PUSH_DATA (push, zsf->pitch);128BEGIN_NV04(push, NV40_3D(COLOR0_PITCH), 3);129PUSH_DATA (push, rsf->pitch);130} else {131BEGIN_NV04(push, NV30_3D(COLOR0_PITCH), 3);132PUSH_DATA (push, (zsf->pitch << 16) | rsf->pitch);133}134PUSH_MTHDl(push, NV30_3D(COLOR0_OFFSET), BUFCTX_FB, rbo, rsf->offset & ~63,135NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);136PUSH_MTHDl(push, NV30_3D(ZETA_OFFSET), BUFCTX_FB, zbo, zsf->offset & ~63,137NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);138}139140if (nv30->state.rt_enable & NV30_3D_RT_ENABLE_COLOR1) {141struct nv30_surface *sf = nv30_surface(fb->cbufs[1]);142struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo;143144BEGIN_NV04(push, NV30_3D(COLOR1_OFFSET), 2);145PUSH_MTHDl(push, NV30_3D(COLOR1_OFFSET), BUFCTX_FB, bo, sf->offset,146NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);147PUSH_DATA (push, sf->pitch);148}149150if (nv30->state.rt_enable & NV40_3D_RT_ENABLE_COLOR2) {151struct nv30_surface *sf = nv30_surface(fb->cbufs[2]);152struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo;153154BEGIN_NV04(push, NV40_3D(COLOR2_OFFSET), 1);155PUSH_MTHDl(push, NV40_3D(COLOR2_OFFSET), BUFCTX_FB, bo, sf->offset,156NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);157BEGIN_NV04(push, NV40_3D(COLOR2_PITCH), 1);158PUSH_DATA (push, sf->pitch);159}160161if (nv30->state.rt_enable & NV40_3D_RT_ENABLE_COLOR3) {162struct nv30_surface *sf = nv30_surface(fb->cbufs[3]);163struct nouveau_bo *bo = nv30_miptree(sf->base.texture)->base.bo;164165BEGIN_NV04(push, NV40_3D(COLOR3_OFFSET), 1);166PUSH_MTHDl(push, NV40_3D(COLOR3_OFFSET), BUFCTX_FB, bo, sf->offset,167NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);168BEGIN_NV04(push, NV40_3D(COLOR3_PITCH), 1);169PUSH_DATA (push, sf->pitch);170}171}172173static void174nv30_validate_blend_colour(struct nv30_context *nv30)175{176struct nouveau_pushbuf *push = nv30->base.pushbuf;177float *rgba = nv30->blend_colour.color;178179if (nv30->framebuffer.nr_cbufs) {180switch (nv30->framebuffer.cbufs[0]->format) {181case PIPE_FORMAT_R16G16B16A16_FLOAT:182case PIPE_FORMAT_R32G32B32A32_FLOAT:183BEGIN_NV04(push, NV30_3D(BLEND_COLOR), 1);184PUSH_DATA (push, (_mesa_float_to_half(rgba[0]) << 0) |185(_mesa_float_to_half(rgba[1]) << 16));186BEGIN_NV04(push, SUBC_3D(0x037c), 1);187PUSH_DATA (push, (_mesa_float_to_half(rgba[2]) << 0) |188(_mesa_float_to_half(rgba[3]) << 16));189break;190default:191break;192}193}194195BEGIN_NV04(push, NV30_3D(BLEND_COLOR), 1);196PUSH_DATA (push, (float_to_ubyte(rgba[3]) << 24) |197(float_to_ubyte(rgba[0]) << 16) |198(float_to_ubyte(rgba[1]) << 8) |199(float_to_ubyte(rgba[2]) << 0));200}201202static void203nv30_validate_stencil_ref(struct nv30_context *nv30)204{205struct nouveau_pushbuf *push = nv30->base.pushbuf;206207BEGIN_NV04(push, NV30_3D(STENCIL_FUNC_REF(0)), 1);208PUSH_DATA (push, nv30->stencil_ref.ref_value[0]);209BEGIN_NV04(push, NV30_3D(STENCIL_FUNC_REF(1)), 1);210PUSH_DATA (push, nv30->stencil_ref.ref_value[1]);211}212213static void214nv30_validate_stipple(struct nv30_context *nv30)215{216struct nouveau_pushbuf *push = nv30->base.pushbuf;217218BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_PATTERN(0)), 32);219PUSH_DATAp(push, nv30->stipple.stipple, 32);220}221222static void223nv30_validate_scissor(struct nv30_context *nv30)224{225struct nouveau_pushbuf *push = nv30->base.pushbuf;226struct pipe_scissor_state *s = &nv30->scissor;227bool rast_scissor = nv30->rast ? nv30->rast->pipe.scissor : false;228229if (!(nv30->dirty & NV30_NEW_SCISSOR) &&230rast_scissor != nv30->state.scissor_off)231return;232nv30->state.scissor_off = !rast_scissor;233234BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);235if (rast_scissor) {236PUSH_DATA (push, ((s->maxx - s->minx) << 16) | s->minx);237PUSH_DATA (push, ((s->maxy - s->miny) << 16) | s->miny);238} else {239PUSH_DATA (push, 0x10000000);240PUSH_DATA (push, 0x10000000);241}242}243244static void245nv30_validate_viewport(struct nv30_context *nv30)246{247struct nouveau_pushbuf *push = nv30->base.pushbuf;248struct pipe_viewport_state *vp = &nv30->viewport;249250unsigned x = CLAMP(vp->translate[0] - fabsf(vp->scale[0]), 0, 4095);251unsigned y = CLAMP(vp->translate[1] - fabsf(vp->scale[1]), 0, 4095);252unsigned w = CLAMP(2.0f * fabsf(vp->scale[0]), 0, 4096);253unsigned h = CLAMP(2.0f * fabsf(vp->scale[1]), 0, 4096);254255BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);256PUSH_DATAf(push, vp->translate[0]);257PUSH_DATAf(push, vp->translate[1]);258PUSH_DATAf(push, vp->translate[2]);259PUSH_DATAf(push, 0.0f);260PUSH_DATAf(push, vp->scale[0]);261PUSH_DATAf(push, vp->scale[1]);262PUSH_DATAf(push, vp->scale[2]);263PUSH_DATAf(push, 0.0f);264BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);265PUSH_DATAf(push, vp->translate[2] - fabsf(vp->scale[2]));266PUSH_DATAf(push, vp->translate[2] + fabsf(vp->scale[2]));267268BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);269PUSH_DATA (push, (w << 16) | x);270PUSH_DATA (push, (h << 16) | y);271}272273static void274nv30_validate_clip(struct nv30_context *nv30)275{276struct nouveau_pushbuf *push = nv30->base.pushbuf;277unsigned i;278uint32_t clpd_enable = 0;279280for (i = 0; i < 6; i++) {281if (nv30->dirty & NV30_NEW_CLIP) {282BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);283PUSH_DATA (push, i);284PUSH_DATAp(push, nv30->clip.ucp[i], 4);285}286if (nv30->rast->pipe.clip_plane_enable & (1 << i))287clpd_enable |= 2 << (4*i);288}289290BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);291PUSH_DATA (push, clpd_enable);292}293294static void295nv30_validate_blend(struct nv30_context *nv30)296{297struct nouveau_pushbuf *push = nv30->base.pushbuf;298299PUSH_SPACE(push, nv30->blend->size);300PUSH_DATAp(push, nv30->blend->data, nv30->blend->size);301}302303static void304nv30_validate_zsa(struct nv30_context *nv30)305{306struct nouveau_pushbuf *push = nv30->base.pushbuf;307308PUSH_SPACE(push, nv30->zsa->size);309PUSH_DATAp(push, nv30->zsa->data, nv30->zsa->size);310}311312static void313nv30_validate_rasterizer(struct nv30_context *nv30)314{315struct nouveau_pushbuf *push = nv30->base.pushbuf;316317PUSH_SPACE(push, nv30->rast->size);318PUSH_DATAp(push, nv30->rast->data, nv30->rast->size);319}320321static void322nv30_validate_multisample(struct nv30_context *nv30)323{324struct pipe_rasterizer_state *rasterizer = &nv30->rast->pipe;325struct pipe_blend_state *blend = &nv30->blend->pipe;326struct nouveau_pushbuf *push = nv30->base.pushbuf;327uint32_t ctrl = nv30->sample_mask << 16;328329if (blend->alpha_to_one)330ctrl |= 0x00000100;331if (blend->alpha_to_coverage)332ctrl |= 0x00000010;333if (rasterizer->multisample)334ctrl |= 0x00000001;335336BEGIN_NV04(push, NV30_3D(MULTISAMPLE_CONTROL), 1);337PUSH_DATA (push, ctrl);338}339340static void341nv30_validate_fragment(struct nv30_context *nv30)342{343struct nouveau_pushbuf *push = nv30->base.pushbuf;344struct nv30_fragprog *fp = nv30->fragprog.program;345346BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);347PUSH_DATA (push, nv30->state.rt_enable & (fp ? ~fp->rt_enable : 0x1f));348BEGIN_NV04(push, NV30_3D(COORD_CONVENTIONS), 1);349PUSH_DATA (push, (fp ? fp->coord_conventions : 0) | nv30->framebuffer.height);350}351352static void353nv30_validate_point_coord(struct nv30_context *nv30)354{355struct pipe_rasterizer_state *rasterizer = &nv30->rast->pipe;356struct nouveau_pushbuf *push = nv30->base.pushbuf;357struct nv30_fragprog *fp = nv30->fragprog.program;358uint32_t hw = 0x00000000;359360if (rasterizer) {361hw |= (nv30->rast->pipe.sprite_coord_enable & 0xff) << 8;362if (fp)363hw |= fp->point_sprite_control;364365if (rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) {366if (hw)367nv30->draw_flags |= NV30_NEW_RASTERIZER;368} else369if (rasterizer->point_quad_rasterization) {370hw |= NV30_3D_POINT_SPRITE_ENABLE;371}372}373374BEGIN_NV04(push, NV30_3D(POINT_SPRITE), 1);375PUSH_DATA (push, hw);376}377378struct state_validate {379void (*func)(struct nv30_context *);380uint32_t mask;381};382383static struct state_validate hwtnl_validate_list[] = {384{ nv30_validate_fb, NV30_NEW_FRAMEBUFFER },385{ nv30_validate_blend, NV30_NEW_BLEND },386{ nv30_validate_zsa, NV30_NEW_ZSA },387{ nv30_validate_rasterizer, NV30_NEW_RASTERIZER },388{ nv30_validate_multisample, NV30_NEW_SAMPLE_MASK | NV30_NEW_BLEND |389NV30_NEW_RASTERIZER },390{ nv30_validate_blend_colour, NV30_NEW_BLEND_COLOUR |391NV30_NEW_FRAMEBUFFER },392{ nv30_validate_stencil_ref, NV30_NEW_STENCIL_REF },393{ nv30_validate_stipple, NV30_NEW_STIPPLE },394{ nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER },395{ nv30_validate_viewport, NV30_NEW_VIEWPORT },396{ nv30_validate_clip, NV30_NEW_CLIP | NV30_NEW_RASTERIZER },397{ nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST },398{ nv30_vertprog_validate, NV30_NEW_VERTPROG | NV30_NEW_VERTCONST |399NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER },400{ nv30_validate_fragment, NV30_NEW_FRAMEBUFFER | NV30_NEW_FRAGPROG },401{ nv30_validate_point_coord, NV30_NEW_RASTERIZER | NV30_NEW_FRAGPROG },402{ nv30_fragtex_validate, NV30_NEW_FRAGTEX },403{ nv40_verttex_validate, NV30_NEW_VERTTEX },404{ nv30_vbo_validate, NV30_NEW_VERTEX | NV30_NEW_ARRAYS },405{}406};407408#define NV30_SWTNL_MASK (NV30_NEW_VIEWPORT | \409NV30_NEW_CLIP | \410NV30_NEW_VERTPROG | \411NV30_NEW_VERTCONST | \412NV30_NEW_VERTTEX | \413NV30_NEW_VERTEX | \414NV30_NEW_ARRAYS)415416static struct state_validate swtnl_validate_list[] = {417{ nv30_validate_fb, NV30_NEW_FRAMEBUFFER },418{ nv30_validate_blend, NV30_NEW_BLEND },419{ nv30_validate_zsa, NV30_NEW_ZSA },420{ nv30_validate_rasterizer, NV30_NEW_RASTERIZER },421{ nv30_validate_multisample, NV30_NEW_SAMPLE_MASK | NV30_NEW_BLEND |422NV30_NEW_RASTERIZER },423{ nv30_validate_blend_colour, NV30_NEW_BLEND_COLOUR |424NV30_NEW_FRAMEBUFFER },425{ nv30_validate_stencil_ref, NV30_NEW_STENCIL_REF },426{ nv30_validate_stipple, NV30_NEW_STIPPLE },427{ nv30_validate_scissor, NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER },428{ nv30_fragprog_validate, NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST },429{ nv30_validate_fragment, NV30_NEW_FRAMEBUFFER | NV30_NEW_FRAGPROG },430{ nv30_fragtex_validate, NV30_NEW_FRAGTEX },431{}432};433434static void435nv30_state_context_switch(struct nv30_context *nv30)436{437struct nv30_context *prev = nv30->screen->cur_ctx;438439if (prev)440nv30->state = prev->state;441nv30->dirty = NV30_NEW_ALL;442443if (!nv30->vertex)444nv30->dirty &= ~(NV30_NEW_VERTEX | NV30_NEW_ARRAYS);445446if (!nv30->vertprog.program)447nv30->dirty &= ~NV30_NEW_VERTPROG;448if (!nv30->fragprog.program)449nv30->dirty &= ~NV30_NEW_FRAGPROG;450451if (!nv30->blend)452nv30->dirty &= ~NV30_NEW_BLEND;453if (!nv30->rast)454nv30->dirty &= ~NV30_NEW_RASTERIZER;455if (!nv30->zsa)456nv30->dirty &= ~NV30_NEW_ZSA;457458nv30->screen->cur_ctx = nv30;459nv30->base.pushbuf->user_priv = &nv30->bufctx;460}461462bool463nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl)464{465struct nouveau_screen *screen = &nv30->screen->base;466struct nouveau_pushbuf *push = nv30->base.pushbuf;467struct nouveau_bufctx *bctx = nv30->bufctx;468struct nouveau_bufref *bref;469struct state_validate *validate;470471if (nv30->screen->cur_ctx != nv30)472nv30_state_context_switch(nv30);473474if (hwtnl) {475nv30->draw_dirty |= nv30->dirty;476if (nv30->draw_flags) {477nv30->draw_flags &= ~nv30->dirty;478if (!nv30->draw_flags)479nv30->dirty |= NV30_SWTNL_MASK;480}481}482483if (!nv30->draw_flags)484validate = hwtnl_validate_list;485else486validate = swtnl_validate_list;487488mask &= nv30->dirty;489490if (mask) {491while (validate->func) {492if (mask & validate->mask)493validate->func(nv30);494validate++;495}496497nv30->dirty &= ~mask;498}499500nouveau_pushbuf_bufctx(push, bctx);501if (nouveau_pushbuf_validate(push)) {502nouveau_pushbuf_bufctx(push, NULL);503return false;504}505506/*XXX*/507BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);508PUSH_DATA (push, 0);509if (nv30->screen->eng3d->oclass >= NV40_3D_CLASS) {510BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);511PUSH_DATA (push, 2);512BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);513PUSH_DATA (push, 1);514BEGIN_NV04(push, NV30_3D(R1718), 1);515PUSH_DATA (push, 0);516BEGIN_NV04(push, NV30_3D(R1718), 1);517PUSH_DATA (push, 0);518BEGIN_NV04(push, NV30_3D(R1718), 1);519PUSH_DATA (push, 0);520}521522LIST_FOR_EACH_ENTRY(bref, &bctx->current, thead) {523struct nv04_resource *res = bref->priv;524if (res && res->mm) {525nouveau_fence_ref(screen->fence.current, &res->fence);526527if (bref->flags & NOUVEAU_BO_RD)528res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;529530if (bref->flags & NOUVEAU_BO_WR) {531nouveau_fence_ref(screen->fence.current, &res->fence_wr);532res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;533}534}535}536537return true;538}539540void541nv30_state_release(struct nv30_context *nv30)542{543nouveau_pushbuf_bufctx(nv30->base.pushbuf, NULL);544}545546547