Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
4574 views
#include "util/format/u_format.h"
#include "util/u_framebuffer.h"
#include "util/u_math.h"
#include "util/u_viewport.h"

#include "nvc0/nvc0_context.h"

#if 0
static void
nvc0_validate_zcull(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct nv50_surface *sf = nv50_surface(fb->zsbuf);
   struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
   struct nouveau_bo *bo = mt->base.bo;
   uint32_t size;
   uint32_t offset = align(mt->total_size, 1 << 17);
   unsigned width, height;

   assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);

   size = mt->total_size * 2;

   height = align(fb->height, 32);
   width = fb->width % 224;
   if (width)
      width = fb->width + (224 - width);
   else
      width = fb->width;

   BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   offset += 1 << 17;
   BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
   PUSH_DATAh(push, bo->offset + offset);
   PUSH_DATA (push, bo->offset + offset);
   BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
   PUSH_DATA (push, size);
   PUSH_DATA (push, size >> 16);
   BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
   PUSH_DATA (push, 2);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
   PUSH_DATA (push, width);
   PUSH_DATA (push, height);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
   PUSH_DATA (push, 0);
}
#endif

/* Emit the 9-word RT_ADDRESS_HIGH(i) block for a render target slot that has
 * no surface bound: zero address, width 64, height 0, and the given layer
 * count.
 */
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
   BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 64);     // width
   PUSH_DATA (push, 0);      // height
   PUSH_DATA (push, 0);      // format
   PUSH_DATA (push, 0);      // tile mode
   PUSH_DATA (push, layers); // layers
   PUSH_DATA (push, 0);      // layer stride
   PUSH_DATA (push, 0);      // base layer
}

/* Pack one sample position (x, y) into the 32-bit word stored in the driver
 * constant buffer. Each coordinate is stored twice: once remapped through
 * lut[] and once raw.
 *
 * NOTE(review): lut[] has 16 entries, but gm200_validate_sample_locations()
 * computes y as "16 - (loc >> 4)", which is 16 when the upper nibble is 0.
 * That would index one past the end of lut[]. Confirm whether such input can
 * occur and how it should be clamped.
 */
static uint32_t
gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
{
   static const uint8_t lut[] = {
      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
   uint32_t result = 0;
   /* The shifts are done in uint32_t: the operands would otherwise be
    * promoted to (signed) int, and "y << 28" overflows int for y >= 8.
    */
   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
   result |= (uint32_t)lut[x] << 8 | (uint32_t)lut[y] << 24;
   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
   result |= (uint32_t)x << 12 | (uint32_t)y << 28;
   return result;
}

/* GM200+ path: build the table of 16 sample locations (either the
 * application-provided ones or the defaults from
 * nvc0_get_sample_locations()), upload their encoded form to the aux constant
 * buffer, and program the packed 4-bit locations through method 0x11e0.
 */
static void
gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned grid_width, grid_height, hw_grid_width;
   /* Zero-initialised so that any entry not written below is deterministic
    * when it is read back for packed_locations.
    */
   uint8_t sample_locations[16][2] = {{0}};
   /* Only "ms" out of every 8 words are written below, but all 64 words are
    * pushed; zero the rest instead of uploading uninitialised stack memory.
    */
   unsigned cb[64] = {0};
   unsigned i, pixel, pixel_y, pixel_x, sample;
   uint32_t packed_locations[4] = {0};

   screen->base.base.get_sample_pixel_grid(
      &screen->base.base, ms, &grid_width, &grid_height);

   hw_grid_width = grid_width;
   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
      hw_grid_width = 4;

   if (nvc0->sample_locations_enabled) {
      uint8_t locations[2 * 4 * 8];
      memcpy(locations, nvc0->sample_locations, sizeof(locations));
      util_sample_locations_flip_y(
         &screen->base.base, nvc0->framebuffer.height, ms, locations);

      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
         /* Position of this pixel inside the hardware grid. */
         const unsigned hw_x = pixel % hw_grid_width;
         const unsigned hw_y = pixel / hw_grid_width;

         for (sample = 0; sample < ms; sample++) {
            unsigned wi = pixel * ms + sample;
            unsigned ri = (hw_y * grid_width + hw_x % grid_width);
            ri = ri * ms + sample;
            /* Low nibble is x; high nibble is y, mirrored within the pixel. */
            sample_locations[wi][0] = locations[ri] & 0xf;
            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
         }
      }
   } else {
      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
      for (i = 0; i < 16; i++) {
         sample_locations[i][0] = ptr[i % ms][0];
         sample_locations[i][1] = ptr[i % ms][1];
      }
   }

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
         for (sample = 0; sample < ms; sample++) {
            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
            unsigned read_index = pixel_y % grid_height * hw_grid_width;
            uint8_t x, y;
            read_index += pixel_x % grid_width;
            read_index = read_index * ms + sample;
            x = sample_locations[read_index][0];
            y = sample_locations[read_index][1];
            cb[write_index] = gm200_encode_cb_sample_location(x, y);
         }
      }
   }
   PUSH_DATAp(push, cb, 64);

   /* Four locations per word, one byte each: x in bits 0-3, y in bits 4-7. */
   for (i = 0; i < 16; i++) {
      packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
      packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
   }

   BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
   PUSH_DATAp(push, packed_locations, 4);
}

/* Pre-GM200 path: upload the (x, y) float position of each of the "ms"
 * samples, as reported by pipe.get_sample_position(), to the aux constant
 * buffer.
 */
static void
nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned i;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (i = 0; i < ms; i++) {
      float xy[2];
      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
      PUSH_DATAf(push, xy[0]);
      PUSH_DATAf(push, xy[1]);
   }
}

/* Dispatch to the sample-location upload matching the screen's 3D class. */
static void
validate_sample_locations(struct nvc0_context *nvc0)
{
   unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);

   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
      gm200_validate_sample_locations(nvc0, ms);
   else
      nvc0_validate_sample_locations(nvc0, ms);
}

/* Program the colour render targets, the depth/stencil ("zeta") buffer, the
 * RT count and the multisample mode from nvc0->framebuffer, and register the
 * bound resources for writing in the 3D buffer context. A SERIALIZE is
 * emitted if any bound resource was flagged as being read by the GPU.
 */
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   unsigned i;
   unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
   unsigned nr_cbufs = fb->nr_cbufs;
   bool serialize = false;

   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);

   BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
   PUSH_DATA (push, fb->width << 16);
   PUSH_DATA (push, fb->height << 16);

   for (i = 0; i < fb->nr_cbufs; ++i) {
      struct nv50_surface *sf;
      struct nv04_resource *res;
      struct nouveau_bo *bo;

      if (!fb->cbufs[i]) {
         nvc0_fb_set_null_rt(push, i, 0);
         continue;
      }

      sf = nv50_surface(fb->cbufs[i]);
      res = nv04_resource(sf->base.texture);
      bo = res->bo;

      BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
      PUSH_DATAh(push, res->address + sf->offset);
      PUSH_DATA (push, res->address + sf->offset);
      if (likely(nouveau_bo_memtype(bo))) {
         /* BO has a non-zero memtype: describe it via its miptree layout. */
         struct nv50_miptree *mt = nv50_miptree(sf->base.texture);

         assert(sf->base.texture->target != PIPE_BUFFER);

         PUSH_DATA(push, sf->width);
         PUSH_DATA(push, sf->height);
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, (mt->layout_3d << 16) |
                          mt->level[sf->base.u.tex.level].tile_mode);
         PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
         PUSH_DATA(push, mt->layer_stride >> 2);
         PUSH_DATA(push, sf->base.u.tex.first_layer);

         ms_mode = mt->ms_mode;
      } else {
         /* Memtype 0: either a buffer or a pitch-described texture. */
         if (res->base.target == PIPE_BUFFER) {
            PUSH_DATA(push, 262144);
            PUSH_DATA(push, 1);
         } else {
            PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
            PUSH_DATA(push, sf->height);
         }
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, 1 << 12);
         PUSH_DATA(push, 1);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);

         nvc0_resource_fence(res, NOUVEAU_BO_WR);

         assert(!fb->zsbuf);
      }

      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* only register for writing, otherwise we'd always serialize here */
      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
   }

   if (fb->zsbuf) {
      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
      int unk = mt->base.base.target == PIPE_TEXTURE_2D;

      BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
      PUSH_DATAh(push, mt->base.address + sf->offset);
      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 1);
      BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
      PUSH_DATA (push, sf->width);
      PUSH_DATA (push, sf->height);
      PUSH_DATA (push, (unk << 16) |
                       (sf->base.u.tex.first_layer + sf->depth));
      BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
      PUSH_DATA (push, sf->base.u.tex.first_layer);

      ms_mode = mt->ms_mode;

      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      mt->base.status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
   } else {
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 0);
   }

   /* No attachments at all: bind a null RT 0 and take the sample count and
    * layer count from the framebuffer state itself.
    */
   if (nr_cbufs == 0 && !fb->zsbuf) {
      assert(util_is_power_of_two_or_zero(fb->samples));
      assert(fb->samples <= 8);

      nvc0_fb_set_null_rt(push, 0, fb->layers);

      if (fb->samples > 1)
         ms_mode = ffs(fb->samples) - 1;
      nr_cbufs = 1;
   }

   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
   PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);

   if (serialize)
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

   NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
}

/* Upload the four blend colour components. */
static void
nvc0_validate_blend_colour(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
   PUSH_DATAf(push, nvc0->blend_colour.color[0]);
   PUSH_DATAf(push, nvc0->blend_colour.color[1]);
   PUSH_DATAf(push, nvc0->blend_colour.color[2]);
   PUSH_DATAf(push, nvc0->blend_colour.color[3]);
}

/* Upload the front (ref[0]) and back (ref[1]) stencil reference values. */
static void
nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const ubyte *ref = &nvc0->stencil_ref.ref_value[0];

   IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
   IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
}

/* Upload the 32-word polygon stipple pattern, byte-swapping each word. */
static void
nvc0_validate_stipple(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;

   BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
   for (i = 0; i < 32; ++i)
      PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
}

/* Upload the scissor rectangles flagged in nvc0->scissors_dirty. When the
 * rasterizer's scissor enable differs from the cached value, all scissors
 * are re-emitted; with scissoring disabled a 0..0xffff rectangle is used.
 */
static void
nvc0_validate_scissor(struct nvc0_context *nvc0)
{
   int i;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
      nvc0->rast->pipe.scissor == nvc0->state.scissor)
      return;

   if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
      nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;

   nvc0->state.scissor = nvc0->rast->pipe.scissor;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_scissor_state *s = &nvc0->scissors[i];
      if (!(nvc0->scissors_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
      if (nvc0->rast->pipe.scissor) {
         PUSH_DATA(push, (s->maxx << 16) | s->minx);
         PUSH_DATA(push, (s->maxy << 16) | s->miny);
      } else {
         PUSH_DATA(push, (0xffff << 16) | 0);
         PUSH_DATA(push, (0xffff << 16) | 0);
      }
   }
   nvc0->scissors_dirty = 0;
}

/* Upload translate/scale, the clip rectangle, the depth range and (on
 * GM200+) the swizzle for every viewport flagged in nvc0->viewports_dirty.
 */
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint16_t class_3d = nvc0->screen->base.class_3d;
   int x, y, w, h, i;
   float zmin, zmax;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_viewport_state *vp = &nvc0->viewports[i];

      if (!(nvc0->viewports_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
      PUSH_DATAf(push, vp->translate[0]);
      PUSH_DATAf(push, vp->translate[1]);
      PUSH_DATAf(push, vp->translate[2]);

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
      PUSH_DATAf(push, vp->scale[0]);
      PUSH_DATAf(push, vp->scale[1]);
      PUSH_DATAf(push, vp->scale[2]);

      /* now set the viewport rectangle to viewport dimensions for clipping */

      x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
      y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
      w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
      h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

      /* If the halfz setting ever changes, the viewports will also get
       * updated. The rast will get updated before the validate function has a
       * chance to hit, so we can just use it directly without an atom
       * dependency.
       */
      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
      PUSH_DATAf(push, zmax);

      if (class_3d >= GM200_3D_CLASS) {
         BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SWIZZLE(i)), 1);
         PUSH_DATA (push, vp->swizzle_x << 0 |
                          vp->swizzle_y << 4 |
                          vp->swizzle_z << 8 |
                          vp->swizzle_w << 12);
      }
   }
   nvc0->viewports_dirty = 0;
}

/* Enable/disable the clip rectangles and, when enabled, upload all
 * NVC0_MAX_WINDOW_RECTANGLES slots, padding unused ones with zeros.
 */
static void
nvc0_validate_window_rects(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
   int i;

   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
   if (!enable)
      return;

   IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
   BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
   for (i = 0; i < nvc0->window_rect.rects; i++) {
      struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
      PUSH_DATA(push, (s->maxx << 16) | s->minx);
      PUSH_DATA(push, (s->maxy << 16) | s->miny);
   }
   for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
   }
}

/* Upload all PIPE_MAX_CLIP_PLANES user clip planes (4 words each) to the aux
 * constant buffer of shader stage "s".
 */
static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
   PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
   PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}

/* If program "vp" was built for fewer user clip planes than the highest bit
 * set in "mask" requires, destroy it, raise its num_ucps and re-validate the
 * stage it is bound to.
 */
static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
                        struct nvc0_program *vp, uint8_t mask)
{
   const unsigned n = util_logbase2(mask) + 1;

   if (vp->vp.num_ucps >= n)
      return;
   nvc0_program_destroy(nvc0, vp);

   vp->vp.num_ucps = n;
   if (likely(vp == nvc0->vertprog))
      nvc0_vertprog_validate(nvc0);
   else
   if (likely(vp == nvc0->gmtyprog))
      nvc0_gmtyprog_validate(nvc0);
   else
      nvc0_tevlprog_validate(nvc0);
}

/* Update clip-distance state. The program consulted is the geometry program
 * if bound, else the tessellation evaluation program if bound, else the
 * vertex program.
 */
static void
nvc0_validate_clip(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp;
   unsigned stage;
   uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;

   if (nvc0->gmtyprog) {
      stage = 3;
      vp = nvc0->gmtyprog;
   } else
   if (nvc0->tevlprog) {
      stage = 2;
      vp = nvc0->tevlprog;
   } else {
      stage = 0;
      vp = nvc0->vertprog;
   }

   if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
      nvc0_check_program_ucps(nvc0, vp, clip_enable);

   if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
      if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
         nvc0_upload_uclip_planes(nvc0, stage);

   clip_enable &= vp->vp.clip_enable;
   clip_enable |= vp->vp.cull_enable;

   /* Only emit the methods when the value differs from the cached state. */
   if (nvc0->state.clip_enable != clip_enable) {
      nvc0->state.clip_enable = clip_enable;
      IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
   }
   if (nvc0->state.clip_mode != vp->vp.clip_mode) {
      nvc0->state.clip_mode = vp->vp.clip_mode;
      BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
      PUSH_DATA (push, vp->vp.clip_mode);
   }
}

/* Copy the blend state object's pre-built command words into the pushbuf. */
static void
nvc0_validate_blend(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->blend->size);
   PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
}

/* Copy the depth/stencil/alpha state object's command words into the
 * pushbuf.
 */
static void
nvc0_validate_zsa(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->zsa->size);
   PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
}

/* Copy the rasterizer state object's command words into the pushbuf. */
static void
nvc0_validate_rasterizer(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   PUSH_SPACE(push, nvc0->rast->size);
   PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
}

/* (Re)bind every dirty constant buffer of the five 3D shader stages. User
 * constbufs (slot 0 only) are pushed into the screen's uniform BO;
 * resource-backed ones are bound at their own address.
 */
static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
   unsigned s;

   bool can_serialize = true;

   for (s = 0; s < 5; ++s) {
      while (nvc0->constbuf_dirty[s]) {
         int i = ffs(nvc0->constbuf_dirty[s]) - 1;
         nvc0->constbuf_dirty[s] &= ~(1 << i);

         if (nvc0->constbuf[s][i].user) {
            struct nouveau_bo *bo = nvc0->screen->uniform_bo;
            const unsigned base = NVC0_CB_USR_INFO(s);
            const unsigned size = nvc0->constbuf[s][0].size;
            assert(i == 0); /* we really only want OpenGL uniforms here */
            assert(nvc0->constbuf[s][0].u.data);

            if (!nvc0->state.uniform_buffer_bound[s]) {
               nvc0->state.uniform_buffer_bound[s] = true;

               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
            }
            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                            base, NVC0_MAX_CONSTBUF_SIZE,
                            0, (size + 3) / 4,
                            nvc0->constbuf[s][0].u.data);
         } else {
            struct nv04_resource *res =
               nv04_resource(nvc0->constbuf[s][i].u.buf);
            if (res) {
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
                                      nvc0->constbuf[s][i].size,
                                      res->address + nvc0->constbuf[s][i].offset);

               BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;

               if (i == 0)
                  nvc0->state.uniform_buffer_bound[s] = false;
            } else if (i != 0) {
               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
            }
         }
      }
   }

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
      /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
      nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
      nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
      nvc0->state.uniform_buffer_bound[5] = false;
   }
}

/* For each of the five 3D stages, write one 4-word record per shader buffer
 * slot (address low, address high, size, 0) into the stage's aux constant
 * buffer; unbound slots get four zeros.
 */
static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, s;

   for (s = 0; s < 5; s++) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
      PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
      for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
         if (nvc0->buffers[s][i].buffer) {
            struct nv04_resource *res =
               nv04_resource(nvc0->buffers[s][i].buffer);
            PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
            PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
            PUSH_DATA (push, 0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
            util_range_add(&res->base, &res->valid_buffer_range,
                           nvc0->buffers[s][i].buffer_offset,
                           nvc0->buffers[s][i].buffer_offset +
                           nvc0->buffers[s][i].buffer_size);
         } else {
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
         }
      }
   }

}

/* Write the low 16 bits of the sample mask to all four MSAA_MASK words. */
static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   unsigned mask[4] =
   {
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff,
      nvc0->sample_mask & 0xffff
   };

   BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
   PUSH_DATA (push, mask[0]);
   PUSH_DATA (push, mask[1]);
   PUSH_DATA (push, mask[2]);
   PUSH_DATA (push, mask[3]);
}

/* Program SAMPLE_SHADING from nvc0->min_samples rounded up to a power of
 * two; the enable bit is set only when more than one sample is requested.
 */
static void
nvc0_validate_min_samples(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int samples;

   samples = util_next_power_of_two(nvc0->min_samples);
   if (samples > 1) {
      // If we're using the incoming sample mask and doing sample shading, we
      // have to do sample shading "to the max", otherwise there's no way to
      // tell which sets of samples are covered by the current invocation.
      // Similarly for reading the framebuffer.
      if (nvc0->fragprog && (
                nvc0->fragprog->fp.sample_mask_in ||
                nvc0->fragprog->fp.reads_framebuffer))
         samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
      samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
   }

   IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
}

/* Bind each 3D stage's aux (driver) constant buffer at slot 15 and flag the
 * compute driver constbuf as dirty.
 */
static void
nvc0_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   int i;

   for (i = 0; i < 5; ++i)
      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));

   nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
}

/* Decide whether rasterization can be disabled: either the rasterizer state
 * asks for discard, or neither depth nor front stencil testing is enabled
 * and there is no fragment program with a non-zero hdr[18].
 */
static void
nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   bool rasterizer_discard;

   if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
      rasterizer_discard = true;
   } else {
      bool zs = nvc0->zsa &&
         (nvc0->zsa->pipe.depth_enabled || nvc0->zsa->pipe.stencil[0].enabled);
      rasterizer_discard = !zs &&
         (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
   }

   if (rasterizer_discard != nvc0->state.rasterizer_discard) {
      nvc0->state.rasterizer_discard = rasterizer_discard;
      IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
   }
}

/* alpha test is disabled if there are no color RTs, so make sure we have at
 * least one if alpha test is enabled. Note that this must run after
 * nvc0_validate_fb, otherwise that will override the RT count setting.
 */
static void
nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (nvc0->zsa && nvc0->zsa->pipe.alpha_enabled &&
       nvc0->framebuffer.zsbuf &&
       nvc0->framebuffer.nr_cbufs == 0) {
      nvc0_fb_set_null_rt(push, 0, 0);
      BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
      PUSH_DATA (push, (076543210 << 4) | 1);
   }
}

/* When the rasterizer requests unscaled polygon offset units, scale them by
 * 2^16 for a Z16_UNORM depth buffer and by 2^24 otherwise.
 */
static void
nvc0_validate_rast_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   struct pipe_rasterizer_state *rast;

   /* Check the state object itself before taking the address of a member;
    * testing "&nvc0->rast->pipe" for NULL only works if pipe sits at offset 0.
    */
   if (!nvc0->rast)
      return;
   rast = &nvc0->rast->pipe;

   if (rast->offset_units_unscaled) {
      BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
      if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
         PUSH_DATAf(push, rast->offset_units * (1 << 16));
      else
         PUSH_DATAf(push, rast->offset_units * (1 << 24));
   }
}


/* Upload the default outer (4) and inner (2) tessellation levels. */
static void
nvc0_validate_tess_state(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}

/* If we have a frag shader bound which tries to read from the framebuffer, we
 * have to make sure that the fb is bound as a texture in the expected
 * location. For Fermi, that's in the special driver slot 16, while for Kepler
 * it's a regular binding stored in the driver constbuf.
 */
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct pipe_sampler_view *old_view = nvc0->fbtexture;
   struct pipe_sampler_view *new_view = NULL;

   if (nvc0->fragprog &&
       nvc0->fragprog->fp.reads_framebuffer &&
       nvc0->framebuffer.nr_cbufs &&
       nvc0->framebuffer.cbufs[0]) {
      /* Zero-initialised so that template fields not assigned below are not
       * passed to create_sampler_view() as indeterminate values.
       */
      struct pipe_sampler_view tmpl = {0};
      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];

      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.format = sf->format;
      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
      tmpl.swizzle_r = PIPE_SWIZZLE_X;
      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
      tmpl.swizzle_a = PIPE_SWIZZLE_W;

      /* Bail if it's the same parameters */
      if (old_view && old_view->texture == sf->texture &&
          old_view->format == sf->format &&
          old_view->u.tex.first_level == sf->u.tex.level &&
          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
          old_view->u.tex.last_layer == sf->u.tex.last_layer)
         return;

      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
   } else if (old_view == NULL) {
      return;
   }

   if (old_view)
      pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
   nvc0->fbtexture = new_view;

   if (new_view) {
      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
      assert(tic->id < 0);
      tic->id = nvc0_screen_tic_alloc(screen, tic);
      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
         PUSH_DATA (push, (0 << 20) | tic->id);
      } else {
         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
         PUSH_DATA (push, (tic->id << 9) | 1);
      }

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }
}

/* Make "ctx_to" the screen's current context: inherit the cached hardware
 * state from the previous current context (or from screen->save_state if
 * there is none), mark everything dirty, then clear the dirty bits of state
 * objects that are not bound in ctx_to.
 */
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
   struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
   unsigned s;

   if (ctx_from)
      ctx_to->state = ctx_from->state;
   else
      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty_3d = ~0;
   ctx_to->dirty_cp = ~0;
   ctx_to->viewports_dirty = ~0;
   ctx_to->scissors_dirty = ~0;

   for (s = 0; s < 6; ++s) {
      ctx_to->samplers_dirty[s] = ~0;
      ctx_to->textures_dirty[s] = ~0;
      ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
      ctx_to->buffers_dirty[s] = ~0;
      ctx_to->images_dirty[s] = ~0;
   }

   /* Reset tfb as the shader that owns it may have been deleted. */
   ctx_to->state.tfb = NULL;

   if (!ctx_to->vertex)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);

   if (!ctx_to->vertprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
   if (!ctx_to->fragprog)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;

   if (!ctx_to->blend)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
   if (!ctx_to->rast)
      ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
   if (!ctx_to->zsa)
      ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;

   ctx_to->screen->cur_ctx = ctx_to;
}

/* 3D validation table: each entry pairs a validate function with the dirty
 * bits that trigger it. nvc0_state_validate() walks the entries in order.
 */
static struct nvc0_state_validate
validate_list_3d[] = {
    { nvc0_validate_fb,            NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_blend,         NVC0_NEW_3D_BLEND },
    { nvc0_validate_zsa,           NVC0_NEW_3D_ZSA },
    { nvc0_validate_sample_mask,   NVC0_NEW_3D_SAMPLE_MASK },
    { nvc0_validate_rasterizer,    NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_blend_colour,  NVC0_NEW_3D_BLEND_COLOUR },
    { nvc0_validate_stencil_ref,   NVC0_NEW_3D_STENCIL_REF },
    { nvc0_validate_stipple,       NVC0_NEW_3D_STIPPLE },
    { nvc0_validate_scissor,       NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_viewport,      NVC0_NEW_3D_VIEWPORT },
    { nvc0_validate_window_rects,  NVC0_NEW_3D_WINDOW_RECTS },
    { nvc0_vertprog_validate,      NVC0_NEW_3D_VERTPROG },
    { nvc0_tctlprog_validate,      NVC0_NEW_3D_TCTLPROG },
    { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
    { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
    { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES |
                                   NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_fp_zsa_rast,   NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                   NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_zsa_fb,        NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_rast_fb,       NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
                                   NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_constbufs_validate,     NVC0_NEW_3D_CONSTBUF },
    { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
    { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
    { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
    { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
    { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
    { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
    { nvc0_layer_validate,         NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
                                   NVC0_NEW_3D_FRAMEBUFFER},
};

/* Run every entry of "validate_list" whose state bits intersect
 * (*dirty & mask), clear those bits from *dirty, then attach "bufctx" to the
 * pushbuf and validate it.
 *
 * Switches the screen's current context to "nvc0" first if necessary.
 * Returns true when nouveau_pushbuf_validate() reports success (0).
 */
bool
nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
                    struct nvc0_state_validate *validate_list, int size,
                    uint32_t *dirty, struct nouveau_bufctx *bufctx)
{
   uint32_t state_mask;
   int ret;
   int i; /* same signedness as "size" */

   if (nvc0->screen->cur_ctx != nvc0)
      nvc0_switch_pipe_context(nvc0);

   state_mask = *dirty & mask;

   if (state_mask) {
      for (i = 0; i < size; ++i) {
         struct nvc0_state_validate *validate = &validate_list[i];

         if (state_mask & validate->states)
            validate->func(nvc0);
      }
      *dirty &= ~state_mask;

      nvc0_bufctx_fence(nvc0, bufctx, false);
   }

   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
   ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);

   return !ret;
}

/* Validate the 3D state selected by "mask" against validate_list_3d. If the
 * context was flagged as flushed, clear the flag and re-fence the 3D buffer
 * context. Returns the result of nvc0_state_validate().
 */
bool
nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
                             ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
                             nvc0->bufctx_3d);

   if (unlikely(nvc0->state.flushed)) {
      nvc0->state.flushed = false;
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
   }
   return ret;
}