/* Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
 * 4574 views
 */
/*1* Copyright 2008 Ben Skeggs2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "nvc0/nvc0_context.h"23#include "nvc0/nvc0_resource.h"24#include "nvc0/gm107_texture.xml.h"25#include "nvc0/nvc0_compute.xml.h"26#include "nv50/g80_texture.xml.h"27#include "nv50/g80_defs.xml.h"2829#include "util/format/u_format.h"3031#define NVE4_TIC_ENTRY_INVALID 0x000fffff32#define NVE4_TSC_ENTRY_INVALID 0xfff000003334static inline uint32_t35nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)36{37switch (swz) {38case PIPE_SWIZZLE_X : return fmt->tic.src_x;39case PIPE_SWIZZLE_Y: return fmt->tic.src_y;40case PIPE_SWIZZLE_Z : return fmt->tic.src_z;41case PIPE_SWIZZLE_W: return fmt->tic.src_w;42case PIPE_SWIZZLE_1:43return tex_int ? 
G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;44case PIPE_SWIZZLE_0:45default:46return G80_TIC_SOURCE_ZERO;47}48}4950struct pipe_sampler_view *51nvc0_create_sampler_view(struct pipe_context *pipe,52struct pipe_resource *res,53const struct pipe_sampler_view *templ)54{55uint32_t flags = 0;5657if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)58flags |= NV50_TEXVIEW_SCALED_COORDS;5960return nvc0_create_texture_view(pipe, res, templ, flags);61}6263static struct pipe_sampler_view *64gm107_create_texture_view(struct pipe_context *pipe,65struct pipe_resource *texture,66const struct pipe_sampler_view *templ,67uint32_t flags)68{69const struct util_format_description *desc;70const struct nvc0_format *fmt;71uint64_t address;72uint32_t *tic;73uint32_t swz[4];74uint32_t width, height;75uint32_t depth;76struct nv50_tic_entry *view;77struct nv50_miptree *mt;78bool tex_int;7980view = MALLOC_STRUCT(nv50_tic_entry);81if (!view)82return NULL;83mt = nv50_miptree(texture);8485view->pipe = *templ;86view->pipe.reference.count = 1;87view->pipe.texture = NULL;88view->pipe.context = pipe;8990view->id = -1;91view->bindless = 0;9293pipe_resource_reference(&view->pipe.texture, texture);9495tic = &view->tic[0];9697desc = util_format_description(view->pipe.format);98tex_int = util_format_is_pure_integer(view->pipe.format);99100fmt = &nvc0_format_table[view->pipe.format];101swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);102swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);103swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);104swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);105106tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;107tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;108tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;109tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;110tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;111tic[0] |= swz[0] << 
GM107_TIC2_0_X_SOURCE__SHIFT;112tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;113tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;114tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;115116address = mt->base.address;117118tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;119tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;120tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;121122if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)123tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;124125if (!(flags & NV50_TEXVIEW_SCALED_COORDS))126tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;127else128tic[5] = 0;129130/* check for linear storage type */131if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {132if (texture->target == PIPE_BUFFER) {133assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));134width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;135address +=136view->pipe.u.buf.offset;137tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;138tic[3] |= width >> 16;139tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;140tic[4] |= width & 0xffff;141} else {142assert(!(mt->level[0].pitch & 0x1f));143/* must be 2D texture without mip maps */144tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;145tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;146tic[3] |= mt->level[0].pitch >> 5;147tic[4] |= mt->base.base.width0 - 1;148tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;149tic[5] |= mt->base.base.height0 - 1;150}151tic[1] = address;152tic[2] |= address >> 32;153tic[6] = 0;154tic[7] = 0;155return &view->pipe;156}157158tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;159tic[3] |=160((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |161((mt->level[0].tile_mode & 0xf00) >> 8 << 6);162163depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);164165if (mt->base.base.array_size > 1) {166/* there doesn't seem to be a base layer field in TIC */167address += view->pipe.u.tex.first_layer * mt->layer_stride;168depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;169}170tic[1] 
= address;171tic[2] |= address >> 32;172173switch (templ->target) {174case PIPE_TEXTURE_1D:175tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;176break;177case PIPE_TEXTURE_2D:178tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;179break;180case PIPE_TEXTURE_RECT:181tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;182break;183case PIPE_TEXTURE_3D:184tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;185break;186case PIPE_TEXTURE_CUBE:187depth /= 6;188tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;189break;190case PIPE_TEXTURE_1D_ARRAY:191tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;192break;193case PIPE_TEXTURE_2D_ARRAY:194tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;195break;196case PIPE_TEXTURE_CUBE_ARRAY:197depth /= 6;198tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;199break;200default:201unreachable("unexpected/invalid texture target");202}203204tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?205GM107_TIC2_3_USE_HEADER_OPT_CONTROL :206GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |207GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;208209if (flags & (NV50_TEXVIEW_ACCESS_RESOLVE | NV50_TEXVIEW_IMAGE_GM107)) {210width = mt->base.base.width0 << mt->ms_x;211height = mt->base.base.height0 << mt->ms_y;212} else {213width = mt->base.base.width0;214height = mt->base.base.height0;215}216217tic[4] |= width - 1;218219tic[5] |= (height - 1) & 0xffff;220tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;221tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;222223/* sampling points: (?) 
*/224if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {225tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;226tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;227} else {228tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;229tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;230}231232tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;233tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;234235return &view->pipe;236}237238struct pipe_sampler_view *239gm107_create_texture_view_from_image(struct pipe_context *pipe,240const struct pipe_image_view *view)241{242struct nv04_resource *res = nv04_resource(view->resource);243struct pipe_sampler_view templ = {};244enum pipe_texture_target target;245uint32_t flags = 0;246247if (!res)248return NULL;249target = res->base.target;250251if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)252target = PIPE_TEXTURE_2D_ARRAY;253254templ.target = target;255templ.format = view->format;256templ.swizzle_r = PIPE_SWIZZLE_X;257templ.swizzle_g = PIPE_SWIZZLE_Y;258templ.swizzle_b = PIPE_SWIZZLE_Z;259templ.swizzle_a = PIPE_SWIZZLE_W;260261if (target == PIPE_BUFFER) {262templ.u.buf.offset = view->u.buf.offset;263templ.u.buf.size = view->u.buf.size;264} else {265templ.u.tex.first_layer = view->u.tex.first_layer;266templ.u.tex.last_layer = view->u.tex.last_layer;267templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;268}269270flags = NV50_TEXVIEW_SCALED_COORDS | NV50_TEXVIEW_IMAGE_GM107;271272return nvc0_create_texture_view(pipe, &res->base, &templ, flags);273}274275static struct pipe_sampler_view *276gf100_create_texture_view(struct pipe_context *pipe,277struct pipe_resource *texture,278const struct pipe_sampler_view *templ,279uint32_t flags)280{281const struct util_format_description *desc;282const struct nvc0_format *fmt;283uint64_t address;284uint32_t *tic;285uint32_t swz[4];286uint32_t width, height;287uint32_t depth;288uint32_t tex_fmt;289struct 
nv50_tic_entry *view;290struct nv50_miptree *mt;291bool tex_int;292293view = MALLOC_STRUCT(nv50_tic_entry);294if (!view)295return NULL;296mt = nv50_miptree(texture);297298view->pipe = *templ;299view->pipe.reference.count = 1;300view->pipe.texture = NULL;301view->pipe.context = pipe;302303view->id = -1;304view->bindless = 0;305306pipe_resource_reference(&view->pipe.texture, texture);307308tic = &view->tic[0];309310desc = util_format_description(view->pipe.format);311312fmt = &nvc0_format_table[view->pipe.format];313314tex_int = util_format_is_pure_integer(view->pipe.format);315tex_fmt = fmt->tic.format & 0x3f;316317swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);318swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);319swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);320swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);321tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |322(fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |323(fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |324(fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |325(fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |326(swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |327(swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |328(swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |329(swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |330((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));331332address = mt->base.address;333334tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;335336if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)337tic[2] |= G80_TIC_2_SRGB_CONVERSION;338339if (!(flags & NV50_TEXVIEW_SCALED_COORDS))340tic[2] |= G80_TIC_2_NORMALIZED_COORDS;341342/* check for linear storage type */343if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {344if (texture->target == PIPE_BUFFER) {345assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));346address +=347view->pipe.u.buf.offset;348tic[2] |= G80_TIC_2_LAYOUT_PITCH | 
G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;349tic[3] = 0;350tic[4] = /* width */351view->pipe.u.buf.size / (desc->block.bits / 8);352tic[5] = 0;353} else {354/* must be 2D texture without mip maps */355tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;356tic[3] = mt->level[0].pitch;357tic[4] = mt->base.base.width0;358tic[5] = (1 << 16) | mt->base.base.height0;359}360tic[6] =361tic[7] = 0;362tic[1] = address;363tic[2] |= address >> 32;364return &view->pipe;365}366367tic[2] |=368((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |369((mt->level[0].tile_mode & 0xf00) << (25 - 8));370371depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);372373if (mt->base.base.array_size > 1) {374/* there doesn't seem to be a base layer field in TIC */375address += view->pipe.u.tex.first_layer * mt->layer_stride;376depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;377}378tic[1] = address;379tic[2] |= address >> 32;380381switch (templ->target) {382case PIPE_TEXTURE_1D:383tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;384break;385case PIPE_TEXTURE_2D:386tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;387break;388case PIPE_TEXTURE_RECT:389tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;390break;391case PIPE_TEXTURE_3D:392tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;393break;394case PIPE_TEXTURE_CUBE:395depth /= 6;396tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;397break;398case PIPE_TEXTURE_1D_ARRAY:399tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;400break;401case PIPE_TEXTURE_2D_ARRAY:402tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;403break;404case PIPE_TEXTURE_CUBE_ARRAY:405depth /= 6;406tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;407break;408default:409unreachable("unexpected/invalid texture target");410}411412tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 
0x20000000 : 0x00300000;413414if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {415width = mt->base.base.width0 << mt->ms_x;416height = mt->base.base.height0 << mt->ms_y;417} else {418width = mt->base.base.width0;419height = mt->base.base.height0;420}421422tic[4] = (1 << 31) | width;423424tic[5] = height & 0xffff;425tic[5] |= depth << 16;426tic[5] |= mt->base.base.last_level << 28;427428/* sampling points: (?) */429if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)430tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;431else432tic[6] = 0x03000000;433434tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;435tic[7] |= mt->ms_mode << 12;436437return &view->pipe;438}439440struct pipe_sampler_view *441nvc0_create_texture_view(struct pipe_context *pipe,442struct pipe_resource *texture,443const struct pipe_sampler_view *templ,444uint32_t flags)445{446if (nvc0_context(pipe)->screen->tic.maxwell)447return gm107_create_texture_view(pipe, texture, templ, flags);448return gf100_create_texture_view(pipe, texture, templ, flags);449}450451bool452nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,453struct nv04_resource *res)454{455uint64_t address = res->address;456if (res->base.target != PIPE_BUFFER)457return false;458address += tic->pipe.u.buf.offset;459if (tic->tic[1] == (uint32_t)address &&460(tic->tic[2] & 0xff) == address >> 32)461return false;462463tic->tic[1] = address;464tic->tic[2] &= 0xffffff00;465tic->tic[2] |= address >> 32;466467if (tic->id >= 0) {468nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,469NV_VRAM_DOMAIN(&nvc0->screen->base), 32,470tic->tic);471return true;472}473474return false;475}476477bool478nvc0_validate_tic(struct nvc0_context *nvc0, int s)479{480uint32_t commands[32];481struct nouveau_pushbuf *push = nvc0->base.pushbuf;482unsigned i;483unsigned n = 0;484bool need_flush = false;485486for (i = 0; i < nvc0->num_textures[s]; ++i) {487struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);488struct 
nv04_resource *res;489const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));490491if (!tic) {492if (dirty)493commands[n++] = (i << 1) | 0;494continue;495}496res = nv04_resource(tic->pipe.texture);497need_flush |= nvc0_update_tic(nvc0, tic, res);498499if (tic->id < 0) {500tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);501502nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,503NV_VRAM_DOMAIN(&nvc0->screen->base), 32,504tic->tic);505need_flush = true;506} else507if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {508if (unlikely(s == 5))509BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);510else511BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);512PUSH_DATA (push, (tic->id << 4) | 1);513NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);514}515nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);516517res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;518res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;519520if (!dirty)521continue;522commands[n++] = (tic->id << 9) | (i << 1) | 1;523524if (unlikely(s == 5))525BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);526else527BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);528}529for (; i < nvc0->state.num_textures[s]; ++i)530commands[n++] = (i << 1) | 0;531532nvc0->state.num_textures[s] = nvc0->num_textures[s];533534if (n) {535if (unlikely(s == 5))536BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);537else538BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);539PUSH_DATAp(push, commands, n);540}541nvc0->textures_dirty[s] = 0;542543return need_flush;544}545546static bool547nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)548{549struct nouveau_pushbuf *push = nvc0->base.pushbuf;550unsigned i;551bool need_flush = false;552553for (i = 0; i < nvc0->num_textures[s]; ++i) {554struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);555struct nv04_resource *res;556const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));557558if (!tic) {559nvc0->tex_handles[s][i] |= 
NVE4_TIC_ENTRY_INVALID;560continue;561}562res = nv04_resource(tic->pipe.texture);563need_flush |= nvc0_update_tic(nvc0, tic, res);564565if (tic->id < 0) {566tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);567568nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,569NV_VRAM_DOMAIN(&nvc0->screen->base), 32,570tic->tic);571need_flush = true;572} else573if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {574BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);575PUSH_DATA (push, (tic->id << 4) | 1);576}577nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);578579res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;580res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;581582nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;583nvc0->tex_handles[s][i] |= tic->id;584if (dirty)585BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);586}587for (; i < nvc0->state.num_textures[s]; ++i) {588nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;589nvc0->textures_dirty[s] |= 1 << i;590}591592nvc0->state.num_textures[s] = nvc0->num_textures[s];593594return need_flush;595}596597void nvc0_validate_textures(struct nvc0_context *nvc0)598{599bool need_flush = false;600int i;601602for (i = 0; i < 5; i++) {603if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)604need_flush |= nve4_validate_tic(nvc0, i);605else606need_flush |= nvc0_validate_tic(nvc0, i);607}608609if (need_flush) {610BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);611PUSH_DATA (nvc0->base.pushbuf, 0);612}613614/* Invalidate all CP textures because they are aliased. 
*/615for (int i = 0; i < nvc0->num_textures[5]; i++)616nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));617nvc0->textures_dirty[5] = ~0;618nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;619}620621bool622nvc0_validate_tsc(struct nvc0_context *nvc0, int s)623{624uint32_t commands[16];625struct nouveau_pushbuf *push = nvc0->base.pushbuf;626unsigned i;627unsigned n = 0;628bool need_flush = false;629630for (i = 0; i < nvc0->num_samplers[s]; ++i) {631struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);632633if (!(nvc0->samplers_dirty[s] & (1 << i)))634continue;635if (!tsc) {636commands[n++] = (i << 4) | 0;637continue;638}639nvc0->seamless_cube_map = tsc->seamless_cube_map;640if (tsc->id < 0) {641tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);642643nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,64465536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),64532, tsc->tsc);646need_flush = true;647}648nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);649650commands[n++] = (tsc->id << 12) | (i << 4) | 1;651}652for (; i < nvc0->state.num_samplers[s]; ++i)653commands[n++] = (i << 4) | 0;654655nvc0->state.num_samplers[s] = nvc0->num_samplers[s];656657// TXF, in unlinked tsc mode, will always use sampler 0. So we have to658// ensure that it remains bound. Its contents don't matter, all samplers we659// ever create have the SRGB_CONVERSION bit set, so as long as the first660// entry is initialized, we're good to go. 
This is the only bit that has661// any effect on what TXF does.662if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) {663if (n == 0)664n = 1;665// We're guaranteed that the first command refers to the first slot, so666// we're not overwriting a valid entry.667commands[0] = (0 << 12) | (0 << 4) | 1;668}669670if (n) {671if (unlikely(s == 5))672BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);673else674BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);675PUSH_DATAp(push, commands, n);676}677nvc0->samplers_dirty[s] = 0;678679return need_flush;680}681682bool683nve4_validate_tsc(struct nvc0_context *nvc0, int s)684{685unsigned i;686bool need_flush = false;687688for (i = 0; i < nvc0->num_samplers[s]; ++i) {689struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);690691if (!tsc) {692nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;693continue;694}695if (tsc->id < 0) {696tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);697698nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,69965536 + tsc->id * 32,700NV_VRAM_DOMAIN(&nvc0->screen->base),70132, tsc->tsc);702need_flush = true;703}704nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);705706nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;707nvc0->tex_handles[s][i] |= tsc->id << 20;708}709for (; i < nvc0->state.num_samplers[s]; ++i) {710nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;711nvc0->samplers_dirty[s] |= 1 << i;712}713714nvc0->state.num_samplers[s] = nvc0->num_samplers[s];715716return need_flush;717}718719void nvc0_validate_samplers(struct nvc0_context *nvc0)720{721bool need_flush = false;722int i;723724for (i = 0; i < 5; i++) {725if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)726need_flush |= nve4_validate_tsc(nvc0, i);727else728need_flush |= nvc0_validate_tsc(nvc0, i);729}730731if (need_flush) {732BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);733PUSH_DATA (nvc0->base.pushbuf, 0);734}735736/* Invalidate all CP samplers because they are aliased. 
*/737nvc0->samplers_dirty[5] = ~0;738nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;739}740741void742nvc0_upload_tsc0(struct nvc0_context *nvc0)743{744struct nouveau_pushbuf *push = nvc0->base.pushbuf;745u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };746nvc0->base.push_data(&nvc0->base, nvc0->screen->txc,74765536 /*+ tsc->id * 32*/,748NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data);749BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1);750PUSH_DATA (push, 0);751}752753/* Upload the "diagonal" entries for the possible texture sources ($t == $s).754* At some point we might want to get a list of the combinations used by a755* shader and fill in those entries instead of having it extract the handles.756*/757void758nve4_set_tex_handles(struct nvc0_context *nvc0)759{760struct nouveau_pushbuf *push = nvc0->base.pushbuf;761struct nvc0_screen *screen = nvc0->screen;762unsigned s;763764if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)765return;766767for (s = 0; s < 5; ++s) {768uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];769if (!dirty)770continue;771BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);772PUSH_DATA (push, NVC0_CB_AUX_SIZE);773PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));774PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));775do {776int i = ffs(dirty) - 1;777dirty &= ~(1 << i);778779BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);780PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(i));781PUSH_DATA (push, nvc0->tex_handles[s][i]);782} while (dirty);783784nvc0->textures_dirty[s] = 0;785nvc0->samplers_dirty[s] = 0;786}787}788789static uint64_t790nve4_create_texture_handle(struct pipe_context *pipe,791struct pipe_sampler_view *view,792const struct pipe_sampler_state *sampler)793{794/* We have to create persistent handles that won't change for these objects795* That means that we have to upload them into place and lock them so that796* they can't be kicked out later.797*/798struct nvc0_context *nvc0 = nvc0_context(pipe);799struct nouveau_pushbuf *push = 
nvc0->base.pushbuf;800struct nv50_tic_entry *tic = nv50_tic_entry(view);801struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);802struct pipe_sampler_view *v = NULL;803804tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);805if (tsc->id < 0)806goto fail;807808if (tic->id < 0) {809tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);810if (tic->id < 0)811goto fail;812813nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,814NV_VRAM_DOMAIN(&nvc0->screen->base), 32,815tic->tic);816817IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);818}819820nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,82165536 + tsc->id * 32,822NV_VRAM_DOMAIN(&nvc0->screen->base),82332, tsc->tsc);824825IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);826827// Add an extra reference to this sampler view effectively held by this828// texture handle. This is to deal with the sampler view being dereferenced829// before the handle is. However we need the view to still be live until the830// handle to it is deleted.831pipe_sampler_view_reference(&v, view);832p_atomic_inc(&tic->bindless);833834nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);835nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);836837return 0x100000000ULL | (tsc->id << 20) | tic->id;838839fail:840pipe->delete_sampler_state(pipe, tsc);841return 0;842}843844static bool845view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {846for (int s = 0; s < 6; s++) {847for (int i = 0; i < nvc0->num_textures[s]; i++)848if (nvc0->textures[s][i] == view)849return true;850}851return false;852}853854static void855nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)856{857struct nvc0_context *nvc0 = nvc0_context(pipe);858uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;859uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;860struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];861862if (entry) {863struct pipe_sampler_view *view = 
&entry->pipe;864assert(entry->bindless);865p_atomic_dec(&entry->bindless);866if (!view_bound(nvc0, view))867nvc0_screen_tic_unlock(nvc0->screen, entry);868pipe_sampler_view_reference(&view, NULL);869}870871pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);872}873874static void875nve4_make_texture_handle_resident(struct pipe_context *pipe,876uint64_t handle, bool resident)877{878struct nvc0_context *nvc0 = nvc0_context(pipe);879if (resident) {880struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));881struct nv50_tic_entry *tic =882nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];883assert(tic);884assert(tic->bindless);885886res->handle = handle;887res->buf = nv04_resource(tic->pipe.texture);888res->flags = NOUVEAU_BO_RD;889list_add(&res->list, &nvc0->tex_head);890} else {891list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {892if (pos->handle == handle) {893list_del(&pos->list);894free(pos);895break;896}897}898}899}900901static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];902static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];903static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];904905static void906nvc0_get_surface_dims(const struct pipe_image_view *view,907int *width, int *height, int *depth)908{909struct nv04_resource *res = nv04_resource(view->resource);910int level;911912*width = *height = *depth = 1;913if (res->base.target == PIPE_BUFFER) {914*width = view->u.buf.size / util_format_get_blocksize(view->format);915return;916}917918level = view->u.tex.level;919*width = u_minify(view->resource->width0, level);920*height = u_minify(view->resource->height0, level);921*depth = u_minify(view->resource->depth0, level);922923switch (res->base.target) {924case PIPE_TEXTURE_1D_ARRAY:925case PIPE_TEXTURE_2D_ARRAY:926case PIPE_TEXTURE_CUBE:927case PIPE_TEXTURE_CUBE_ARRAY:928*depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;929break;930case PIPE_TEXTURE_1D:931case 
PIPE_TEXTURE_2D:932case PIPE_TEXTURE_RECT:933case PIPE_TEXTURE_3D:934break;935default:936assert(!"unexpected texture target");937break;938}939}940941void942nvc0_mark_image_range_valid(const struct pipe_image_view *view)943{944struct nv04_resource *res = (struct nv04_resource *)view->resource;945946assert(view->resource->target == PIPE_BUFFER);947948util_range_add(&res->base, &res->valid_buffer_range,949view->u.buf.offset,950view->u.buf.offset + view->u.buf.size);951}952953void954nve4_set_surface_info(struct nouveau_pushbuf *push,955const struct pipe_image_view *view,956struct nvc0_context *nvc0)957{958struct nvc0_screen *screen = nvc0->screen;959struct nv04_resource *res;960uint64_t address;961uint32_t *const info = push->cur;962int width, height, depth;963uint8_t log2cpp;964965if (view && !nve4_su_format_map[view->format])966NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");967968push->cur += 16;969970if (!view || !nve4_su_format_map[view->format]) {971memset(info, 0, 16 * sizeof(*info));972973info[0] = 0xbadf0000;974info[1] = 0x80004000;975info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +976screen->lib_code->start;977return;978}979res = nv04_resource(view->resource);980981address = res->address;982983/* get surface dimensions based on the target. */984nvc0_get_surface_dims(view, &width, &height, &depth);985986info[8] = width;987info[9] = height;988info[10] = depth;989switch (res->base.target) {990case PIPE_TEXTURE_1D_ARRAY:991info[11] = 1;992break;993case PIPE_TEXTURE_2D:994case PIPE_TEXTURE_RECT:995info[11] = 2;996break;997case PIPE_TEXTURE_3D:998info[11] = 3;999break;1000case PIPE_TEXTURE_2D_ARRAY:1001case PIPE_TEXTURE_CUBE:1002case PIPE_TEXTURE_CUBE_ARRAY:1003info[11] = 4;1004break;1005default:1006info[11] = 0;1007break;1008}1009log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;10101011/* Stick the blockwidth (ie. number of bytes per pixel) to check if the1012* format doesn't mismatch. 
*/1013info[12] = util_format_get_blocksize(view->format);10141015/* limit in bytes for raw access */1016info[13] = (0x06 << 22) | ((width << log2cpp) - 1);10171018info[1] = nve4_su_format_map[view->format];10191020#if 01021switch (util_format_get_blocksizebits(view->format)) {1022case 16: info[1] |= 1 << 16; break;1023case 32: info[1] |= 2 << 16; break;1024case 64: info[1] |= 3 << 16; break;1025case 128: info[1] |= 4 << 16; break;1026default:1027break;1028}1029#else1030info[1] |= log2cpp << 16;1031info[1] |= 0x4000;1032info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);1033#endif10341035if (res->base.target == PIPE_BUFFER) {1036address += view->u.buf.offset;10371038info[0] = address >> 8;1039info[2] = width - 1;1040info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;1041info[3] = 0;1042info[4] = 0;1043info[5] = 0;1044info[6] = 0;1045info[7] = 0;1046info[14] = 0;1047info[15] = 0;1048} else {1049struct nv50_miptree *mt = nv50_miptree(&res->base);1050struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];1051unsigned z = view->u.tex.first_layer;10521053if (!mt->layout_3d) {1054address += mt->layer_stride * z;1055z = 0;1056}10571058address += lvl->offset;10591060info[0] = address >> 8;1061info[2] = (width << mt->ms_x) - 1;1062/* NOTE: this is really important: */1063info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;1064info[3] = (0x88 << 24) | (lvl->pitch / 64);1065info[4] = (height << mt->ms_y) - 1;1066info[4] |= (lvl->tile_mode & 0x0f0) << 25;1067info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;1068info[5] = mt->layer_stride >> 8;1069info[6] = depth - 1;1070info[6] |= (lvl->tile_mode & 0xf00) << 21;1071info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;1072info[7] = mt->layout_3d ? 
1 : 0;1073info[7] |= z << 16;1074info[14] = mt->ms_x;1075info[15] = mt->ms_y;1076}1077}10781079static inline void1080nvc0_set_surface_info(struct nouveau_pushbuf *push,1081const struct pipe_image_view *view, uint64_t address,1082int width, int height, int depth)1083{1084struct nv04_resource *res;1085uint32_t *const info = push->cur;10861087push->cur += 16;10881089/* Make sure to always initialize the surface information area because it's1090* used to check if the given image is bound or not. */1091memset(info, 0, 16 * sizeof(*info));10921093if (!view || !view->resource)1094return;1095res = nv04_resource(view->resource);10961097/* Stick the image dimensions for the imageSize() builtin. */1098info[8] = width;1099info[9] = height;1100info[10] = depth;11011102/* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel1103* offset and to check if the format doesn't mismatch. */1104info[12] = ffs(util_format_get_blocksize(view->format)) - 1;11051106if (res->base.target == PIPE_BUFFER) {1107info[0] = address >> 8;1108info[2] = width;1109} else {1110struct nv50_miptree *mt = nv50_miptree(&res->base);1111struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];1112unsigned z = mt->layout_3d ? 
view->u.tex.first_layer : 0;1113unsigned nby = align(util_format_get_nblocksy(view->format, height),1114NVC0_TILE_SIZE_Y(lvl->tile_mode));11151116/* NOTE: this does not precisely match nve4; the values are made to be1117* easier for the shader to consume.1118*/1119info[0] = address >> 8;1120info[2] = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;1121info[4] = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;1122info[5] = mt->layer_stride >> 8;1123info[6] = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;1124info[7] = z;1125info[14] = mt->ms_x;1126info[15] = mt->ms_y;1127}1128}11291130void1131nvc0_validate_suf(struct nvc0_context *nvc0, int s)1132{1133struct nouveau_pushbuf *push = nvc0->base.pushbuf;1134struct nvc0_screen *screen = nvc0->screen;11351136for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {1137struct pipe_image_view *view = &nvc0->images[s][i];1138int width, height, depth;1139uint64_t address = 0;11401141if (s == 5)1142BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);1143else1144BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);11451146if (view->resource) {1147struct nv04_resource *res = nv04_resource(view->resource);1148unsigned rt = nvc0_format_table[view->format].rt;11491150if (util_format_is_depth_or_stencil(view->format))1151rt = rt << 12;1152else1153rt = (rt << 4) | (0x14 << 12);11541155/* get surface dimensions based on the target. 
*/1156nvc0_get_surface_dims(view, &width, &height, &depth);11571158address = res->address;1159if (res->base.target == PIPE_BUFFER) {1160unsigned blocksize = util_format_get_blocksize(view->format);11611162address += view->u.buf.offset;1163assert(!(address & 0xff));11641165if (view->access & PIPE_IMAGE_ACCESS_WRITE)1166nvc0_mark_image_range_valid(view);11671168PUSH_DATAh(push, address);1169PUSH_DATA (push, address);1170PUSH_DATA (push, align(width * blocksize, 0x100));1171PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);1172PUSH_DATA (push, rt);1173PUSH_DATA (push, 0);1174} else {1175struct nv50_miptree *mt = nv50_miptree(view->resource);1176struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];1177unsigned adjusted_width = width, adjusted_height = height;11781179if (mt->layout_3d) {1180// We have to adjust the size of the 3d surface to be1181// accessible within 2d limits. The size of each z tile goes1182// into the x direction, while the number of z tiles goes into1183// the y direction.1184const unsigned nbx = util_format_get_nblocksx(view->format, width);1185const unsigned nby = util_format_get_nblocksy(view->format, height);1186const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);1187const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);1188const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);11891190adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;1191adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);1192} else {1193const unsigned z = view->u.tex.first_layer;1194address += mt->layer_stride * z;1195}1196address += lvl->offset;11971198PUSH_DATAh(push, address);1199PUSH_DATA (push, address);1200PUSH_DATA (push, adjusted_width << mt->ms_x);1201PUSH_DATA (push, adjusted_height << mt->ms_y);1202PUSH_DATA (push, rt);1203PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */1204}12051206if (s == 5)1207BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, 
RDWR);1208else1209BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);1210} else {1211PUSH_DATA(push, 0);1212PUSH_DATA(push, 0);1213PUSH_DATA(push, 0);1214PUSH_DATA(push, 0);1215PUSH_DATA(push, 0x14000);1216PUSH_DATA(push, 0);1217}12181219/* stick surface information into the driver constant buffer */1220if (s == 5)1221BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);1222else1223BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);1224PUSH_DATA (push, NVC0_CB_AUX_SIZE);1225PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));1226PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));1227if (s == 5)1228BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);1229else1230BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);1231PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));12321233nvc0_set_surface_info(push, view, address, width, height, depth);1234}1235}12361237static inline void1238nvc0_update_surface_bindings(struct nvc0_context *nvc0)1239{1240nvc0_validate_suf(nvc0, 4);12411242/* Invalidate all COMPUTE images because they are aliased with FRAGMENT. 
*/1243nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);1244nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;1245nvc0->images_dirty[5] |= nvc0->images_valid[5];1246}12471248static void1249gm107_validate_surfaces(struct nvc0_context *nvc0,1250struct pipe_image_view *view, int stage, int slot)1251{1252struct nv04_resource *res = nv04_resource(view->resource);1253struct nouveau_pushbuf *push = nvc0->base.pushbuf;1254struct nvc0_screen *screen = nvc0->screen;1255struct nv50_tic_entry *tic;12561257tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);12581259res = nv04_resource(tic->pipe.texture);1260nvc0_update_tic(nvc0, tic, res);12611262if (tic->id < 0) {1263tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);12641265/* upload the texture view */1266nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,1267NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);12681269BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);1270PUSH_DATA (push, 0);1271} else1272if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {1273BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);1274PUSH_DATA (push, (tic->id << 4) | 1);1275}1276nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);12771278res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;1279res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;12801281BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);12821283/* upload the texture handle */1284BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);1285PUSH_DATA (push, NVC0_CB_AUX_SIZE);1286PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));1287PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));1288BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);1289PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));1290PUSH_DATA (push, tic->id);1291}12921293static inline void1294nve4_update_surface_bindings(struct nvc0_context *nvc0)1295{1296struct nouveau_pushbuf *push = nvc0->base.pushbuf;1297struct nvc0_screen *screen = nvc0->screen;1298int i, j, s;12991300for (s = 0; s < 5; s++) {1301if 
(!nvc0->images_dirty[s])1302continue;13031304for (i = 0; i < NVC0_MAX_IMAGES; ++i) {1305struct pipe_image_view *view = &nvc0->images[s][i];13061307BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);1308PUSH_DATA (push, NVC0_CB_AUX_SIZE);1309PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));1310PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));1311BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);1312PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));13131314if (view->resource) {1315struct nv04_resource *res = nv04_resource(view->resource);13161317if (res->base.target == PIPE_BUFFER) {1318if (view->access & PIPE_IMAGE_ACCESS_WRITE)1319nvc0_mark_image_range_valid(view);1320}13211322nve4_set_surface_info(push, view, nvc0);1323BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);13241325if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)1326gm107_validate_surfaces(nvc0, view, s, i);1327} else {1328for (j = 0; j < 16; j++)1329PUSH_DATA(push, 0);1330}1331}1332}1333}13341335void1336nvc0_validate_surfaces(struct nvc0_context *nvc0)1337{1338if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {1339nve4_update_surface_bindings(nvc0);1340} else {1341nvc0_update_surface_bindings(nvc0);1342}1343}13441345static uint64_t1346nve4_create_image_handle(struct pipe_context *pipe,1347const struct pipe_image_view *view)1348{1349struct nvc0_context *nvc0 = nvc0_context(pipe);1350struct nouveau_pushbuf *push = nvc0->base.pushbuf;1351struct nvc0_screen *screen = nvc0->screen;1352int i = screen->img.next, s;13531354while (screen->img.entries[i]) {1355i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);1356if (i == screen->img.next)1357return 0;1358}13591360screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);1361screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));1362*screen->img.entries[i] = *view;13631364for (s = 0; s < 6; s++) {1365BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);1366PUSH_DATA (push, NVC0_CB_AUX_SIZE);1367PUSH_DATAh(push, screen->uniform_bo->offset + 
NVC0_CB_AUX_INFO(s));1368PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));1369BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);1370PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));1371nve4_set_surface_info(push, view, nvc0);1372}13731374return 0x100000000ULL | i;1375}13761377static void1378nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)1379{1380struct nvc0_context *nvc0 = nvc0_context(pipe);1381struct nvc0_screen *screen = nvc0->screen;1382int i = handle & (NVE4_IMG_MAX_HANDLES - 1);13831384free(screen->img.entries[i]);1385screen->img.entries[i] = NULL;1386}13871388static void1389nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,1390unsigned access, bool resident)1391{1392struct nvc0_context *nvc0 = nvc0_context(pipe);1393struct nvc0_screen *screen = nvc0->screen;13941395if (resident) {1396struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));1397struct pipe_image_view *view =1398screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];1399assert(view);14001401if (view->resource->target == PIPE_BUFFER &&1402access & PIPE_IMAGE_ACCESS_WRITE)1403nvc0_mark_image_range_valid(view);1404res->handle = handle;1405res->buf = nv04_resource(view->resource);1406res->flags = (access & 3) << 8;1407list_add(&res->list, &nvc0->img_head);1408} else {1409list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {1410if (pos->handle == handle) {1411list_del(&pos->list);1412free(pos);1413break;1414}1415}1416}1417}14181419static uint64_t1420gm107_create_image_handle(struct pipe_context *pipe,1421const struct pipe_image_view *view)1422{1423/* GM107+ use TIC handles to reference images. 
As such, image handles are1424* just the TIC id.1425*/1426struct nvc0_context *nvc0 = nvc0_context(pipe);1427struct nouveau_pushbuf *push = nvc0->base.pushbuf;1428struct pipe_sampler_view *sview =1429gm107_create_texture_view_from_image(pipe, view);1430struct nv50_tic_entry *tic = nv50_tic_entry(sview);14311432if (tic == NULL)1433goto fail;14341435tic->bindless = 1;1436tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);1437if (tic->id < 0)1438goto fail;14391440nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,1441NV_VRAM_DOMAIN(&nvc0->screen->base), 32,1442tic->tic);14431444IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);14451446nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);14471448// Compute handle. This will include the TIC as well as some additional1449// info regarding the bound 3d surface layer, if applicable.1450uint64_t handle = 0x100000000ULL | tic->id;1451struct nv04_resource *res = nv04_resource(view->resource);1452if (res->base.target == PIPE_TEXTURE_3D) {1453handle |= 1 << 11;1454handle |= view->u.tex.first_layer << (11 + 16);1455}1456return handle;14571458fail:1459FREE(tic);1460return 0;1461}14621463static void1464gm107_delete_image_handle(struct pipe_context *pipe, uint64_t handle)1465{1466struct nvc0_context *nvc0 = nvc0_context(pipe);1467int tic = handle & NVE4_TIC_ENTRY_INVALID;1468struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];1469struct pipe_sampler_view *view = &entry->pipe;1470assert(entry->bindless == 1);1471assert(!view_bound(nvc0, view));1472entry->bindless = 0;1473nvc0_screen_tic_unlock(nvc0->screen, entry);1474pipe_sampler_view_reference(&view, NULL);1475}14761477static void1478gm107_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,1479unsigned access, bool resident)1480{1481struct nvc0_context *nvc0 = nvc0_context(pipe);14821483if (resident) {1484struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));1485struct nv50_tic_entry *tic =1486nvc0->screen->tic.entries[handle 
& NVE4_TIC_ENTRY_INVALID];1487assert(tic);1488assert(tic->bindless);14891490res->handle = handle;1491res->buf = nv04_resource(tic->pipe.texture);1492res->flags = (access & 3) << 8;1493if (res->buf->base.target == PIPE_BUFFER &&1494access & PIPE_IMAGE_ACCESS_WRITE)1495util_range_add(&res->buf->base, &res->buf->valid_buffer_range,1496tic->pipe.u.buf.offset,1497tic->pipe.u.buf.offset + tic->pipe.u.buf.size);1498list_add(&res->list, &nvc0->img_head);1499} else {1500list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {1501if (pos->handle == handle) {1502list_del(&pos->list);1503free(pos);1504break;1505}1506}1507}1508}15091510void1511nvc0_init_bindless_functions(struct pipe_context *pipe) {1512pipe->create_texture_handle = nve4_create_texture_handle;1513pipe->delete_texture_handle = nve4_delete_texture_handle;1514pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;15151516if (nvc0_context(pipe)->screen->base.class_3d < GM107_3D_CLASS) {1517pipe->create_image_handle = nve4_create_image_handle;1518pipe->delete_image_handle = nve4_delete_image_handle;1519pipe->make_image_handle_resident = nve4_make_image_handle_resident;1520} else {1521pipe->create_image_handle = gm107_create_image_handle;1522pipe->delete_image_handle = gm107_delete_image_handle;1523pipe->make_image_handle_resident = gm107_make_image_handle_resident;1524}1525}152615271528static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =1529{1530[PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,1531[PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,1532[PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,1533[PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,1534[PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,1535[PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,1536[PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,1537[PIPE_FORMAT_R16G16B16A16_UINT] = 
GK104_IMAGE_FORMAT_RGBA16_UINT,1538[PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,1539[PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,1540[PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,1541[PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,1542[PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,1543[PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,1544[PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,1545[PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,1546[PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,1547[PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,1548[PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,1549[PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,1550[PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,1551[PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,1552[PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,1553[PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,1554[PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,1555[PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,1556[PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,1557[PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,1558[PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,1559[PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,1560[PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,1561[PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,1562[PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,1563[PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,1564[PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,1565[PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,1566[PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,1567[PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,1568[PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,1569[PIPE_FORMAT_R8_UINT] = 
GK104_IMAGE_FORMAT_R8_UINT,1570};15711572/* Auxiliary format description values for surface instructions.1573* (log2(bytes per pixel) << 12) | (unk8 << 8) | unk221574*/1575static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =1576{1577[PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,1578[PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,1579[PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,15801581[PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,1582[PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,1583[PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,1584[PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,1585[PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,15861587[PIPE_FORMAT_R32G32_FLOAT] = 0x3433,1588[PIPE_FORMAT_R32G32_SINT] = 0x3433,1589[PIPE_FORMAT_R32G32_UINT] = 0x3433,15901591[PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,1592[PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,1593[PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,1594[PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,1595[PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,1596[PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,1597[PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,1598[PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,15991600[PIPE_FORMAT_R16G16_UNORM] = 0x2524,1601[PIPE_FORMAT_R16G16_SNORM] = 0x2524,1602[PIPE_FORMAT_R16G16_SINT] = 0x2524,1603[PIPE_FORMAT_R16G16_UINT] = 0x2524,1604[PIPE_FORMAT_R16G16_FLOAT] = 0x2524,16051606[PIPE_FORMAT_R32_SINT] = 0x2024,1607[PIPE_FORMAT_R32_UINT] = 0x2024,1608[PIPE_FORMAT_R32_FLOAT] = 0x2024,16091610[PIPE_FORMAT_R8G8_UNORM] = 0x1615,1611[PIPE_FORMAT_R8G8_SNORM] = 0x1615,1612[PIPE_FORMAT_R8G8_SINT] = 0x1615,1613[PIPE_FORMAT_R8G8_UINT] = 0x1615,16141615[PIPE_FORMAT_R16_UNORM] = 0x1115,1616[PIPE_FORMAT_R16_SNORM] = 0x1115,1617[PIPE_FORMAT_R16_SINT] = 0x1115,1618[PIPE_FORMAT_R16_UINT] = 0x1115,1619[PIPE_FORMAT_R16_FLOAT] = 0x1115,16201621[PIPE_FORMAT_R8_UNORM] = 0x0206,1622[PIPE_FORMAT_R8_SNORM] = 0x0206,1623[PIPE_FORMAT_R8_SINT] = 0x0206,1624[PIPE_FORMAT_R8_UINT] = 0x02061625};16261627/* NOTE: These are hardcoded offsets for the shader library.1628* TODO: Automate them.1629*/1630static const 
uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =1631{1632[PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,1633[PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,1634[PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,1635[PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,1636[PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,1637[PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,1638[PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,1639[PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,1640[PIPE_FORMAT_R32G32_FLOAT] = 0x428,1641[PIPE_FORMAT_R32G32_SINT] = 0x468,1642[PIPE_FORMAT_R32G32_UINT] = 0x468,1643[PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,1644[PIPE_FORMAT_R10G10B10A2_UINT] = 0x530,1645[PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,1646[PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,1647[PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,1648[PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,1649[PIPE_FORMAT_B5G6R5_UNORM] = 0x718,1650[PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,1651[PIPE_FORMAT_R16G16_UNORM] = 0x828,1652[PIPE_FORMAT_R16G16_SNORM] = 0x890,1653[PIPE_FORMAT_R16G16_SINT] = 0x8f0,1654[PIPE_FORMAT_R16G16_UINT] = 0x948,1655[PIPE_FORMAT_R16G16_FLOAT] = 0x998,1656[PIPE_FORMAT_R32_FLOAT] = 0x9e8,1657[PIPE_FORMAT_R32_SINT] = 0xa30,1658[PIPE_FORMAT_R32_UINT] = 0xa30,1659[PIPE_FORMAT_R8G8_UNORM] = 0xa78,1660[PIPE_FORMAT_R8G8_SNORM] = 0xae0,1661[PIPE_FORMAT_R8G8_UINT] = 0xb48,1662[PIPE_FORMAT_R8G8_SINT] = 0xb98,1663[PIPE_FORMAT_R16_UNORM] = 0xbe8,1664[PIPE_FORMAT_R16_SNORM] = 0xc48,1665[PIPE_FORMAT_R16_SINT] = 0xca0,1666[PIPE_FORMAT_R16_UINT] = 0xce8,1667[PIPE_FORMAT_R16_FLOAT] = 0xd30,1668[PIPE_FORMAT_R8_UNORM] = 0xd88,1669[PIPE_FORMAT_R8_SNORM] = 0xde0,1670[PIPE_FORMAT_R8_SINT] = 0xe38,1671[PIPE_FORMAT_R8_UINT] = 0xe88,1672[PIPE_FORMAT_R11G11B10_FLOAT] = 0xed01673};167416751676