/* Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_tex.c
 * (4574 views)
 */
/*1* Copyright 2008 Ben Skeggs2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "nv50/nv50_context.h"23#include "nv50/nv50_resource.h"24#include "nv50/g80_texture.xml.h"25#include "nv50/g80_defs.xml.h"2627#include "util/format/u_format.h"2829static inline uint32_t30nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int)31{32switch (swz) {33case PIPE_SWIZZLE_X : return fmt->tic.src_x;34case PIPE_SWIZZLE_Y: return fmt->tic.src_y;35case PIPE_SWIZZLE_Z : return fmt->tic.src_z;36case PIPE_SWIZZLE_W: return fmt->tic.src_w;37case PIPE_SWIZZLE_1:38return tex_int ? 
G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;39case PIPE_SWIZZLE_0:40default:41return G80_TIC_SOURCE_ZERO;42}43}4445struct pipe_sampler_view *46nv50_create_sampler_view(struct pipe_context *pipe,47struct pipe_resource *res,48const struct pipe_sampler_view *templ)49{50uint32_t flags = 0;5152if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)53flags |= NV50_TEXVIEW_SCALED_COORDS;5455return nv50_create_texture_view(pipe, res, templ, flags);56}5758struct pipe_sampler_view *59nv50_create_texture_view(struct pipe_context *pipe,60struct pipe_resource *texture,61const struct pipe_sampler_view *templ,62uint32_t flags)63{64const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;65const struct util_format_description *desc;66const struct nv50_format *fmt;67uint64_t addr;68uint32_t *tic;69uint32_t swz[4];70uint32_t depth;71struct nv50_tic_entry *view;72struct nv50_miptree *mt = nv50_miptree(texture);73bool tex_int;7475view = MALLOC_STRUCT(nv50_tic_entry);76if (!view)77return NULL;7879view->pipe = *templ;80view->pipe.reference.count = 1;81view->pipe.texture = NULL;82view->pipe.context = pipe;8384view->id = -1;8586pipe_resource_reference(&view->pipe.texture, texture);8788tic = &view->tic[0];8990desc = util_format_description(view->pipe.format);9192/* TIC[0] */9394fmt = &nv50_format_table[view->pipe.format];9596tex_int = util_format_is_pure_integer(view->pipe.format);9798swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);99swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);100swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);101swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);102tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |103(fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |104(fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |105(fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |106(fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |107(swz[0] << G80_TIC_0_X_SOURCE__SHIFT) 
|108(swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |109(swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |110(swz[3] << G80_TIC_0_W_SOURCE__SHIFT);111112addr = mt->base.address;113114depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);115116if (mt->base.base.array_size > 1) {117/* there doesn't seem to be a base layer field in TIC */118addr += view->pipe.u.tex.first_layer * mt->layer_stride;119depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;120}121122tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;123124if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)125tic[2] |= G80_TIC_2_SRGB_CONVERSION;126127if (!(flags & NV50_TEXVIEW_SCALED_COORDS))128tic[2] |= G80_TIC_2_NORMALIZED_COORDS;129130if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {131if (templ->target == PIPE_BUFFER) {132addr += view->pipe.u.buf.offset;133tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;134tic[3] = 0;135tic[4] = /* width */136view->pipe.u.buf.size / (desc->block.bits / 8);137tic[5] = 0;138} else {139tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;140tic[3] = mt->level[0].pitch;141tic[4] = mt->base.base.width0;142tic[5] = (1 << 16) | (mt->base.base.height0);143}144tic[6] =145tic[7] = 0;146tic[1] = addr;147tic[2] |= addr >> 32;148return &view->pipe;149}150151tic[1] = addr;152tic[2] |= (addr >> 32) & 0xff;153154tic[2] |=155((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |156((mt->level[0].tile_mode & 0xf00) << (25 - 8));157158switch (templ->target) {159case PIPE_TEXTURE_1D:160tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;161break;162case PIPE_TEXTURE_2D:163if (mt->ms_x)164tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;165else166tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;167break;168case PIPE_TEXTURE_RECT:169tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;170break;171case PIPE_TEXTURE_3D:172tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;173break;174case PIPE_TEXTURE_CUBE:175depth /= 6;176tic[2] |= 
G80_TIC_2_TEXTURE_TYPE_CUBEMAP;177break;178case PIPE_TEXTURE_1D_ARRAY:179tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;180break;181case PIPE_TEXTURE_2D_ARRAY:182tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;183break;184case PIPE_TEXTURE_CUBE_ARRAY:185depth /= 6;186tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;187break;188case PIPE_BUFFER:189assert(0); /* should be linear and handled above ! */190tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH;191break;192default:193unreachable("unexpected/invalid texture target");194}195196tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;197198tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x);199200tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;201tic[5] |= depth << 16;202if (class_3d > NV50_3D_CLASS)203tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;204else205tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT;206207tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */208209if (class_3d > NV50_3D_CLASS)210tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;211else212tic[7] = 0;213214if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)))215if (mt->base.base.last_level)216tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK;217218return &view->pipe;219}220221static void222nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,223struct nv04_resource *res)224{225uint64_t address = res->address;226if (res->base.target != PIPE_BUFFER)227return;228address += tic->pipe.u.buf.offset;229if (tic->tic[1] == (uint32_t)address &&230(tic->tic[2] & 0xff) == address >> 32)231return;232233nv50_screen_tic_unlock(nv50->screen, tic);234tic->id = -1;235tic->tic[1] = address;236tic->tic[2] &= 0xffffff00;237tic->tic[2] |= address >> 32;238}239240bool241nv50_validate_tic(struct nv50_context *nv50, int s)242{243struct nouveau_pushbuf *push = nv50->base.pushbuf;244struct nouveau_bo *txc = nv50->screen->txc;245unsigned 
i;246bool need_flush = false;247const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;248249assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);250for (i = 0; i < nv50->num_textures[s]; ++i) {251struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);252struct nv04_resource *res;253254if (!tic) {255if (unlikely(is_compute_stage))256BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);257else258BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);259PUSH_DATA (push, (i << 1) | 0);260continue;261}262res = &nv50_miptree(tic->pipe.texture)->base;263nv50_update_tic(nv50, tic, res);264265if (tic->id < 0) {266tic->id = nv50_screen_tic_alloc(nv50->screen, tic);267268BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);269PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);270PUSH_DATA (push, 1);271BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);272PUSH_DATA (push, 262144);273PUSH_DATA (push, 65536);274PUSH_DATA (push, 1);275PUSH_DATAh(push, txc->offset);276PUSH_DATA (push, txc->offset);277BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);278PUSH_DATA (push, 0);279PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);280BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);281PUSH_DATA (push, 32);282PUSH_DATA (push, 1);283PUSH_DATA (push, 0);284PUSH_DATA (push, 1);285PUSH_DATA (push, 0);286PUSH_DATA (push, 1);287PUSH_DATA (push, 0);288PUSH_DATA (push, tic->id * 32);289PUSH_DATA (push, 0);290PUSH_DATA (push, 0);291BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);292PUSH_DATAp(push, &tic->tic[0], 8);293294need_flush = true;295} else296if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {297if (unlikely(is_compute_stage))298BEGIN_NV04(push, NV50_CP(TEX_CACHE_CTL), 1);299else300BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);301PUSH_DATA (push, 0x20);302}303304nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);305306res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;307res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;308309if (unlikely(is_compute_stage)) {310BCTX_REFN(nv50->bufctx_cp, CP_TEXTURES, res, RD);311BEGIN_NV04(push, 
NV50_CP(BIND_TIC), 1);312} else {313BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);314BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);315}316PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);317}318for (; i < nv50->state.num_textures[s]; ++i) {319if (unlikely(is_compute_stage))320BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);321else322BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);323PUSH_DATA (push, (i << 1) | 0);324}325if (nv50->num_textures[s]) {326if (unlikely(is_compute_stage))327BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);328else329BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);330PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX);331if (unlikely(is_compute_stage))332BEGIN_NV04(push, NV50_CP(CB_DATA(0)), nv50->num_textures[s] * 2);333else334BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);335for (i = 0; i < nv50->num_textures[s]; i++) {336struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);337struct nv50_miptree *res;338339if (!tic || tic->pipe.target == PIPE_BUFFER) {340PUSH_DATA (push, 0);341PUSH_DATA (push, 0);342continue;343}344res = nv50_miptree(tic->pipe.texture);345PUSH_DATA (push, res->ms_x);346PUSH_DATA (push, res->ms_y);347}348}349nv50->state.num_textures[s] = nv50->num_textures[s];350351return need_flush;352}353354void nv50_validate_textures(struct nv50_context *nv50)355{356unsigned s;357bool need_flush = false;358359for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s)360need_flush |= nv50_validate_tic(nv50, s);361362if (need_flush) {363BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1);364PUSH_DATA (nv50->base.pushbuf, 0);365}366367/* Invalidate all CP textures because they are aliased. 
*/368nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);369nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;370}371372bool373nv50_validate_tsc(struct nv50_context *nv50, int s)374{375struct nouveau_pushbuf *push = nv50->base.pushbuf;376unsigned i;377bool need_flush = false;378const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;379380assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);381for (i = 0; i < nv50->num_samplers[s]; ++i) {382struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]);383384if (!tsc) {385if (is_compute_stage)386BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);387else388BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);389PUSH_DATA (push, (i << 4) | 0);390continue;391}392nv50->seamless_cube_map = tsc->seamless_cube_map;393if (tsc->id < 0) {394tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);395396nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,39765536 + tsc->id * 32,398NOUVEAU_BO_VRAM, 32, tsc->tsc);399need_flush = true;400}401nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);402403if (is_compute_stage)404BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);405else406BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);407PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1);408}409for (; i < nv50->state.num_samplers[s]; ++i) {410if (is_compute_stage)411BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);412else413BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);414PUSH_DATA (push, (i << 4) | 0);415}416nv50->state.num_samplers[s] = nv50->num_samplers[s];417418// TXF, in unlinked tsc mode, will always use sampler 0. So we have to419// ensure that it remains bound. Its contents don't matter, all samplers we420// ever create have the SRGB_CONVERSION bit set, so as long as the first421// entry is initialized, we're good to go. 
This is the only bit that has422// any effect on what TXF does.423if (!nv50->samplers[s][0]) {424if (is_compute_stage)425BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);426else427BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);428PUSH_DATA (push, 1);429}430431return need_flush;432}433434void nv50_validate_samplers(struct nv50_context *nv50)435{436unsigned s;437bool need_flush = false;438439for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s)440need_flush |= nv50_validate_tsc(nv50, s);441442if (need_flush) {443if (unlikely(s == NV50_SHADER_STAGE_COMPUTE))444// TODO(pmoreau): Is this needed? Not done on nvc0445BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);446else447BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);448PUSH_DATA (nv50->base.pushbuf, 0);449}450451/* Invalidate all CP samplers because they are aliased. */452nv50->dirty_cp |= NV50_NEW_CP_SAMPLERS;453}454455/* There can be up to 4 different MS levels (1, 2, 4, 8). To simplify the456* shader logic, allow each one to take up 8 offsets.457*/458#define COMBINE(x, y) x, y459#define DUMMY 0, 0460static const uint32_t msaa_sample_xy_offsets[] = {461/* MS1 */462COMBINE(0, 0),463DUMMY,464DUMMY,465DUMMY,466DUMMY,467DUMMY,468DUMMY,469DUMMY,470471/* MS2 */472COMBINE(0, 0),473COMBINE(1, 0),474DUMMY,475DUMMY,476DUMMY,477DUMMY,478DUMMY,479DUMMY,480481/* MS4 */482COMBINE(0, 0),483COMBINE(1, 0),484COMBINE(0, 1),485COMBINE(1, 1),486DUMMY,487DUMMY,488DUMMY,489DUMMY,490491/* MS8 */492COMBINE(0, 0),493COMBINE(1, 0),494COMBINE(0, 1),495COMBINE(1, 1),496COMBINE(2, 0),497COMBINE(3, 0),498COMBINE(2, 1),499COMBINE(3, 1),500};501502void nv50_upload_ms_info(struct nouveau_pushbuf *push)503{504BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);505PUSH_DATA (push, (NV50_CB_AUX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);506BEGIN_NI04(push, NV50_3D(CB_DATA(0)), ARRAY_SIZE(msaa_sample_xy_offsets));507PUSH_DATAp(push, msaa_sample_xy_offsets, ARRAY_SIZE(msaa_sample_xy_offsets));508}509510void nv50_upload_tsc0(struct nv50_context *nv50)511{512struct nouveau_pushbuf 
*push = nv50->base.pushbuf;513u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };514nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,51565536 /* + tsc->id * 32 */,516NOUVEAU_BO_VRAM, 32, data);517BEGIN_NV04(push, NV50_3D(TSC_FLUSH), 1);518PUSH_DATA (push, 0);519}520521522