Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
4574 views
/*1* Copyright 2008 Ben Skeggs2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "drm-uapi/drm_fourcc.h"2324#include "pipe/p_state.h"25#include "pipe/p_defines.h"26#include "frontend/drm_driver.h"27#include "util/u_inlines.h"28#include "util/format/u_format.h"2930#include "nvc0/nvc0_context.h"31#include "nvc0/nvc0_resource.h"3233static uint32_t34nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)35{36return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);37}3839static uint32_t40tu102_choose_tiled_storage_type(enum pipe_format format,41unsigned ms,42bool compressed)4344{45uint32_t kind;4647switch (format) {48case PIPE_FORMAT_Z16_UNORM:49if (compressed)50kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC51else52kind = 0x01; // NV_MMU_PTE_KIND_Z1653break;54case PIPE_FORMAT_X8Z24_UNORM:55case PIPE_FORMAT_S8X24_UINT:56case PIPE_FORMAT_S8_UINT_Z24_UNORM:57if (compressed)58kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC59else60kind = 0x05; // NV_MMU_PTE_KIND_Z24S861break;62case PIPE_FORMAT_X24S8_UINT:63case PIPE_FORMAT_Z24X8_UNORM:64case PIPE_FORMAT_Z24_UNORM_S8_UINT:65if (compressed)66kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC67else68kind = 0x03; // NV_MMU_PTE_KIND_S8Z2469break;70case PIPE_FORMAT_X32_S8X24_UINT:71case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:72if (compressed)73kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC74else75kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S876break;77case PIPE_FORMAT_Z32_FLOAT:78default:79kind = 0x06;80break;81}8283return kind;84}8586uint32_t87nvc0_choose_tiled_storage_type(struct pipe_screen *pscreen,88enum pipe_format format,89unsigned ms,90bool compressed)91{92uint32_t tile_flags;9394if (nouveau_screen(pscreen)->device->chipset >= 0x160)95return tu102_choose_tiled_storage_type(format, ms, compressed);9697switch (format) {98case PIPE_FORMAT_Z16_UNORM:99if (compressed)100tile_flags = 0x02 + ms;101else102tile_flags = 0x01;103break;104case PIPE_FORMAT_X8Z24_UNORM:105case PIPE_FORMAT_S8X24_UINT:106case PIPE_FORMAT_S8_UINT_Z24_UNORM:107if (compressed)108tile_flags = 0x51 + ms;109else110tile_flags = 0x46;111break;112case PIPE_FORMAT_X24S8_UINT:113case PIPE_FORMAT_Z24X8_UNORM:114case PIPE_FORMAT_Z24_UNORM_S8_UINT:115if (compressed)116tile_flags = 0x17 + ms;117else118tile_flags = 0x11;119break;120case PIPE_FORMAT_Z32_FLOAT:121if (compressed)122tile_flags = 0x86 + ms;123else124tile_flags = 0x7b;125break;126case PIPE_FORMAT_X32_S8X24_UINT:127case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:128if (compressed)129tile_flags = 0xce + ms;130else131tile_flags = 0xc3;132break;133default:134switch (util_format_get_blocksizebits(format)) {135case 128:136if (compressed)137tile_flags = 0xf4 + ms * 2;138else139tile_flags = 0xfe;140break;141case 64:142if (compressed) {143switch (ms) {144case 0: tile_flags = 0xe6; break;145case 1: tile_flags = 0xeb; break;146case 2: tile_flags = 0xed; break;147case 3: tile_flags = 0xf2; break;148default:149return 0;150}151} else {152tile_flags = 0xfe;153}154break;155case 32:156if (compressed && ms) {157switch (ms) {158/* This one makes things blurry:159case 0: tile_flags = 0xdb; break;160*/161case 1: tile_flags = 0xdd; break;162case 2: tile_flags = 0xdf; break;163case 3: tile_flags = 0xe4; break;164default:165return 0;166}167} else {168tile_flags = 0xfe;169}170break;171case 16:172case 8:173tile_flags = 0xfe;174break;175default:176return 0;177}178break;179}180181return tile_flags;182}183184static uint32_t185nvc0_mt_choose_storage_type(struct pipe_screen *pscreen,186const struct nv50_miptree *mt,187bool compressed)188{189const unsigned ms = util_logbase2(mt->base.base.nr_samples);190191if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))192return 0;193if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))194return 0;195196return nvc0_choose_tiled_storage_type(pscreen, mt->base.base.format, ms, compressed);197}198199static inline bool200nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)201{202switch (mt->base.base.nr_samples) {203case 8:204mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;205mt->ms_x = 2;206mt->ms_y = 1;207break;208case 4:209mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4;210mt->ms_x = 1;211mt->ms_y = 1;212break;213case 2:214mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2;215mt->ms_x = 1;216break;217case 1:218case 0:219mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;220break;221default:222NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);223return false;224}225return true;226}227228static void229nvc0_miptree_init_layout_video(struct nv50_miptree *mt)230{231const struct pipe_resource *pt = &mt->base.base;232const unsigned blocksize = util_format_get_blocksize(pt->format);233234assert(pt->last_level == 0);235assert(mt->ms_x == 0 && mt->ms_y == 0);236assert(!util_format_is_compressed(pt->format));237238mt->layout_3d = pt->target == PIPE_TEXTURE_3D;239240mt->level[0].tile_mode = 0x10;241mt->level[0].pitch = align(pt->width0 * blocksize, 64);242mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1);243244if (pt->array_size > 1) {245mt->layer_stride = align(mt->total_size, NVC0_TILE_SIZE(0x10));246mt->total_size = mt->layer_stride * pt->array_size;247}248}249250static void251nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt, uint64_t modifier)252{253struct pipe_resource *pt = &mt->base.base;254unsigned w, h, d, l;255const unsigned blocksize = util_format_get_blocksize(pt->format);256257mt->layout_3d = pt->target == PIPE_TEXTURE_3D;258259w = pt->width0 << mt->ms_x;260h = pt->height0 << mt->ms_y;261262/* For 3D textures, a mipmap is spanned by all the layers, for array263* textures and cube maps, each layer contains its own mipmaps.264*/265d = mt->layout_3d ? pt->depth0 : 1;266267assert(!mt->ms_mode || !pt->last_level);268assert(modifier == DRM_FORMAT_MOD_INVALID ||269(!pt->last_level && !mt->layout_3d));270assert(modifier != DRM_FORMAT_MOD_LINEAR);271272for (l = 0; l <= pt->last_level; ++l) {273struct nv50_miptree_level *lvl = &mt->level[l];274unsigned tsx, tsy, tsz;275unsigned nbx = util_format_get_nblocksx(pt->format, w);276unsigned nby = util_format_get_nblocksy(pt->format, h);277278lvl->offset = mt->total_size;279280if (modifier != DRM_FORMAT_MOD_INVALID)281/* Extract the log2(block height) field from the modifier and pack it282* into tile_mode's y field. Other tile dimensions are always 1283* (represented using 0 here) for 2D surfaces, and non-2D surfaces are284* not supported by the current modifiers (asserted above). Note the285* modifier must be validated prior to calling this function.286*/287lvl->tile_mode = ((uint32_t)modifier & 0xf) << 4;288else289lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d);290291tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */292tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);293tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);294295lvl->pitch = align(nbx * blocksize, tsx);296297mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz);298299w = u_minify(w, 1);300h = u_minify(h, 1);301d = u_minify(d, 1);302}303304if (pt->array_size > 1) {305mt->layer_stride = align(mt->total_size,306NVC0_TILE_SIZE(mt->level[0].tile_mode));307mt->total_size = mt->layer_stride * pt->array_size;308}309}310311static uint64_t312nvc0_miptree_get_modifier(struct pipe_screen *pscreen, struct nv50_miptree *mt)313{314const union nouveau_bo_config *config = &mt->base.bo->config;315const uint32_t uc_kind =316nvc0_choose_tiled_storage_type(pscreen,317mt->base.base.format,318mt->base.base.nr_samples,319false);320const uint32_t kind_gen = nvc0_get_kind_generation(pscreen);321322if (mt->layout_3d)323return DRM_FORMAT_MOD_INVALID;324if (mt->base.base.nr_samples > 1)325return DRM_FORMAT_MOD_INVALID;326if (config->nvc0.memtype == 0x00)327return DRM_FORMAT_MOD_LINEAR;328if (NVC0_TILE_MODE_Y(config->nvc0.tile_mode) > 5)329return DRM_FORMAT_MOD_INVALID;330if (config->nvc0.memtype != uc_kind)331return DRM_FORMAT_MOD_INVALID;332333return DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(3340,335nouveau_screen(pscreen)->tegra_sector_layout ? 0 : 1,336kind_gen,337config->nvc0.memtype,338NVC0_TILE_MODE_Y(config->nvc0.tile_mode));339}340341bool342nvc0_miptree_get_handle(struct pipe_screen *pscreen,343struct pipe_context *context,344struct pipe_resource *pt,345struct winsys_handle *whandle,346unsigned usage)347{348struct nv50_miptree *mt = nv50_miptree(pt);349bool ret;350351ret = nv50_miptree_get_handle(pscreen, context, pt, whandle, usage);352if (!ret)353return ret;354355whandle->modifier = nvc0_miptree_get_modifier(pscreen, mt);356357return true;358}359360static uint64_t361nvc0_miptree_select_best_modifier(struct pipe_screen *pscreen,362const struct nv50_miptree *mt,363const uint64_t *modifiers,364unsigned int count)365{366/*367* Supported block heights are 1,2,4,8,16,32, stored as log2() their368* value. Reserve one slot for each, as well as the linear modifier.369*/370uint64_t prio_supported_mods[] = {371DRM_FORMAT_MOD_INVALID,372DRM_FORMAT_MOD_INVALID,373DRM_FORMAT_MOD_INVALID,374DRM_FORMAT_MOD_INVALID,375DRM_FORMAT_MOD_INVALID,376DRM_FORMAT_MOD_INVALID,377DRM_FORMAT_MOD_LINEAR,378};379const uint32_t uc_kind = nvc0_mt_choose_storage_type(pscreen, mt, false);380int top_mod_slot = ARRAY_SIZE(prio_supported_mods);381const uint32_t kind_gen = nvc0_get_kind_generation(pscreen);382unsigned int i;383int p;384385if (uc_kind != 0u) {386const struct pipe_resource *pt = &mt->base.base;387const unsigned nbx = util_format_get_nblocksx(pt->format, pt->width0);388const unsigned nby = util_format_get_nblocksy(pt->format, pt->height0);389const uint32_t lbh_preferred =390NVC0_TILE_MODE_Y(nvc0_tex_choose_tile_dims(nbx, nby, 1u, false));391uint32_t lbh = lbh_preferred;392bool dec_lbh = true;393const uint8_t s = nouveau_screen(pscreen)->tegra_sector_layout ? 0 : 1;394395for (i = 0; i < ARRAY_SIZE(prio_supported_mods) - 1; i++) {396assert(lbh <= 5u);397prio_supported_mods[i] =398DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, s, kind_gen, uc_kind, lbh);399400/*401* The preferred block height is the largest block size that doesn't402* waste excessive space with unused padding bytes relative to the403* height of the image. Construct the priority array such that404* the preferred block height is highest priority, followed by405* progressively smaller block sizes down to a block height of one,406* followed by progressively larger (more wasteful) block sizes up407* to 5.408*/409if (lbh == 0u) {410lbh = lbh_preferred + 1u;411dec_lbh = false;412} else if (dec_lbh) {413lbh--;414} else {415lbh++;416}417}418}419420assert(prio_supported_mods[ARRAY_SIZE(prio_supported_mods) - 1] ==421DRM_FORMAT_MOD_LINEAR);422423for (i = 0u; i < count; i++) {424for (p = 0; p < ARRAY_SIZE(prio_supported_mods); p++) {425if (prio_supported_mods[p] == modifiers[i]) {426if (top_mod_slot > p) top_mod_slot = p;427break;428}429}430}431432if (top_mod_slot >= ARRAY_SIZE(prio_supported_mods))433return DRM_FORMAT_MOD_INVALID;434435return prio_supported_mods[top_mod_slot];436}437438struct pipe_resource *439nvc0_miptree_create(struct pipe_screen *pscreen,440const struct pipe_resource *templ,441const uint64_t *modifiers, unsigned int count)442{443struct nouveau_device *dev = nouveau_screen(pscreen)->device;444struct nouveau_drm *drm = nouveau_screen(pscreen)->drm;445struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);446struct pipe_resource *pt = &mt->base.base;447bool compressed = drm->version >= 0x01000101;448int ret;449union nouveau_bo_config bo_config;450uint32_t bo_flags;451unsigned pitch_align;452uint64_t modifier = DRM_FORMAT_MOD_INVALID;453454if (!mt)455return NULL;456457*pt = *templ;458pipe_reference_init(&pt->reference, 1);459pt->screen = pscreen;460461if (pt->usage == PIPE_USAGE_STAGING) {462/* PIPE_USAGE_STAGING, and usage in general, should not be specified when463* modifiers are used. */464assert(count == 0);465switch (pt->target) {466case PIPE_TEXTURE_2D:467case PIPE_TEXTURE_RECT:468if (pt->last_level == 0 &&469!util_format_is_depth_or_stencil(pt->format) &&470pt->nr_samples <= 1)471pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;472break;473default:474break;475}476}477478if (pt->bind & PIPE_BIND_LINEAR)479pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;480481if (count > 0) {482modifier = nvc0_miptree_select_best_modifier(pscreen, mt,483modifiers, count);484485if (modifier == DRM_FORMAT_MOD_INVALID) {486FREE(mt);487return NULL;488}489490if (modifier == DRM_FORMAT_MOD_LINEAR) {491pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;492bo_config.nvc0.memtype = 0;493} else {494bo_config.nvc0.memtype = (modifier >> 12) & 0xff;495}496} else {497bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(pscreen, mt, compressed);498}499500if (!nvc0_miptree_init_ms_mode(mt)) {501FREE(mt);502return NULL;503}504505if (unlikely(pt->flags & NVC0_RESOURCE_FLAG_VIDEO)) {506assert(modifier == DRM_FORMAT_MOD_INVALID);507nvc0_miptree_init_layout_video(mt);508} else509if (likely(bo_config.nvc0.memtype)) {510nvc0_miptree_init_layout_tiled(mt, modifier);511} else {512/* When modifiers are supplied, usage is zero. TODO: detect the513* modifiers+cursor case. */514if (pt->usage & PIPE_BIND_CURSOR)515pitch_align = 1;516else if ((pt->usage & PIPE_BIND_SCANOUT) || count > 0)517pitch_align = 256;518else519pitch_align = 128;520if (!nv50_miptree_init_layout_linear(mt, pitch_align)) {521FREE(mt);522return NULL;523}524}525bo_config.nvc0.tile_mode = mt->level[0].tile_mode;526527if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED))528mt->base.domain = NOUVEAU_BO_GART;529else530mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen));531532bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;533534if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))535bo_flags |= NOUVEAU_BO_CONTIG;536537ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config,538&mt->base.bo);539if (ret) {540FREE(mt);541return NULL;542}543mt->base.address = mt->base.bo->offset;544545NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, 1);546NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes,547mt->total_size);548549return pt;550}551552/* Offset of zslice @z from start of level @l. */553inline unsigned554nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)555{556const struct pipe_resource *pt = &mt->base.base;557558unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode);559unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode);560561unsigned nby = util_format_get_nblocksy(pt->format,562u_minify(pt->height0, l));563564/* to next 2D tile slice within a 3D tile */565unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode);566567/* to slice in the next (in z direction) 3D tile */568unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds;569570return (z & (1 << (tds - 1))) * stride_2d + (z >> tds) * stride_3d;571}572573/* Surface functions.574*/575576struct pipe_surface *577nvc0_miptree_surface_new(struct pipe_context *pipe,578struct pipe_resource *pt,579const struct pipe_surface *templ)580{581struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ);582if (!ns)583return NULL;584ns->base.context = pipe;585return &ns->base;586}587588589