/* Path: blob/21.2-virgl/src/freedreno/fdl/fd6_layout.c (4561 views) */
/*1* Copyright (C) 2018 Rob Clark <[email protected]>2* Copyright © 2018-2019 Google, Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*23* Authors:24* Rob Clark <[email protected]>25*/2627#include <stdio.h>2829#include "freedreno_layout.h"3031static bool32is_r8g8(struct fdl_layout *layout)33{34return layout->cpp == 2 &&35util_format_get_nr_components(layout->format) == 2;36}3738void39fdl6_get_ubwc_blockwidth(struct fdl_layout *layout, uint32_t *blockwidth,40uint32_t *blockheight)41{42static const struct {43uint8_t width;44uint8_t height;45} blocksize[] = {46{ 16, 4 }, /* cpp = 1 */47{ 16, 4 }, /* cpp = 2 */48{ 16, 4 }, /* cpp = 4 */49{ 8, 4 }, /* cpp = 8 */50{ 4, 4 }, /* cpp = 16 */51{ 4, 2 }, /* cpp = 32 */52{ 0, 0 }, /* cpp = 64 (TODO) */53};5455/* special case for r8g8: */56if (is_r8g8(layout)) {57*blockwidth = 16;58*blockheight = 8;59return;60}6162uint32_t cpp = fdl_cpp_shift(layout);63assert(cpp < 
ARRAY_SIZE(blocksize));64*blockwidth = blocksize[cpp].width;65*blockheight = blocksize[cpp].height;66}6768static void69fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)70{71layout->pitchalign = fdl_cpp_shift(layout);72*heightalign = 16;7374if (is_r8g8(layout) || layout->cpp == 1) {75layout->pitchalign = 1;76*heightalign = 32;77} else if (layout->cpp == 2) {78layout->pitchalign = 2;79}8081/* note: this base_align is *probably* not always right,82* it doesn't really get tested. for example with UBWC we might83* want 4k alignment, since we align UBWC levels to 4k84*/85if (layout->cpp == 1)86layout->base_align = 64;87else if (layout->cpp == 2)88layout->base_align = 128;89else90layout->base_align = 256;91}9293/* NOTE: good way to test this is: (for example)94* piglit/bin/texelFetch fs sampler3D 100x100x895*/96bool97fdl6_layout(struct fdl_layout *layout, enum pipe_format format,98uint32_t nr_samples, uint32_t width0, uint32_t height0,99uint32_t depth0, uint32_t mip_levels, uint32_t array_size,100bool is_3d, struct fdl_explicit_layout *explicit_layout)101{102uint32_t offset = 0, heightalign;103uint32_t ubwc_blockwidth, ubwc_blockheight;104105assert(nr_samples > 0);106layout->width0 = width0;107layout->height0 = height0;108layout->depth0 = depth0;109110layout->cpp = util_format_get_blocksize(format);111layout->cpp *= nr_samples;112layout->cpp_shift = ffs(layout->cpp) - 1;113114layout->format = format;115layout->nr_samples = nr_samples;116layout->layer_first = !is_3d;117118fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);119120if (depth0 > 1 || ubwc_blockwidth == 0)121layout->ubwc = false;122123if (layout->ubwc || util_format_is_depth_or_stencil(format))124layout->tile_all = true;125126/* in layer_first layout, the level (slice) contains just one127* layer (since in fact the layer contains the slices)128*/129uint32_t layers_in_level = layout->layer_first ? 
1 : array_size;130131/* note: for tiled+noubwc layouts, we can use a lower pitchalign132* which will affect the linear levels only, (the hardware will still133* expect the tiled alignment on the tiled levels)134*/135if (layout->tile_mode) {136fdl6_tile_alignment(layout, &heightalign);137} else {138layout->base_align = 64;139layout->pitchalign = 0;140/* align pitch to at least 16 pixels:141* both turnip and galium assume there is enough alignment for 16x4142* aligned gmem store. turnip can use CP_BLIT to work without this143* extra alignment, but gallium driver doesn't implement it yet144*/145if (layout->cpp > 4)146layout->pitchalign = fdl_cpp_shift(layout) - 2;147148/* when possible, use a bit more alignment than necessary149* presumably this is better for performance?150*/151if (!explicit_layout)152layout->pitchalign = fdl_cpp_shift(layout);153154/* not used, avoid "may be used uninitialized" warning */155heightalign = 1;156}157158fdl_set_pitchalign(layout, layout->pitchalign + 6);159160if (explicit_layout) {161offset = explicit_layout->offset;162layout->pitch0 = explicit_layout->pitch;163if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)164return false;165}166167uint32_t ubwc_width0 = width0;168uint32_t ubwc_height0 = height0;169uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;170if (mip_levels > 1) {171/* With mipmapping enabled, UBWC layout is power-of-two sized,172* specified in log2 width/height in the descriptors. 
The height173* alignment is 64 for mipmapping, but for buffer sharing (always174* single level) other participants expect 16.175*/176ubwc_width0 = util_next_power_of_two(width0);177ubwc_height0 = util_next_power_of_two(height0);178ubwc_tile_height_alignment = 64;179}180layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),181RGB_TILE_WIDTH_ALIGNMENT);182ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),183ubwc_tile_height_alignment);184185for (uint32_t level = 0; level < mip_levels; level++) {186uint32_t depth = u_minify(depth0, level);187struct fdl_slice *slice = &layout->slices[level];188struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];189uint32_t tile_mode = fdl_tile_mode(layout, level);190uint32_t pitch = fdl_pitch(layout, level);191uint32_t height;192193/* tiled levels of 3D textures are rounded up to PoT dimensions: */194if (is_3d && tile_mode) {195height = u_minify(util_next_power_of_two(height0), level);196} else {197height = u_minify(height0, level);198}199200uint32_t nblocksy = util_format_get_nblocksy(format, height);201if (tile_mode)202nblocksy = align(nblocksy, heightalign);203204/* The blits used for mem<->gmem work at a granularity of205* 16x4, which can cause faults due to over-fetch on the206* last level. The simple solution is to over-allocate a207* bit the last level to ensure any over-fetch is harmless.208* The pitch is already sufficiently aligned, but height209* may not be. note this only matters if last level is linear210*/211if (level == mip_levels - 1)212nblocksy = align(nblocksy, 4);213214slice->offset = offset + layout->size;215216/* 1d array and 2d array textures must all have the same layer size217* for each miplevel on a6xx. 
3d textures can have different layer218* sizes for high levels, but the hw auto-sizer is buggy (or at least219* different than what this code does), so as soon as the layer size220* range gets into range, we stop reducing it.221*/222if (is_3d) {223if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {224slice->size0 = align(nblocksy * pitch, 4096);225} else {226slice->size0 = layout->slices[level - 1].size0;227}228} else {229slice->size0 = nblocksy * pitch;230}231232layout->size += slice->size0 * depth * layers_in_level;233234if (layout->ubwc) {235/* with UBWC every level is aligned to 4K */236layout->size = align(layout->size, 4096);237238uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);239uint32_t meta_height =240align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);241242ubwc_slice->size0 =243align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);244ubwc_slice->offset = offset + layout->ubwc_layer_size;245layout->ubwc_layer_size += ubwc_slice->size0;246}247}248249if (layout->layer_first) {250layout->layer_size = align(layout->size, 4096);251layout->size = layout->layer_size * array_size;252}253254/* Place the UBWC slices before the uncompressed slices, because the255* kernel expects UBWC to be at the start of the buffer. In the HW, we256* get to program the UBWC and non-UBWC offset/strides257* independently.258*/259if (layout->ubwc) {260for (uint32_t level = 0; level < mip_levels; level++)261layout->slices[level].offset += layout->ubwc_layer_size * array_size;262layout->size += layout->ubwc_layer_size * array_size;263}264265/* include explicit offset in size */266layout->size += offset;267268return true;269}270271272