/*1* Copyright 2015 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "isl_gfx7.h"24#include "isl_priv.h"2526static bool27gfx7_format_needs_valign2(const struct isl_device *dev,28enum isl_format format)29{30assert(ISL_GFX_VER(dev) == 7);3132/* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,33* RENDER_SURFACE_STATE Surface Vertical Alignment:34*35* - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL36* (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY37* (0x190)38*39* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT.40*41* The R32G32B32_FLOAT restriction is dropped on Haswell.42*/43return isl_format_is_yuv(format) ||44(format == ISL_FORMAT_R32G32B32_FLOAT && !ISL_DEV_IS_HASWELL(dev));45}4647bool48isl_gfx7_choose_msaa_layout(const struct isl_device *dev,49const struct isl_surf_init_info *info,50enum isl_tiling tiling,51enum isl_msaa_layout *msaa_layout)52{53bool require_array = false;54bool require_interleaved = false;5556assert(ISL_GFX_VER(dev) == 7);57assert(info->samples >= 1);5859if (info->samples == 1) {60*msaa_layout = ISL_MSAA_LAYOUT_NONE;61return true;62}6364if (!isl_format_supports_multisampling(dev->info, info->format))65return false;6667/* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of68* Multisamples:69*70* - If this field is any value other than MULTISAMPLECOUNT_1, the71* Surface Type must be SURFTYPE_2D.72*73* - If this field is any value other than MULTISAMPLECOUNT_1, Surface74* Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero75*/76if (info->dim != ISL_SURF_DIM_2D)77return false;78if (info->levels > 1)79return false;8081/* The Ivyrbridge PRM insists twice that signed integer formats cannot be82* multisampled.83*84* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of85* Multisamples:86*87* - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when88* all RT channels are not written.89*90* And errata from the Ivybridge PRM, Volume 4 Part 1 p77,91* RENDER_SURFACE_STATE, MCS Enable:92*93* This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs94* when all RT channels are not written.95*96* Note that the above SINT restrictions apply only to *MSRTs* (that is,97* *multisampled* render targets). The restrictions seem to permit an MCS98* if the render target is singlesampled.99*100* Moreover, empirically it looks that hardware can render multisampled101* surfaces with RGBA8I, RGBA16I and RGBA32I.102*/103104/* Multisampling requires vertical alignment of four. */105if (info->samples > 1 && gfx7_format_needs_valign2(dev, info->format))106return false;107108/* More obvious restrictions */109if (isl_surf_usage_is_display(info->usage))110return false;111if (tiling == ISL_TILING_LINEAR)112return false;113114/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled115* Suface Storage Format:116*117* +---------------------+----------------------------------------------------------------+118* | MSFMT_MSS | Multsampled surface was/is rendered as a render target |119* | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer |120* +---------------------+----------------------------------------------------------------+121*122* In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and123* MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED.124*/125if (isl_surf_usage_is_depth_or_stencil(info->usage) ||126(info->usage & ISL_SURF_USAGE_HIZ_BIT))127require_interleaved = true;128129/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled130* Suface Storage Format:131*132* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width133* is >= 8192 (meaning the actual surface width is >= 8193 pixels), this134* field must be set to MSFMT_MSS.135*/136if (info->samples == 8 && info->width > 8192)137require_array = true;138139/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled140* Suface Storage Format:141*142* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8,143* ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number144* of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is145* > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL.146*/147if ((info->samples == 8 && info->height > 4194304u) ||148(info->samples == 4 && info->height > 8388608u))149require_interleaved = true;150151/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled152* Suface Storage Format:153*154* This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is155* one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or156* R24_UNORM_X8_TYPELESS.157*/158if (info->format == ISL_FORMAT_I24X8_UNORM ||159info->format == ISL_FORMAT_L24X8_UNORM ||160info->format == ISL_FORMAT_A24X8_UNORM ||161info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS)162require_interleaved = true;163164if (require_array && require_interleaved)165return false;166167if (require_interleaved) {168*msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;169return true;170}171172/* Default to the array layout because it permits multisample173* compression.174*/175*msaa_layout = ISL_MSAA_LAYOUT_ARRAY;176return true;177}178179/**180* @brief Filter out tiling flags that are incompatible with the surface.181*182* The resultant outgoing @a flags is a subset of the incoming @a flags. The183* outgoing flags may be empty (0x0) if the incoming flags were too184* restrictive.185*186* For example, if the surface will be used for a display187* (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling188* flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT.189*/190void191isl_gfx6_filter_tiling(const struct isl_device *dev,192const struct isl_surf_init_info *restrict info,193isl_tiling_flags_t *flags)194{195/* IVB+ requires separate stencil */196assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));197198/* Clear flags unsupported on this hardware */199if (ISL_GFX_VER(dev) < 9) {200*flags &= ~ISL_TILING_Yf_BIT;201*flags &= ~ISL_TILING_Ys_BIT;202}203204/* And... clear the Yf and Ys bits anyway because Anvil doesn't support205* them yet.206*/207*flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */208*flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */209210if (isl_surf_usage_is_depth(info->usage)) {211/* Depth requires Y. */212*flags &= ISL_TILING_ANY_Y_MASK;213}214215if (isl_surf_usage_is_stencil(info->usage)) {216if (ISL_GFX_VER(dev) >= 12) {217/* Stencil requires Y. */218*flags &= ISL_TILING_ANY_Y_MASK;219} else {220/* Stencil requires W. */221*flags &= ISL_TILING_W_BIT;222}223} else {224*flags &= ~ISL_TILING_W_BIT;225}226227/* From the SKL+ PRMs, RENDER_SURFACE_STATE:TileMode,228* If Surface Format is ASTC*, this field must be TILEMODE_YMAJOR.229*/230if (isl_format_get_layout(info->format)->txc == ISL_TXC_ASTC)231*flags &= ISL_TILING_Y0_BIT;232233/* MCS buffers are always Y-tiled */234if (isl_format_get_layout(info->format)->txc == ISL_TXC_MCS)235*flags &= ISL_TILING_Y0_BIT;236237if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT |238ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT |239ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) {240assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT);241isl_finishme("%s:%s: handle rotated display surfaces",242__FILE__, __func__);243}244245if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT |246ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) {247assert(*flags & ISL_SURF_USAGE_DISPLAY_BIT);248isl_finishme("%s:%s: handle flipped display surfaces",249__FILE__, __func__);250}251252if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {253if (ISL_GFX_VER(dev) >= 12) {254*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT |255ISL_TILING_Y0_BIT);256} else if (ISL_GFX_VER(dev) >= 9) {257/* Note we let Yf even though it was cleared above. This is just for258* completeness.259*/260*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT |261ISL_TILING_Y0_BIT | ISL_TILING_Yf_BIT);262} else {263/* Before Skylake, the display engine does not accept Y */264*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT);265}266}267268if (info->samples > 1) {269/* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled270* Surface:271*272* For multisample render targets, this field must be 1 (true). MSRTs273* can only be tiled.274*275* From the Broadwell PRM >> Volume2d: Command Structures >>276* RENDER_SURFACE_STATE Tile Mode:277*278* If Number of Multisamples is not MULTISAMPLECOUNT_1, this field279* must be YMAJOR.280*281* As usual, though, stencil is special and requires W-tiling.282*/283*flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT);284}285286/* workaround */287if (ISL_GFX_VER(dev) == 7 &&288gfx7_format_needs_valign2(dev, info->format) &&289(info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&290info->samples == 1) {291/* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1,292* SURFACE_STATE Surface Vertical Alignment:293*294* This field must be set to VALIGN_4 for all tiled Y Render Target295* surfaces.296*/297*flags &= ~ISL_TILING_Y0_BIT;298}299300/* From the Sandybridge PRM, Volume 1, Part 2, page 32:301*302* "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either303* TileX or Linear."304*305* This is necessary all the way back to 965, but is permitted on Gfx7+.306*/307if (ISL_GFX_VER(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128)308*flags &= ~ISL_TILING_Y0_BIT;309310/* From the BDW and SKL PRMs, Volume 2d,311* RENDER_SURFACE_STATE::Width - Programming Notes:312*313* A known issue exists if a primitive is rendered to the first 2 rows and314* last 2 columns of a 16K width surface. If any geometry is drawn inside315* this square it will be copied to column X=2 and X=3 (arrangement on Y316* position will stay the same). If any geometry exceeds the boundaries of317* this 2x2 region it will be drawn normally. The issue also only occurs318* if the surface has TileMode != Linear.319*320* [Internal documentation notes that this issue isn't present on SKL GT4.]321* To prevent this rendering corruption, only allow linear tiling for322* surfaces with widths greater than 16K-2 pixels.323*324* TODO: Is this an issue for multisampled surfaces as well?325*/326if (info->width > 16382 && info->samples == 1 &&327info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT &&328(ISL_GFX_VER(dev) == 8 ||329(dev->info->is_skylake && dev->info->gt != 4))) {330*flags &= ISL_TILING_LINEAR_BIT;331}332}333334void335isl_gfx7_choose_image_alignment_el(const struct isl_device *dev,336const struct isl_surf_init_info *restrict info,337enum isl_tiling tiling,338enum isl_dim_layout dim_layout,339enum isl_msaa_layout msaa_layout,340struct isl_extent3d *image_align_el)341{342assert(ISL_GFX_VER(dev) == 7);343344/* Handled by isl_choose_image_alignment_el */345assert(info->format != ISL_FORMAT_HIZ);346347/* IVB+ does not support combined depthstencil. */348assert(!isl_surf_usage_is_depth_and_stencil(info->usage));349350/* From the Ivy Bridge PRM, Vol. 2, Part 2, Section 6.18.4.4,351* "Alignment unit size", the alignment parameters are summarized in the352* following table:353*354* Surface Defined By | Surface Format | Align Width | Align Height355* --------------------+-----------------+-------------+--------------356* DEPTH_BUFFER | D16_UNORM | 8 | 4357* | other | 4 | 4358* --------------------+-----------------+-------------+--------------359* STENCIL_BUFFER | N/A | 8 | 8360* --------------------+-----------------+-------------+--------------361* SURFACE_STATE | BC*, ETC*, EAC* | 4 | 4362* | FXT1 | 8 | 4363* | all others | HALIGN | VALIGN364* -------------------------------------------------------------------365*/366if (isl_surf_usage_is_depth(info->usage)) {367*image_align_el = info->format == ISL_FORMAT_R16_UNORM ?368isl_extent3d(8, 4, 1) : isl_extent3d(4, 4, 1);369return;370} else if (isl_surf_usage_is_stencil(info->usage)) {371*image_align_el = isl_extent3d(8, 8, 1);372return;373} else if (isl_format_is_compressed(info->format)) {374/* Compressed formats all have alignment equal to block size. */375*image_align_el = isl_extent3d(1, 1, 1);376return;377}378379/* Everything after this point is in the "set by Surface Horizontal or380* Vertical Alignment" case. Now it's just a matter of applying381* restrictions.382*/383384/* There are no restrictions on halign beyond what's given in the table385* above. We set it to the minimum value of 4 because that uses the least386* memory.387*/388const uint32_t halign = 4;389390bool require_valign4 = false;391392/* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1:393* RENDER_SURFACE_STATE Surface Vertical Alignment:394*395* * This field is intended to be set to VALIGN_4 if the surface was396* rendered as a depth buffer,397*398* * for a multisampled (4x) render target, or for a multisampled (8x)399* render target, since these surfaces support only alignment of 4.400*401* * This field must be set to VALIGN_4 for all tiled Y Render Target402* surfaces403*404* * Value of 1 is not supported for format YCRCB_NORMAL (0x182),405* YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)406*407* * If Number of Multisamples is not MULTISAMPLECOUNT_1, this field408* must be set to VALIGN_4."409*410* The first restriction is already handled by the table above and the411* second restriction is redundant with the fifth.412*/413if (info->samples > 1)414require_valign4 = true;415416if (tiling == ISL_TILING_Y0 &&417(info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT))418require_valign4 = true;419420assert(!(require_valign4 && gfx7_format_needs_valign2(dev, info->format)));421422/* We default to VALIGN_2 because it uses the least memory. */423const uint32_t valign = require_valign4 ? 4 : 2;424425*image_align_el = isl_extent3d(halign, valign, 1);426}427428429