Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
4574 views
/****************************************************************************
 * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file StoreTile.h
 *
 * @brief Functionality for Store.
 *
 ******************************************************************************/
#pragma once

#include "common/os.h"
#include "common/formats.h"
#include "core/context.h"
#include "core/rdtsc_core.h"
#include "core/format_conversion.h"

#include "memory/TilingFunctions.h"
#include "memory/Convert.h"
#include "memory/SurfaceState.h"
#include "core/multisample.h"

#include <array>
#include <sstream>

// Element count of a true C array (must not be used on a pointer).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// Function pointer to different storing functions for color, depth, and stencil based on incoming formats.
// NOTE(review): the meaning of the individual parameters is not visible in this
// header - presumably (source hot tile, destination surface, x, y, render
// target array index); confirm against the InitStoreTilesTable_* sources.
typedef void(*PFN_STORE_TILES)(uint8_t*, SWR_SURFACE_STATE*, uint32_t, uint32_t, uint32_t);

//////////////////////////////////////////////////////////////////////////
/// Store Raster Tile Function Tables.
/// One table per attachment kind, indexed as [tile mode][surface format].
//////////////////////////////////////////////////////////////////////////
extern PFN_STORE_TILES sStoreTilesTableColor[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];
extern PFN_STORE_TILES sStoreTilesTableDepth[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];
extern PFN_STORE_TILES sStoreTilesTableStencil[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];

// Table initializers; defined outside this header.
void InitStoreTilesTable_Linear_1();
void InitStoreTilesTable_Linear_2();
void InitStoreTilesTable_TileX_1();
void InitStoreTilesTable_TileX_2();
void InitStoreTilesTable_TileY_1();
void InitStoreTilesTable_TileY_2();
void InitStoreTilesTable_TileW();
void InitStoreTilesTable();

//////////////////////////////////////////////////////////////////////////
/// StorePixels
/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
///        The primary template only declares a deleted Store(), so using
///        a (PixelSize, NumDests) pair without an explicit specialization
///        is a compile-time error.
/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
/// @param ppDsts - Array of destination pointers. Each pointer is
///        to a single row of at most 16B.
/// @tparam PixelSize - Size of one pixel in bits.
/// @tparam NumDests - Number of destination pointers. Each pair of
///         pointers is for a 16-byte column of two rows.
//////////////////////////////////////////////////////////////////////////
template <size_t PixelSize, size_t NumDests>
struct StorePixels
{
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) = delete;
};

//////////////////////////////////////////////////////////////////////////
/// StorePixels (8-bit pixel specializations)
/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
/// @param ppDsts - Array of destination pointers. Each pointer is
///        to a single row of at most 16B.
/// @tparam NumDests - Number of destination pointers.
Each pair of86/// pointers is for a 16-byte column of two rows.87//////////////////////////////////////////////////////////////////////////88template <>89struct StorePixels<8, 2>90{91static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])92{93// Each 4-pixel row is 4 bytes.94const uint16_t* pPixSrc = (const uint16_t*)pSrc;9596// Unswizzle from SWR-Z order97uint16_t* pRow = (uint16_t*)ppDsts[0];98pRow[0] = pPixSrc[0];99pRow[1] = pPixSrc[2];100101pRow = (uint16_t*)ppDsts[1];102pRow[0] = pPixSrc[1];103pRow[1] = pPixSrc[3];104}105};106107template <>108struct StorePixels<8, 4>109{110static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])111{112// 8 x 2 bytes = 16 bytes, 16 pixels113const uint16_t *pSrc16 = reinterpret_cast<const uint16_t *>(pSrc);114115uint16_t **ppDsts16 = reinterpret_cast<uint16_t **>(ppDsts);116117// Unswizzle from SWR-Z order118ppDsts16[0][0] = pSrc16[0]; // 0 1119ppDsts16[0][1] = pSrc16[2]; // 4 5120121ppDsts16[1][0] = pSrc16[1]; // 2 3122ppDsts16[1][1] = pSrc16[3]; // 6 7123124ppDsts16[2][0] = pSrc16[4]; // 8 9125ppDsts16[2][1] = pSrc16[6]; // C D126127ppDsts16[3][0] = pSrc16[5]; // A B128ppDsts16[3][1] = pSrc16[7]; // E F129}130};131132//////////////////////////////////////////////////////////////////////////133/// StorePixels (32-bit pixel specialization)134/// @brief Stores a 4x2 (AVX) raster-tile to two rows.135/// @param pSrc - Pointer to source raster tile in SWRZ pixel order136/// @param ppDsts - Array of destination pointers. Each pointer is137/// to a single row of at most 16B.138/// @tparam NumDests - Number of destination pointers. 
Each pair of139/// pointers is for a 16-byte column of two rows.140//////////////////////////////////////////////////////////////////////////141template <>142struct StorePixels<16, 2>143{144static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])145{146// Each 4-pixel row is 8 bytes.147const uint32_t* pPixSrc = (const uint32_t*)pSrc;148149// Unswizzle from SWR-Z order150uint32_t* pRow = (uint32_t*)ppDsts[0];151pRow[0] = pPixSrc[0];152pRow[1] = pPixSrc[2];153154pRow = (uint32_t*)ppDsts[1];155pRow[0] = pPixSrc[1];156pRow[1] = pPixSrc[3];157}158};159160template <>161struct StorePixels<16, 4>162{163static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])164{165// 8 x 4 bytes = 32 bytes, 16 pixels166const uint32_t *pSrc32 = reinterpret_cast<const uint32_t *>(pSrc);167168uint32_t **ppDsts32 = reinterpret_cast<uint32_t **>(ppDsts);169170// Unswizzle from SWR-Z order171ppDsts32[0][0] = pSrc32[0]; // 0 1172ppDsts32[0][1] = pSrc32[2]; // 4 5173174ppDsts32[1][0] = pSrc32[1]; // 2 3175ppDsts32[1][1] = pSrc32[3]; // 6 7176177ppDsts32[2][0] = pSrc32[4]; // 8 9178ppDsts32[2][1] = pSrc32[6]; // C D179180ppDsts32[3][0] = pSrc32[5]; // A B181ppDsts32[3][1] = pSrc32[7]; // E F182}183};184185//////////////////////////////////////////////////////////////////////////186/// StorePixels (32-bit pixel specialization)187/// @brief Stores a 4x2 (AVX) raster-tile to two rows.188/// @param pSrc - Pointer to source raster tile in SWRZ pixel order189/// @param ppDsts - Array of destination pointers. Each pointer is190/// to a single row of at most 16B.191/// @tparam NumDests - Number of destination pointers. 
Each pair of192/// pointers is for a 16-byte column of two rows.193//////////////////////////////////////////////////////////////////////////194template <>195struct StorePixels<32, 2>196{197static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])198{199// Each 4-pixel row is 16-bytes200simd4scalari *pZRow01 = (simd4scalari*)pSrc;201simd4scalari vQuad00 = SIMD128::load_si(pZRow01);202simd4scalari vQuad01 = SIMD128::load_si(pZRow01 + 1);203204simd4scalari vRow00 = SIMD128::unpacklo_epi64(vQuad00, vQuad01);205simd4scalari vRow10 = SIMD128::unpackhi_epi64(vQuad00, vQuad01);206207SIMD128::storeu_si((simd4scalari*)ppDsts[0], vRow00);208SIMD128::storeu_si((simd4scalari*)ppDsts[1], vRow10);209}210};211212template <>213struct StorePixels<32, 4>214{215static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])216{217// 4 x 16 bytes = 64 bytes, 16 pixels218const simd4scalari *pSrc128 = reinterpret_cast<const simd4scalari *>(pSrc);219220simd4scalari **ppDsts128 = reinterpret_cast<simd4scalari **>(ppDsts);221222// Unswizzle from SWR-Z order223simd4scalari quad0 = SIMD128::load_si(&pSrc128[0]); // 0 1 2 3224simd4scalari quad1 = SIMD128::load_si(&pSrc128[1]); // 4 5 6 7225simd4scalari quad2 = SIMD128::load_si(&pSrc128[2]); // 8 9 A B226simd4scalari quad3 = SIMD128::load_si(&pSrc128[3]); // C D E F227228SIMD128::storeu_si(ppDsts128[0], SIMD128::unpacklo_epi64(quad0, quad1)); // 0 1 4 5229SIMD128::storeu_si(ppDsts128[1], SIMD128::unpackhi_epi64(quad0, quad1)); // 2 3 6 7230SIMD128::storeu_si(ppDsts128[2], SIMD128::unpacklo_epi64(quad2, quad3)); // 8 9 C D231SIMD128::storeu_si(ppDsts128[3], SIMD128::unpackhi_epi64(quad2, quad3)); // A B E F232}233};234235//////////////////////////////////////////////////////////////////////////236/// StorePixels (32-bit pixel specialization)237/// @brief Stores a 4x2 (AVX) raster-tile to two rows.238/// @param pSrc - Pointer to source raster tile in SWRZ pixel order239/// @param ppDsts - Array of destination pointers. 
Each pointer is240/// to a single row of at most 16B.241/// @tparam NumDests - Number of destination pointers. Each pair of242/// pointers is for a 16-byte column of two rows.243//////////////////////////////////////////////////////////////////////////244template <>245struct StorePixels<64, 4>246{247static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])248{249// Each 4-pixel row is 32 bytes.250const simd4scalari* pPixSrc = (const simd4scalari*)pSrc;251252// order of pointers match SWR-Z layout253simd4scalari** pvDsts = (simd4scalari**)&ppDsts[0];254*pvDsts[0] = pPixSrc[0];255*pvDsts[1] = pPixSrc[1];256*pvDsts[2] = pPixSrc[2];257*pvDsts[3] = pPixSrc[3];258}259};260261template <>262struct StorePixels<64, 8>263{264static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[8])265{266// 8 x 16 bytes = 128 bytes, 16 pixels267const simd4scalari *pSrc128 = reinterpret_cast<const simd4scalari *>(pSrc);268269simd4scalari **ppDsts128 = reinterpret_cast<simd4scalari **>(ppDsts);270271// order of pointers match SWR-Z layout272*ppDsts128[0] = pSrc128[0]; // 0 1273*ppDsts128[1] = pSrc128[1]; // 2 3274*ppDsts128[2] = pSrc128[2]; // 4 5275*ppDsts128[3] = pSrc128[3]; // 6 7276*ppDsts128[4] = pSrc128[4]; // 8 9277*ppDsts128[5] = pSrc128[5]; // A B278*ppDsts128[6] = pSrc128[6]; // C D279*ppDsts128[7] = pSrc128[7]; // E F280}281};282283//////////////////////////////////////////////////////////////////////////284/// StorePixels (32-bit pixel specialization)285/// @brief Stores a 4x2 (AVX) raster-tile to two rows.286/// @param pSrc - Pointer to source raster tile in SWRZ pixel order287/// @param ppDsts - Array of destination pointers. Each pointer is288/// to a single row of at most 16B.289/// @tparam NumDests - Number of destination pointers. 
///           Each pair of pointers is for a 16-byte column of two rows.
//////////////////////////////////////////////////////////////////////////
template <>
struct StorePixels<128, 8>
{
    // Copies eight 16-byte pixels; within each group of four the middle two
    // source entries are swapped (0 2 1 3) to undo the SWR-Z ordering.
    // NOTE(review): destinations are written by dereferencing simd4scalari
    // pointers, whereas the 32-bit specializations use SIMD128::storeu_si -
    // presumably this requires 16-byte aligned destinations; confirm.
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[8])
    {
        // Each 4-pixel row is 64 bytes.
        const simd4scalari* pPixSrc = (const simd4scalari*)pSrc;

        // Unswizzle from SWR-Z order
        simd4scalari** pvDsts = (simd4scalari**)&ppDsts[0];
        *pvDsts[0] = pPixSrc[0];
        *pvDsts[1] = pPixSrc[2];
        *pvDsts[2] = pPixSrc[1];
        *pvDsts[3] = pPixSrc[3];
        *pvDsts[4] = pPixSrc[4];
        *pvDsts[5] = pPixSrc[6];
        *pvDsts[6] = pPixSrc[5];
        *pvDsts[7] = pPixSrc[7];
    }
};

template <>
struct StorePixels<128, 16>
{
    // Same 0 2 1 3 reordering as StorePixels<128, 8>, applied to four
    // groups of four 16-byte pixels.
    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[16])
    {
        // 16 x 16 bytes = 256 bytes, 16 pixels
        const simd4scalari *pSrc128 = reinterpret_cast<const simd4scalari *>(pSrc);

        simd4scalari **ppDsts128 = reinterpret_cast<simd4scalari **>(ppDsts);

        for (uint32_t i = 0; i < 16; i += 4)
        {
            *ppDsts128[i + 0] = pSrc128[i + 0];
            *ppDsts128[i + 1] = pSrc128[i + 2];
            *ppDsts128[i + 2] = pSrc128[i + 1];
            *ppDsts128[i + 3] = pSrc128[i + 3];
        }
    }
};

//////////////////////////////////////////////////////////////////////////
/// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
/// Generic path: format conversion, SOA -> AOS transpose, then store.
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
struct ConvertPixelsSOAtoAOS
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief Converts a SIMD from the Hot Tile to the destination format
    ///        and converts from SOA to AOS.
    /// @param pSrc - Pointer to raster tile.
    /// @param ppDsts - Array of NumDests pointers into the destination
    ///        surface or deswizzling buffer.
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel

        // Scratch tiles: soaTile holds the converted SOA data, aosTile the
        // transposed result that is handed to StorePixels.
        OSALIGNSIMD16(uint8_t) soaTile[MAX_RASTER_TILE_BYTES] = {0};
        OSALIGNSIMD16(uint8_t) aosTile[MAX_RASTER_TILE_BYTES] = {0};

        // Convert from SrcFormat --> DstFormat
        simd16vector src;
        LoadSOA<SrcFormat>(pSrc, src);
        StoreSOA<DstFormat>(src, soaTile);

        // Convert from SOA --> AOS
        FormatTraits<DstFormat>::TransposeT::Transpose_simd16(soaTile, aosTile);

        // Store data into destination
        StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(aosTile, ppDsts);
    }
};

//////////////////////////////////////////////////////////////////////////
/// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
/// Specialization for no format conversion
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT Format>
struct ConvertPixelsSOAtoAOS<Format, Format>
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief Transposes a SIMD from the Hot Tile from SOA to AOS and
    ///        stores it; source and destination formats are identical so
    ///        no per-component conversion is done.
    /// @param pSrc - Pointer to raster tile.
    /// @param ppDsts - Array of NumDests pointers into the destination
    ///        surface or deswizzling buffer.
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel

        OSALIGNSIMD16(uint8_t) aosTile[MAX_RASTER_TILE_BYTES];

        // Convert from SOA --> AOS
        FormatTraits<Format>::TransposeT::Transpose_simd16(pSrc, aosTile);

        // Store data into destination
        StorePixels<FormatTraits<Format>::bpp, NumDests>::Store(aosTile, ppDsts);
    }
};

//////////////////////////////////////////////////////////////////////////
/// ConvertPixelsSOAtoAOS - Specialization conversion for B5G6R5_UNORM
//////////////////////////////////////////////////////////////////////////
template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B5G6R5_UNORM >
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief Converts a SIMD from the Hot Tile to the destination format
    ///        and converts from SOA to AOS.
    /// @param pSrc - Pointer to raster tile.
    /// @param ppDsts - Array of NumDests pointers into the destination
    ///        surface or deswizzling buffer.
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        static const SWR_FORMAT SrcFormat = R32G32B32A32_FLOAT;
        static const SWR_FORMAT DstFormat = B5G6R5_UNORM;

        static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel

        OSALIGNSIMD16(uint8_t) aosTile[MAX_RASTER_TILE_BYTES];

        // Load hot-tile
        simd16vector src, dst;
        LoadSOA<SrcFormat>(pSrc, src);

        // deswizzle
        dst.x = src[FormatTraits<DstFormat>::swizzle(0)];
        dst.y = src[FormatTraits<DstFormat>::swizzle(1)];
        dst.z = src[FormatTraits<DstFormat>::swizzle(2)];

        // clamp
        dst.x = Clamp<DstFormat>(dst.x, 0);
        dst.y = Clamp<DstFormat>(dst.y, 1);
        dst.z = Clamp<DstFormat>(dst.z, 2);

        // normalize
        dst.x = Normalize<DstFormat>(dst.x, 0);
        dst.y = Normalize<DstFormat>(dst.y, 1);
        dst.z = Normalize<DstFormat>(dst.z, 2);

        // pack
        simd16scalari packed = _simd16_castps_si(dst.x);

        // The shift amounts below (5 and 5 + 6) are hard-coded; these
        // asserts check that they match the component widths of DstFormat.
        SWR_ASSERT(FormatTraits<DstFormat>::GetBPC(0) == 5);
        SWR_ASSERT(FormatTraits<DstFormat>::GetBPC(1) == 6);

        packed = _simd16_or_si(packed, _simd16_slli_epi32(_simd16_castps_si(dst.y), 5));
        packed = _simd16_or_si(packed, _simd16_slli_epi32(_simd16_castps_si(dst.z), 5 + 6));

        // pack low 16 bits of each 32 bit lane into consecutive 16-bit
        // entries of aosTile (the uint32_t -> uint16_t assignment truncates)
        uint32_t *pPacked = (uint32_t*)&packed;
        uint16_t *pAosTile = (uint16_t*)&aosTile[0];
        for (uint32_t t = 0; t < KNOB_SIMD16_WIDTH; ++t)
        {
            *pAosTile++ = *pPacked++;
        }

        // Store data into destination
        StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(aosTile, ppDsts);
    }
};

//////////////////////////////////////////////////////////////////////////
/// ConvertPixelsSOAtoAOS - Specialization for R32_FLOAT to
/// R24_UNORM_X8_TYPELESS. Only the low 24 bits of each destination pixel
/// are replaced; the upper 8 bits already in memory are kept.
//////////////////////////////////////////////////////////////////////////
template<>
struct ConvertPixelsSOAtoAOS<R32_FLOAT, R24_UNORM_X8_TYPELESS>
{
    static const SWR_FORMAT SrcFormat = R32_FLOAT;
    static const SWR_FORMAT DstFormat = R24_UNORM_X8_TYPELESS;

    //////////////////////////////////////////////////////////////////////////
    /// @brief Converts a SIMD from the Hot Tile to the destination format
    ///        and converts from SOA to AOS.
    /// @param pSrc - Pointer to raster tile.
    /// @param ppDsts - Array of pointers into the destination surface or
    ///        deswizzling buffer. The destination is read as well as
    ///        written. Elements 0..3 are always accessed, so NumDests
    ///        must be at least 4.
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        simd16scalar comp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));

        // clamp
        const simd16scalar zero = _simd16_setzero_ps();
        const simd16scalar ones = _simd16_set1_ps(1.0f);

        comp = _simd16_max_ps(comp, zero);
        comp = _simd16_min_ps(comp, ones);

        // normalize
        comp = _simd16_mul_ps(comp, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));

        simd16scalari temp = _simd16_cvtps_epi32(comp);

        // swizzle
        temp = _simd16_permute_epi32(temp, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));

        // merge/store data into destination but don't overwrite the X8 bits
        simdscalari destlo = _simd_loadu2_si(reinterpret_cast<simd4scalari *>(ppDsts[1]), reinterpret_cast<simd4scalari *>(ppDsts[0]));
        simdscalari desthi = _simd_loadu2_si(reinterpret_cast<simd4scalari *>(ppDsts[3]), reinterpret_cast<simd4scalari *>(ppDsts[2]));

        simd16scalari dest = _simd16_setzero_si();

        dest = _simd16_insert_si(dest, destlo, 0);
        dest = _simd16_insert_si(dest, desthi, 1);

        // low 24 bits of every 32-bit lane are taken from temp, the top
        // 8 bits from the data read back from the destination
        simd16scalari mask = _simd16_set1_epi32(0x00FFFFFF);

        dest = _simd16_or_si(_simd16_andnot_si(mask, dest), _simd16_and_si(mask, temp));

        _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[1]), reinterpret_cast<simd4scalari *>(ppDsts[0]), _simd16_extract_si(dest, 0));
        _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[3]), reinterpret_cast<simd4scalari *>(ppDsts[2]), _simd16_extract_si(dest, 1));
    }
};

//////////////////////////////////////////////////////////////////////////
/// FlatConvert (16-wide, four destinations)
/// @brief Loads four float components for 16 pixels from the SOA source
///        (component order chosen by DstFormat's swizzle), clamps them to
///        [0, 1], optionally applies sRGB conversion to the first three,
///        scales and converts them to integers, packs them into one 32-bit
///        value per pixel (shifts of 0/8/16/24) and stores the result as
///        four 16-byte segments.
/// @param pSrc - Pointer to the SOA source; read with _simd16_load_ps.
/// @param pDst0..pDst3 - Destination segment pointers (see the memory
///        order comment at the stores).
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT DstFormat>
INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3)
{
    // swizzle rgba -> bgra while we load
    simd16scalar comp0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(0) * sizeof(simd16scalar))); // float32 rrrrrrrrrrrrrrrr
    simd16scalar comp1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(1) * sizeof(simd16scalar))); // float32 gggggggggggggggg
    simd16scalar comp2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(2) * sizeof(simd16scalar))); // float32 bbbbbbbbbbbbbbbb
    simd16scalar comp3 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(3) * sizeof(simd16scalar))); // float32 aaaaaaaaaaaaaaaa

    // clamp
    const simd16scalar zero = _simd16_setzero_ps();
    const simd16scalar ones = _simd16_set1_ps(1.0f);

    comp0 = _simd16_max_ps(comp0, zero);
    comp0 = _simd16_min_ps(comp0, ones);

    comp1 = _simd16_max_ps(comp1, zero);
    comp1 = _simd16_min_ps(comp1, ones);

    comp2 = _simd16_max_ps(comp2, zero);
    comp2 = _simd16_min_ps(comp2, ones);

    comp3 = _simd16_max_ps(comp3, zero);
    comp3 = _simd16_min_ps(comp3, ones);

    // gamma-correct only rgb
    if (FormatTraits<DstFormat>::isSRGB)
    {
        comp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, comp0);
        comp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, comp1);
        comp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, comp2);
    }

    // convert float components from 0.0f..1.0f to correct scale for 0..255 dest format
    comp0 = _simd16_mul_ps(comp0, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
    comp1 = _simd16_mul_ps(comp1, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
    comp2 = _simd16_mul_ps(comp2, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
    comp3 = _simd16_mul_ps(comp3, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(3)));

    // moving to 16 wide integer vector types
    simd16scalari src0 = _simd16_cvtps_epi32(comp0); // padded byte rrrrrrrrrrrrrrrr
    simd16scalari src1 = _simd16_cvtps_epi32(comp1); // padded byte gggggggggggggggg
    simd16scalari src2 = _simd16_cvtps_epi32(comp2); // padded byte bbbbbbbbbbbbbbbb
    simd16scalari src3 = _simd16_cvtps_epi32(comp3); // padded byte aaaaaaaaaaaaaaaa

    // SOA to AOS conversion
    src1 = _simd16_slli_epi32(src1, 8);
    src2 = _simd16_slli_epi32(src2, 16);
    src3 = _simd16_slli_epi32(src3, 24);

    simd16scalari final = _simd16_or_si(_simd16_or_si(src0, src1), _simd16_or_si(src2, src3)); // 0 1 2 3 4 5 6 7 8 9 A B C D E F

    // de-swizzle conversion
    // (the #else branch is an alternative single-permute form that is
    // compiled out by the "#if 1")
#if 1
    simd16scalari final0 = _simd16_permute2f128_si(final, final, 0xA0); // (2, 2, 0, 0) // 0 1 2 3 0 1 2 3 8 9 A B 8 9 A B
    simd16scalari final1 = _simd16_permute2f128_si(final, final, 0xF5); // (3, 3, 1, 1) // 4 5 6 7 4 5 6 7 C D E F C D E F

    final = _simd16_shuffle_epi64(final0, final1, 0xCC); // (1 1 0 0 1 1 0 0) // 0 1 4 5 2 3 6 7 8 9 C D A B E F

#else
    final = _simd16_permute_epi32(final, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));

#endif
    // store 8x2 memory order:
    // row0: [ pDst0, pDst2 ] = { 0 1 4 5 }, { 8 9 C D }
    // row1: [ pDst1, pDst3 ] = { 2 3 6 7 }, { A B E F }
    _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst1), reinterpret_cast<simd4scalari *>(pDst0), _simd16_extract_si(final, 0));
    _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1));
}

//////////////////////////////////////////////////////////////////////////
/// FlatConvert (8-wide, two destinations)
/// @brief Same conversion as the four-destination overload, but for 8
///        pixels held in simdscalar registers and written as two 16-byte
///        rows.
/// @param pSrc - Pointer to the SOA source; read with _simd_load_ps.
/// @param pDst - Destination of the low 128 bits of the packed result.
/// @param pDst1 - Destination of the high 128 bits of the packed result.
/// NOTE(review): no caller of this overload is visible in this part of
/// the file; confirm whether it is still used.
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT DstFormat>
INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1)
{
    static const uint32_t offset = sizeof(simdscalar);

    // swizzle rgba -> bgra while we load
    simdscalar vComp0 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(0))*offset)); // float32 rrrrrrrr
    simdscalar vComp1 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(1))*offset)); // float32 gggggggg
    simdscalar vComp2 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(2))*offset)); // float32 bbbbbbbb
    simdscalar vComp3 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(3))*offset)); // float32 aaaaaaaa

    // clamp
    vComp0 = _simd_max_ps(vComp0, _simd_setzero_ps());
    vComp0 = _simd_min_ps(vComp0, _simd_set1_ps(1.0f));

    vComp1 = _simd_max_ps(vComp1, _simd_setzero_ps());
    vComp1 = _simd_min_ps(vComp1, _simd_set1_ps(1.0f));

    vComp2 = _simd_max_ps(vComp2, _simd_setzero_ps());
    vComp2 = _simd_min_ps(vComp2, _simd_set1_ps(1.0f));

    vComp3 = _simd_max_ps(vComp3, _simd_setzero_ps());
    vComp3 = _simd_min_ps(vComp3, _simd_set1_ps(1.0f));

    if (FormatTraits<DstFormat>::isSRGB)
    {
        // Gamma-correct only rgb
        vComp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, vComp0);
        vComp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, vComp1);
        vComp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, vComp2);
    }

    // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
    vComp0 = _simd_mul_ps(vComp0, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
    vComp1 = _simd_mul_ps(vComp1, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
    vComp2 = _simd_mul_ps(vComp2, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
    vComp3 = _simd_mul_ps(vComp3, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(3)));

    // moving to 8 wide integer vector types
    simdscalari src0 = _simd_cvtps_epi32(vComp0); // padded byte rrrrrrrr
    simdscalari src1 = _simd_cvtps_epi32(vComp1); // padded byte gggggggg
    simdscalari src2 = _simd_cvtps_epi32(vComp2); // padded byte bbbbbbbb
    simdscalari src3 = _simd_cvtps_epi32(vComp3); // padded byte aaaaaaaa

#if KNOB_ARCH <= KNOB_ARCH_AVX

    // splitting into two sets of 4 wide integer vector types
    // because AVX doesn't have instructions to support this operation at 8 wide
    simd4scalari srcLo0 = _mm256_castsi256_si128(src0); // 000r000r000r000r
    simd4scalari srcLo1 = _mm256_castsi256_si128(src1); // 000g000g000g000g
    simd4scalari srcLo2 = _mm256_castsi256_si128(src2); // 000b000b000b000b
    simd4scalari srcLo3 = _mm256_castsi256_si128(src3); // 000a000a000a000a

    simd4scalari srcHi0 = _mm256_extractf128_si256(src0, 1); // 000r000r000r000r
    simd4scalari srcHi1 = _mm256_extractf128_si256(src1, 1); // 000g000g000g000g
    simd4scalari srcHi2 = _mm256_extractf128_si256(src2, 1); // 000b000b000b000b
    simd4scalari srcHi3 = _mm256_extractf128_si256(src3, 1); // 000a000a000a000a

    // byte-wise shifts move each component into its byte of the pixel
    srcLo1 = _mm_slli_si128(srcLo1, 1); // 00g000g000g000g0
    srcHi1 = _mm_slli_si128(srcHi1, 1); // 00g000g000g000g0
    srcLo2 = _mm_slli_si128(srcLo2, 2); // 0b000b000b000b00
    srcHi2 = _mm_slli_si128(srcHi2, 2); // 0b000b000b000b00
    srcLo3 = _mm_slli_si128(srcLo3, 3); // a000a000a000a000
    srcHi3 = _mm_slli_si128(srcHi3, 3); // a000a000a000a000

    srcLo0 = SIMD128::or_si(srcLo0, srcLo1); // 00gr00gr00gr00gr
    srcLo2 = SIMD128::or_si(srcLo2, srcLo3); // ab00ab00ab00ab00

    srcHi0 = SIMD128::or_si(srcHi0, srcHi1); // 00gr00gr00gr00gr
    srcHi2 = SIMD128::or_si(srcHi2, srcHi3); // ab00ab00ab00ab00

    srcLo0 = SIMD128::or_si(srcLo0, srcLo2); // abgrabgrabgrabgr
    srcHi0 = SIMD128::or_si(srcHi0, srcHi2); // abgrabgrabgrabgr

    // unpack into rows that get the tiling order correct
    simd4scalari vRow00 = SIMD128::unpacklo_epi64(srcLo0, srcHi0); // abgrabgrabgrabgrabgrabgrabgrabgr
    simd4scalari vRow10 = SIMD128::unpackhi_epi64(srcLo0, srcHi0);

    simdscalari final = _mm256_castsi128_si256(vRow00);
    final = _mm256_insertf128_si256(final, vRow10, 1);

#else

    // logic is as above, only wider
    src1 = _mm256_slli_si256(src1, 1);
    src2 = _mm256_slli_si256(src2, 2);
    src3 = _mm256_slli_si256(src3, 3);

    src0 = _mm256_or_si256(src0, src1);
    src2 = _mm256_or_si256(src2, src3);

    simdscalari final = _mm256_or_si256(src0, src2);

    // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
    final = _mm256_permute4x64_epi64(final, 0xD8);
#endif

    _simd_storeu2_si((simd4scalari*)pDst1, (simd4scalari*)pDst, final);
}

//////////////////////////////////////////////////////////////////////////
/// FlatConvertNoAlpha (16-wide, four destinations)
/// @brief Same as the four-destination FlatConvert, except that only the
///        first three components are loaded and packed (shifts of 0/8/16);
///        the top byte of every stored 32-bit pixel is therefore zero.
/// @param pSrc - Pointer to the SOA source; read with _simd16_load_ps.
/// @param pDst0..pDst3 - Destination segment pointers (see the memory
///        order comment at the stores).
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT DstFormat>
INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3)
{
    // swizzle rgba -> bgra while we load
    simd16scalar comp0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(0) * sizeof(simd16scalar))); // float32 rrrrrrrrrrrrrrrr
    simd16scalar comp1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(1) * sizeof(simd16scalar))); // float32 gggggggggggggggg
    simd16scalar comp2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(2) * sizeof(simd16scalar))); // float32 bbbbbbbbbbbbbbbb

    // clamp
    const simd16scalar zero = _simd16_setzero_ps();
    const simd16scalar ones = _simd16_set1_ps(1.0f);

    comp0 = _simd16_max_ps(comp0, zero);
    comp0 = _simd16_min_ps(comp0, ones);

    comp1 = _simd16_max_ps(comp1, zero);
    comp1 = _simd16_min_ps(comp1, ones);

    comp2 = _simd16_max_ps(comp2, zero);
    comp2 = _simd16_min_ps(comp2, ones);

    // gamma-correct only rgb
    if (FormatTraits<DstFormat>::isSRGB)
    {
        comp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, comp0);
        comp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, comp1);
        comp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, comp2);
    }

    // convert float components from 0.0f..1.0f to correct scale for 0..255 dest format
    comp0 = _simd16_mul_ps(comp0, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
    comp1 = _simd16_mul_ps(comp1, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
    comp2 = _simd16_mul_ps(comp2, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));

    // moving to 16 wide integer vector types
    simd16scalari src0 = _simd16_cvtps_epi32(comp0); // padded byte rrrrrrrrrrrrrrrr
    simd16scalari src1 = _simd16_cvtps_epi32(comp1); // padded byte gggggggggggggggg
    simd16scalari src2 = _simd16_cvtps_epi32(comp2); // padded byte bbbbbbbbbbbbbbbb

    // SOA to AOS conversion
    src1 = _simd16_slli_epi32(src1, 8);
    src2 = _simd16_slli_epi32(src2, 16);

    simd16scalari final = _simd16_or_si(_simd16_or_si(src0, src1), src2); // 0 1 2 3 4 5 6 7 8 9 A B C D E F

    // de-swizzle conversion
    // (the #else branch is an alternative single-permute form that is
    // compiled out by the "#if 1")
#if 1
    simd16scalari final0 = _simd16_permute2f128_si(final, final, 0xA0); // (2, 2, 0, 0) // 0 1 2 3 0 1 2 3 8 9 A B 8 9 A B
    simd16scalari final1 = _simd16_permute2f128_si(final, final, 0xF5); // (3, 3, 1, 1) // 4 5 6 7 4 5 6 7 C D E F C D E F

    final = _simd16_shuffle_epi64(final0, final1, 0xCC); // (1 1 0 0 1 1 0 0) // 0 1 4 5 2 3 6 7 8 9 C D A B E F

#else
    final = _simd16_permute_epi32(final, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));

#endif
    // store 8x2 memory order:
    // row0: [ pDst0, pDst2 ] = { 0 1 4 5 }, { 8 9 C D }
    // row1: [ pDst1, pDst3 ] = { 2 3 6 7 }, { A B E F }
    _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst1), reinterpret_cast<simd4scalari *>(pDst0), _simd16_extract_si(final, 0));
    _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1));
}

//////////////////////////////////////////////////////////////////////////
/// FlatConvertNoAlpha (8-wide, two destinations)
/// @brief Three-component variant of the two-destination FlatConvert; the
///        top byte of every stored 32-bit pixel is zero.
/// @param pSrc - Pointer to the SOA source; read with _simd_load_ps.
/// @param pDst - Destination of the low 128 bits of the packed result.
/// @param pDst1 - Destination of the high 128 bits of the packed result.
/// NOTE(review): no caller of this overload is visible in this part of
/// the file; confirm whether it is still used.
//////////////////////////////////////////////////////////////////////////
template<SWR_FORMAT DstFormat>
INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1)
{
    static const uint32_t offset = sizeof(simdscalar);

    // swizzle rgba -> bgra while we load
    simdscalar vComp0 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(0))*offset)); // float32 rrrrrrrr
    simdscalar vComp1 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(1))*offset)); // float32 gggggggg
    simdscalar vComp2 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(2))*offset)); // float32 bbbbbbbb
    // clamp
    vComp0 = _simd_max_ps(vComp0, _simd_setzero_ps());
    vComp0 = _simd_min_ps(vComp0, _simd_set1_ps(1.0f));

    vComp1 = _simd_max_ps(vComp1, _simd_setzero_ps());
    vComp1 = _simd_min_ps(vComp1, _simd_set1_ps(1.0f));

    vComp2 = _simd_max_ps(vComp2, _simd_setzero_ps());
    vComp2 = _simd_min_ps(vComp2, _simd_set1_ps(1.0f));

    if (FormatTraits<DstFormat>::isSRGB)
    {
        // Gamma-correct only rgb
        vComp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, vComp0);
        vComp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, vComp1);
        vComp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, vComp2);
    }

    // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
    vComp0 = _simd_mul_ps(vComp0, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
    vComp1 = _simd_mul_ps(vComp1, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
    vComp2 = _simd_mul_ps(vComp2, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));

    // moving to 8 wide integer vector types
    simdscalari src0 = _simd_cvtps_epi32(vComp0); // padded byte rrrrrrrr
    simdscalari src1 = _simd_cvtps_epi32(vComp1); // padded byte gggggggg
    simdscalari src2 = _simd_cvtps_epi32(vComp2); // padded byte bbbbbbbb

#if KNOB_ARCH <= KNOB_ARCH_AVX

    // splitting into two sets of 4 wide integer vector types
    // because AVX doesn't have instructions to support this operation at 8 wide
    simd4scalari srcLo0 = _mm256_castsi256_si128(src0); // 000r000r000r000r
    simd4scalari srcLo1 = _mm256_castsi256_si128(src1); // 000g000g000g000g
    simd4scalari srcLo2 = _mm256_castsi256_si128(src2); // 000b000b000b000b

    simd4scalari srcHi0 = _mm256_extractf128_si256(src0, 1); // 000r000r000r000r
    simd4scalari srcHi1 = _mm256_extractf128_si256(src1, 1); // 000g000g000g000g
    simd4scalari srcHi2 = _mm256_extractf128_si256(src2, 1); // 000b000b000b000b

    // byte-wise shifts move each component into its byte of the pixel
    srcLo1 = _mm_slli_si128(srcLo1, 1); // 00g000g000g000g0
    srcHi1 = _mm_slli_si128(srcHi1, 1); // 00g000g000g000g0
    srcLo2 = _mm_slli_si128(srcLo2, 2); // 0b000b000b000b00
    srcHi2 = _mm_slli_si128(srcHi2, 2); // 0b000b000b000b00

    srcLo0 = SIMD128::or_si(srcLo0, srcLo1); // 00gr00gr00gr00gr

    srcHi0 = SIMD128::or_si(srcHi0, srcHi1); // 00gr00gr00gr00gr

    srcLo0 = SIMD128::or_si(srcLo0, srcLo2); // 0bgr0bgr0bgr0bgr
    srcHi0 = SIMD128::or_si(srcHi0, srcHi2); // 0bgr0bgr0bgr0bgr

    // unpack into rows that get the tiling order correct
    simd4scalari vRow00 = SIMD128::unpacklo_epi64(srcLo0, srcHi0); // 0bgr0bgr0bgr0bgr0bgr0bgr0bgr0bgr
    simd4scalari vRow10 = SIMD128::unpackhi_epi64(srcLo0, srcHi0);

    simdscalari final = _mm256_castsi128_si256(vRow00);
    final = _mm256_insertf128_si256(final, vRow10, 1);

#else

    // logic is as above, only wider
    src1 = _mm256_slli_si256(src1, 1);
    src2 = _mm256_slli_si256(src2, 2);

    src0 = _mm256_or_si256(src0, src1);

    simdscalari final = _mm256_or_si256(src0, src2);

    // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
    final = _mm256_permute4x64_epi64(final, 0xD8);

#endif

    _simd_storeu2_si((simd4scalari*)pDst1, (simd4scalari*)pDst, final);
}

//////////////////////////////////////////////////////////////////////////
/// ConvertPixelsSOAtoAOS - R32G32B32A32_FLOAT specializations that forward
/// to the four-destination FlatConvert / FlatConvertNoAlpha.
/// Each Convert() below passes ppDsts[0] through ppDsts[3], so NumDests
/// must be at least 4.
//////////////////////////////////////////////////////////////////////////
template<>
struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8A8_UNORM>
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvert<B8G8R8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8X8_UNORM>
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvertNoAlpha<B8G8R8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvert<B8G8R8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvertNoAlpha<B8G8R8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8A8_UNORM >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvert<R8G8B8A8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8X8_UNORM >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvertNoAlpha<R8G8B8X8_UNORM>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvert<R8G8B8A8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

template<>
struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB >
{
    template <size_t NumDests>
    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
    {
        FlatConvertNoAlpha<R8G8B8X8_UNORM_SRGB>(pSrc, ppDsts[0], ppDsts[1], ppDsts[2], ppDsts[3]);
    }
};

//////////////////////////////////////////////////////////////////////////
/// StoreRasterTile
//////////////////////////////////////////////////////////////////////////
template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
struct StoreRasterTile
{
    //////////////////////////////////////////////////////////////////////////
    /// @brief Retrieve color from hot tile source which is always float.
    /// @param pSrc - Pointer to raster tile.
    /// @param x, y - Coordinates to raster tile.
    /// @param output - output color
    INLINE static void GetSwizzledSrcColor(
        uint8_t* pSrc,
        uint32_t x, uint32_t y,
        float outputColor[4])
    {
        typedef SimdTile_16<SrcFormat, DstFormat> SimdT;

        SimdT *pSrcSimdTiles = reinterpret_cast<SimdT *>(pSrc);

        // Compute which simd tile we're accessing within 8x8 tile.
        // i.e. 
Compute linear simd tile coordinate given (x, y) in pixel coordinates.927uint32_t simdIndex = (y / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM) + (x / SIMD16_TILE_X_DIM);928929SimdT *pSimdTile = &pSrcSimdTiles[simdIndex];930931uint32_t simdOffset = (y % SIMD16_TILE_Y_DIM) * SIMD16_TILE_X_DIM + (x % SIMD16_TILE_X_DIM);932933pSimdTile->GetSwizzledColor(simdOffset, outputColor);934}935936//////////////////////////////////////////////////////////////////////////937/// @brief Stores an 8x8 raster tile to the destination surface.938/// @param pSrc - Pointer to raster tile.939/// @param pDstSurface - Destination surface state940/// @param x, y - Coordinates to raster tile.941INLINE static void Store(942uint8_t *pSrc,943SWR_SURFACE_STATE* pDstSurface,944uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.945{946uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);947uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);948949// For each raster tile pixel (rx, ry)950for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)951{952for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)953{954// Perform bounds checking.955if (((x + rx) < lodWidth) &&956((y + ry) < lodHeight))957{958float srcColor[4];959GetSwizzledSrcColor(pSrc, rx, ry, srcColor);960961uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),962pDstSurface->arrayIndex + renderTargetArrayIndex, pDstSurface->arrayIndex + renderTargetArrayIndex,963sampleNum, pDstSurface->lod, pDstSurface);964{965ConvertPixelFromFloat<DstFormat>(pDst, srcColor);966}967}968}969}970}971972//////////////////////////////////////////////////////////////////////////973/// @brief Resolves an 8x8 raster tile to the resolve destination surface.974/// @param pSrc - Pointer to raster tile.975/// @param pDstSurface - Destination surface state976/// @param x, y - Coordinates to raster tile.977/// @param 
sampleOffset - Offset between adjacent multisamples978INLINE static void Resolve(979uint8_t *pSrc,980SWR_SURFACE_STATE* pDstSurface,981uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.982{983uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);984uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);985986float oneOverNumSamples = 1.0f / pDstSurface->numSamples;987988// For each raster tile pixel (rx, ry)989for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)990{991for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)992{993// Perform bounds checking.994if (((x + rx) < lodWidth) &&995((y + ry) < lodHeight))996{997// Sum across samples998float resolveColor[4] = {0};999for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)1000{1001float sampleColor[4] = {0};1002uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;1003GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);1004resolveColor[0] += sampleColor[0];1005resolveColor[1] += sampleColor[1];1006resolveColor[2] += sampleColor[2];1007resolveColor[3] += sampleColor[3];1008}10091010// Divide by numSamples to average1011resolveColor[0] *= oneOverNumSamples;1012resolveColor[1] *= oneOverNumSamples;1013resolveColor[2] *= oneOverNumSamples;1014resolveColor[3] *= oneOverNumSamples;10151016// Use the resolve surface state1017SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->xpAuxBaseAddress;1018uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),1019pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,10200, pResolveSurface->lod, pResolveSurface);1021{1022ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);1023}1024}1025}1026}1027}10281029};10301031template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1032struct OptStoreRasterTile : StoreRasterTile<TTraits, SrcFormat, 
DstFormat>1033{};10341035//////////////////////////////////////////////////////////////////////////1036/// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 8bpp1037//////////////////////////////////////////////////////////////////////////1038template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1039struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat>1040{1041typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat> GenericStoreTile;1042static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1043static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;10441045//////////////////////////////////////////////////////////////////////////1046/// @brief Stores an 8x8 raster tile to the destination surface.1047/// @param pSrc - Pointer to raster tile.1048/// @param pDstSurface - Destination surface state1049/// @param x, y - Coordinates to raster tile.1050INLINE static void Store(1051uint8_t *pSrc,1052SWR_SURFACE_STATE* pDstSurface,1053uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1054{1055// Punt non-full tiles to generic store1056uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1057uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);10581059if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1060{1061return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1062}10631064uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1065pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);10661067const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1068const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;10691070uint8_t* ppDsts[] =1071{1072pDst, // row 0, col 01073pDst + pDstSurface->pitch, // row 1, col 
01074pDst + dx / 2, // row 0, col 11075pDst + pDstSurface->pitch + dx / 2 // row 1, col 11076};10771078for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1079{1080for (uint32_t xx = 0; xx < KNOB_TILE_X_DIM; xx += SIMD16_TILE_X_DIM)1081{1082ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);10831084pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;10851086ppDsts[0] += dx;1087ppDsts[1] += dx;1088ppDsts[2] += dx;1089ppDsts[3] += dx;1090}10911092ppDsts[0] += dy;1093ppDsts[1] += dy;1094ppDsts[2] += dy;1095ppDsts[3] += dy;1096}1097}1098};10991100//////////////////////////////////////////////////////////////////////////1101/// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 16bpp1102//////////////////////////////////////////////////////////////////////////1103template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1104struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat>1105{1106typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat> GenericStoreTile;1107static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1108static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;11091110//////////////////////////////////////////////////////////////////////////1111/// @brief Stores an 8x8 raster tile to the destination surface.1112/// @param pSrc - Pointer to raster tile.1113/// @param pDstSurface - Destination surface state1114/// @param x, y - Coordinates to raster tile.1115INLINE static void Store(1116uint8_t *pSrc,1117SWR_SURFACE_STATE* pDstSurface,1118uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1119{1120// Punt non-full tiles to generic store1121uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1122uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);11231124if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1125{1126return GenericStoreTile::Store(pSrc, 
pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1127}11281129uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1130pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);11311132const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1133const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;11341135uint8_t* ppDsts[] =1136{1137pDst, // row 0, col 01138pDst + pDstSurface->pitch, // row 1, col 01139pDst + dx / 2, // row 0, col 11140pDst + pDstSurface->pitch + dx / 2 // row 1, col 11141};11421143for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1144{1145for (uint32_t xx = 0; xx < KNOB_TILE_X_DIM; xx += SIMD16_TILE_X_DIM)1146{1147ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);11481149pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;11501151ppDsts[0] += dx;1152ppDsts[1] += dx;1153ppDsts[2] += dx;1154ppDsts[3] += dx;1155}11561157ppDsts[0] += dy;1158ppDsts[1] += dy;1159ppDsts[2] += dy;1160ppDsts[3] += dy;1161}1162}1163};11641165//////////////////////////////////////////////////////////////////////////1166/// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 32bpp1167//////////////////////////////////////////////////////////////////////////1168template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1169struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat>1170{1171typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat> GenericStoreTile;1172static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1173static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;11741175//////////////////////////////////////////////////////////////////////////1176/// @brief Stores an 8x8 raster tile to the destination surface.1177/// @param pSrc - Pointer to raster tile.1178/// @param pDstSurface - Destination 
surface state1179/// @param x, y - Coordinates to raster tile.1180INLINE static void Store(1181uint8_t *pSrc,1182SWR_SURFACE_STATE* pDstSurface,1183uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1184{1185// Punt non-full tiles to generic store1186uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1187uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);11881189if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1190{1191return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1192}11931194uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1195pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);11961197const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1198const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;11991200uint8_t* ppDsts[] =1201{1202pDst, // row 0, col 01203pDst + pDstSurface->pitch, // row 1, col 01204pDst + dx / 2, // row 0, col 11205pDst + pDstSurface->pitch + dx / 2 // row 1, col 11206};12071208for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1209{1210for (uint32_t xx = 0; xx < KNOB_TILE_X_DIM; xx += SIMD16_TILE_X_DIM)1211{1212ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);12131214pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;12151216ppDsts[0] += dx;1217ppDsts[1] += dx;1218ppDsts[2] += dx;1219ppDsts[3] += dx;1220}12211222ppDsts[0] += dy;1223ppDsts[1] += dy;1224ppDsts[2] += dy;1225ppDsts[3] += dy;1226}1227}1228};12291230//////////////////////////////////////////////////////////////////////////1231/// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 64bpp1232//////////////////////////////////////////////////////////////////////////1233template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1234struct OptStoreRasterTile< 
TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat>1235{1236typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat> GenericStoreTile;1237static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1238static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;1239static const size_t MAX_DST_COLUMN_BYTES = 16;12401241//////////////////////////////////////////////////////////////////////////1242/// @brief Stores an 8x8 raster tile to the destination surface.1243/// @param pSrc - Pointer to raster tile.1244/// @param pDstSurface - Destination surface state1245/// @param x, y - Coordinates to raster tile.1246INLINE static void Store(1247uint8_t *pSrc,1248SWR_SURFACE_STATE* pDstSurface,1249uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1250{1251// Punt non-full tiles to generic store1252uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1253uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);12541255if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1256{1257return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1258}12591260uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1261pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);12621263const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1264const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch;12651266// we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)1267static_assert(dx == MAX_DST_COLUMN_BYTES * 4, "Invalid column offsets");12681269uint8_t *ppDsts[] =1270{1271pDst, // row 0, col 01272pDst + pDstSurface->pitch, // row 1, col 01273pDst + MAX_DST_COLUMN_BYTES, // row 0, col 11274pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES, // row 1, col 
11275pDst + MAX_DST_COLUMN_BYTES * 2, // row 0, col 21276pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 2, // row 1, col 21277pDst + MAX_DST_COLUMN_BYTES * 3, // row 0, col 31278pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 3 // row 1, col 31279};12801281for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1282{1283// Raster tile width is same as simd16 tile width1284static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");12851286ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);12871288pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;12891290for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)1291{1292ppDsts[i] += dy;1293}1294}1295}1296};12971298//////////////////////////////////////////////////////////////////////////1299/// OptStoreRasterTile - SWR_TILE_MODE_NONE specialization for 128bpp1300//////////////////////////////////////////////////////////////////////////1301template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1302struct OptStoreRasterTile< TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstFormat>1303{1304typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstFormat> GenericStoreTile;1305static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1306static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;1307static const size_t MAX_DST_COLUMN_BYTES = 16;13081309//////////////////////////////////////////////////////////////////////////1310/// @brief Stores an 8x8 raster tile to the destination surface.1311/// @param pSrc - Pointer to raster tile.1312/// @param pDstSurface - Destination surface state1313/// @param x, y - Coordinates to raster tile.1314INLINE static void Store(1315uint8_t *pSrc,1316SWR_SURFACE_STATE* pDstSurface,1317uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1318{1319// Punt non-full tiles to generic store1320uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1321uint32_t 
lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);13221323if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1324{1325return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1326}13271328uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1329pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);13301331const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1332const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch;13331334// we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)1335static_assert(dx == MAX_DST_COLUMN_BYTES * 8, "Invalid column offsets");13361337uint8_t* ppDsts[] =1338{1339pDst, // row 0, col 01340pDst + pDstSurface->pitch, // row 1, col 01341pDst + MAX_DST_COLUMN_BYTES, // row 0, col 11342pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES, // row 1, col 11343pDst + MAX_DST_COLUMN_BYTES * 2, // row 0, col 21344pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 2, // row 1, col 21345pDst + MAX_DST_COLUMN_BYTES * 3, // row 0, col 31346pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 3, // row 1, col 31347pDst + MAX_DST_COLUMN_BYTES * 4, // row 0, col 41348pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 4, // row 1, col 41349pDst + MAX_DST_COLUMN_BYTES * 5, // row 0, col 51350pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 5, // row 1, col 51351pDst + MAX_DST_COLUMN_BYTES * 6, // row 0, col 61352pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 6, // row 1, col 61353pDst + MAX_DST_COLUMN_BYTES * 7, // row 0, col 71354pDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 7, // row 1, col 71355};13561357for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1358{1359// Raster tile width is same as simd16 tile width1360static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x 
dim");13611362ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);13631364pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;13651366for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)1367{1368ppDsts[i] += dy;1369}1370}1371}1372};13731374//////////////////////////////////////////////////////////////////////////1375/// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 8bpp1376//////////////////////////////////////////////////////////////////////////1377template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1378struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, DstFormat>1379{1380typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, DstFormat> GenericStoreTile;1381static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;13821383//////////////////////////////////////////////////////////////////////////1384/// @brief Stores an 8x8 raster tile to the destination surface.1385/// @param pSrc - Pointer to raster tile.1386/// @param pDstSurface - Destination surface state1387/// @param x, y - Coordinates to raster tile.1388INLINE static void Store(1389uint8_t *pSrc,1390SWR_SURFACE_STATE* pDstSurface,1391uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1392{1393static const uint32_t DestRowWidthBytes = 16; // 16B rows13941395// Punt non-full tiles to generic store1396uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1397uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);13981399if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1400{1401return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1402}14031404// TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.1405// We can compute the offsets to each column within the raster tile once and increment from these.1406// There will be 4 8x2 simd tiles in an 8x8 raster tile.1407uint8_t 
*pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1408pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);14091410const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;14111412// The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.1413uint8_t *ppDsts[] =1414{1415pDst,1416pDst + DestRowWidthBytes,1417pDst + DestRowWidthBytes / 4,1418pDst + DestRowWidthBytes + DestRowWidthBytes / 41419};14201421for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1422{1423// Raster tile width is same as simd16 tile width1424static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");14251426ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);14271428pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;14291430ppDsts[0] += dy;1431ppDsts[1] += dy;1432ppDsts[2] += dy;1433ppDsts[3] += dy;1434}1435}1436};14371438//////////////////////////////////////////////////////////////////////////1439/// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 16bpp1440//////////////////////////////////////////////////////////////////////////1441template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1442struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, DstFormat>1443{1444typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, DstFormat> GenericStoreTile;1445static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;14461447//////////////////////////////////////////////////////////////////////////1448/// @brief Stores an 8x8 raster tile to the destination surface.1449/// @param pSrc - Pointer to raster tile.1450/// @param pDstSurface - Destination surface state1451/// @param x, y - Coordinates to raster tile.1452INLINE static void Store(1453uint8_t *pSrc,1454SWR_SURFACE_STATE* pDstSurface,1455uint32_t x, uint32_t y, uint32_t sampleNum, 
uint32_t renderTargetArrayIndex)1456{1457static const uint32_t DestRowWidthBytes = 16; // 16B rows14581459// Punt non-full tiles to generic store1460uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1461uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);14621463if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1464{1465return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1466}14671468// TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.1469// We can compute the offsets to each column within the raster tile once and increment from these.1470// There will be 4 8x2 simd tiles in an 8x8 raster tile.1471uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1472pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);14731474const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;14751476// The Hot Tile uses a row-major tiling mode and has a larger memory footprint. 
So we iterate in a row-major pattern.1477uint8_t *ppDsts[] =1478{1479pDst,1480pDst + DestRowWidthBytes,1481pDst + DestRowWidthBytes / 2,1482pDst + DestRowWidthBytes + DestRowWidthBytes / 21483};14841485for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1486{1487// Raster tile width is same as simd16 tile width1488static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");14891490ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);14911492pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;14931494ppDsts[0] += dy;1495ppDsts[1] += dy;1496ppDsts[2] += dy;1497ppDsts[3] += dy;1498}1499}1500};15011502//////////////////////////////////////////////////////////////////////////1503/// OptStoreRasterTile - TILE_MODE_XMAJOR specialization for 32bpp1504//////////////////////////////////////////////////////////////////////////1505template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1506struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, DstFormat>1507{1508typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, DstFormat> GenericStoreTile;1509static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;1510static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;15111512//////////////////////////////////////////////////////////////////////////1513/// @brief Stores an 8x8 raster tile to the destination surface.1514/// @param pSrc - Pointer to raster tile.1515/// @param pDstSurface - Destination surface state1516/// @param x, y - Coordinates to raster tile.1517INLINE static void Store(1518uint8_t *pSrc,1519SWR_SURFACE_STATE* pDstSurface,1520uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1521{1522static const uint32_t DestRowWidthBytes = 512; // 512B rows15231524// Punt non-full tiles to generic store1525uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1526uint32_t lodHeight = std::max(pDstSurface->height >> 
pDstSurface->lod, 1U);15271528if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1529{1530return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1531}15321533// TileX is a row-major tiling mode where each 4KB tile consist of 8 x 512B rows.1534// We can compute the offsets to each column within the raster tile once and increment from these.1535uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1536pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);15371538const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;1539const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;15401541uint8_t* ppDsts[] =1542{1543pDst, // row 0, col 01544pDst + DestRowWidthBytes, // row 1, col 01545pDst + dx / 2, // row 0, col 11546pDst + DestRowWidthBytes + dx / 2 // row 1, col 11547};15481549for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1550{1551for (uint32_t xx = 0; xx < KNOB_TILE_X_DIM; xx += SIMD16_TILE_X_DIM)1552{1553ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);15541555pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;15561557ppDsts[0] += dx;1558ppDsts[1] += dx;1559ppDsts[2] += dx;1560ppDsts[3] += dx;1561}15621563ppDsts[0] += dy;1564ppDsts[1] += dy;1565ppDsts[2] += dy;1566ppDsts[3] += dy;1567}1568}1569};15701571//////////////////////////////////////////////////////////////////////////1572/// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 32bpp1573//////////////////////////////////////////////////////////////////////////1574template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1575struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, DstFormat>1576{1577typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, DstFormat> GenericStoreTile;1578static const size_t SRC_BYTES_PER_PIXEL = 
FormatTraits<SrcFormat>::bpp / 8;15791580//////////////////////////////////////////////////////////////////////////1581/// @brief Stores an 8x8 raster tile to the destination surface.1582/// @param pSrc - Pointer to raster tile.1583/// @param pDstSurface - Destination surface state1584/// @param x, y - Coordinates to raster tile.1585INLINE static void Store(1586uint8_t *pSrc,1587SWR_SURFACE_STATE* pDstSurface,1588uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1589{1590static const uint32_t DestRowWidthBytes = 16; // 16B rows1591static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows.15921593// Punt non-full tiles to generic store1594uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1595uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);15961597if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1598{1599return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1600}16011602// TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.1603// We can compute the offsets to each column within the raster tile once and increment from these.1604// There will be 4 8x2 simd tiles in an 8x8 raster tile.1605uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1606pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);16071608// we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)1609const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;16101611// The Hot Tile uses a row-major tiling mode and has a larger memory footprint. 
So we iterate in a row-major pattern.1612uint8_t *ppDsts[] =1613{1614pDst, // row 0, col 01615pDst + DestRowWidthBytes, // row 1, col 01616pDst + DestColumnBytes, // row 0, col 11617pDst + DestRowWidthBytes + DestColumnBytes // row 1, col 11618};16191620for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1621{1622// Raster tile width is same as simd16 tile width1623static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");16241625ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);16261627pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;16281629ppDsts[0] += dy;1630ppDsts[1] += dy;1631ppDsts[2] += dy;1632ppDsts[3] += dy;1633}1634}1635};16361637//////////////////////////////////////////////////////////////////////////1638/// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 64bpp1639//////////////////////////////////////////////////////////////////////////1640template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1641struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, DstFormat>1642{1643typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, DstFormat> GenericStoreTile;1644static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;16451646//////////////////////////////////////////////////////////////////////////1647/// @brief Stores an 8x8 raster tile to the destination surface.1648/// @param pSrc - Pointer to raster tile.1649/// @param pDstSurface - Destination surface state1650/// @param x, y - Coordinates to raster tile.1651INLINE static void Store(1652uint8_t *pSrc,1653SWR_SURFACE_STATE* pDstSurface,1654uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1655{1656static const uint32_t DestRowWidthBytes = 16; // 16B rows1657static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows.16581659// Punt non-full tiles to generic store1660uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 
1U);1661uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);16621663if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1664{1665return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1666}16671668// TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.1669// We can compute the offsets to each column within the raster tile once and increment from these.1670// There will be 4 8x2 simd tiles in an 8x8 raster tile.1671uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1672pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);16731674// we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)1675const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;16761677// The Hot Tile uses a row-major tiling mode and has a larger memory footprint. 
So we iterate in a row-major pattern.1678uint8_t *ppDsts[] =1679{1680pDst, // row 0, col 01681pDst + DestRowWidthBytes, // row 1, col 01682pDst + DestColumnBytes, // row 0, col 11683pDst + DestRowWidthBytes + DestColumnBytes, // row 1, col 11684pDst + DestColumnBytes * 2, // row 0, col 21685pDst + DestRowWidthBytes + DestColumnBytes * 2, // row 1, col 21686pDst + DestColumnBytes * 3, // row 0, col 31687pDst + DestRowWidthBytes + DestColumnBytes * 3 // row 1, col 31688};16891690for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1691{1692// Raster tile width is same as simd16 tile width1693static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");16941695ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);16961697pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;16981699for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)1700{1701ppDsts[i] += dy;1702}1703}1704}1705};17061707//////////////////////////////////////////////////////////////////////////1708/// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 128bpp1709//////////////////////////////////////////////////////////////////////////1710template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1711struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat>1712{1713typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat> GenericStoreTile;1714static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;17151716//////////////////////////////////////////////////////////////////////////1717/// @brief Stores an 8x8 raster tile to the destination surface.1718/// @param pSrc - Pointer to raster tile.1719/// @param pDstSurface - Destination surface state1720/// @param x, y - Coordinates to raster tile.1721INLINE static void Store(1722uint8_t *pSrc,1723SWR_SURFACE_STATE* pDstSurface,1724uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)1725{1726static const uint32_t 
DestRowWidthBytes = 16; // 16B rows1727static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows.17281729// Punt non-full tiles to generic store1730uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);1731uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);17321733if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)1734{1735return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);1736}17371738// TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.1739// We can compute the offsets to each column within the raster tile once and increment from these.1740// There will be 4 8x2 simd tiles in an 8x8 raster tile.1741uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,1742pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);17431744// we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)1745const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;17461747// The Hot Tile uses a row-major tiling mode and has a larger memory footprint. 
So we iterate in a row-major pattern.1748uint8_t *ppDsts[] =1749{1750pDst, // row 0, col 01751pDst + DestRowWidthBytes, // row 1, col 01752pDst + DestColumnBytes, // row 0, col 11753pDst + DestRowWidthBytes + DestColumnBytes, // row 1, col 11754pDst + DestColumnBytes * 2, // row 0, col 21755pDst + DestRowWidthBytes + DestColumnBytes * 2, // row 1, col 21756pDst + DestColumnBytes * 3, // row 0, col 31757pDst + DestRowWidthBytes + DestColumnBytes * 3, // row 1, col 31758pDst + DestColumnBytes * 4, // row 0, col 41759pDst + DestRowWidthBytes + DestColumnBytes * 4, // row 1, col 41760pDst + DestColumnBytes * 5, // row 0, col 51761pDst + DestRowWidthBytes + DestColumnBytes * 5, // row 1, col 51762pDst + DestColumnBytes * 6, // row 0, col 61763pDst + DestRowWidthBytes + DestColumnBytes * 6, // row 1, col 61764pDst + DestColumnBytes * 7, // row 0, col 71765pDst + DestRowWidthBytes + DestColumnBytes * 7 // row 1, col 71766};17671768for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)1769{1770// Raster tile width is same as simd16 tile width1771static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");17721773ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);17741775pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;17761777for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)1778{1779ppDsts[i] += dy;1780}1781}1782}1783};17841785//////////////////////////////////////////////////////////////////////////1786/// StoreMacroTile - Stores a macro tile which consists of raster tiles.1787//////////////////////////////////////////////////////////////////////////1788template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>1789struct StoreMacroTile1790{1791//////////////////////////////////////////////////////////////////////////1792/// @brief Stores a macrotile to the destination surface using safe implementation.1793/// @param pSrc - Pointer to macro tile.1794/// @param pDstSurface - Destination surface state1795/// @param x, 
y - Coordinates to macro tile1796static void StoreGeneric(1797uint8_t *pSrcHotTile,1798SWR_SURFACE_STATE* pDstSurface,1799uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)1800{1801PFN_STORE_TILES_INTERNAL pfnStore;1802pfnStore = StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;18031804// Store each raster tile from the hot tile to the destination surface.1805for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)1806{1807for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)1808{1809for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)1810{1811pfnStore(pSrcHotTile, pDstSurface, (x + col), (y + row), sampleNum, renderTargetArrayIndex);1812pSrcHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);1813}1814}1815}18161817}18181819typedef void(*PFN_STORE_TILES_INTERNAL)(uint8_t*, SWR_SURFACE_STATE*, uint32_t, uint32_t, uint32_t, uint32_t);1820//////////////////////////////////////////////////////////////////////////1821/// @brief Stores a macrotile to the destination surface.1822/// @param pSrc - Pointer to macro tile.1823/// @param pDstSurface - Destination surface state1824/// @param x, y - Coordinates to macro tile1825static void Store(1826uint8_t *pSrcHotTile,1827SWR_SURFACE_STATE* pDstSurface,1828uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)1829{1830PFN_STORE_TILES_INTERNAL pfnStore[SWR_MAX_NUM_MULTISAMPLES];18311832for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)1833{1834size_t dstSurfAddress = (size_t)ComputeSurfaceAddress<false, false>(18350,18360,1837pDstSurface->arrayIndex + renderTargetArrayIndex, // z for 3D surfaces1838pDstSurface->arrayIndex + renderTargetArrayIndex, // array index for 2D arrays1839sampleNum,1840pDstSurface->lod,1841pDstSurface);18421843// Only support generic store-tile if lod surface doesn't start on a page boundary and is non-linear1844bool bForceGeneric = ((pDstSurface->tileMode != 
SWR_TILE_NONE) && (0 != (dstSurfAddress & 0xfff))) ||1845(pDstSurface->bInterleavedSamples);18461847pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;1848}18491850// Save original for pSrcHotTile resolve.1851uint8_t *pResolveSrcHotTile = pSrcHotTile;18521853// Store each raster tile from the hot tile to the destination surface.1854for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)1855{1856for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)1857{1858for(uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)1859{1860pfnStore[sampleNum](pSrcHotTile, pDstSurface, (x + col), (y + row), sampleNum, renderTargetArrayIndex);1861pSrcHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);1862}1863}1864}18651866if (pDstSurface->xpAuxBaseAddress)1867{1868uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);1869// Store each raster tile from the hot tile to the destination surface.1870for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)1871{1872for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)1873{1874StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);1875pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;1876}1877}1878}1879}1880};18811882//////////////////////////////////////////////////////////////////////////1883/// InitStoreTilesTable - Helper for setting up the tables.1884template <SWR_TILE_MODE TTileMode, size_t NumTileModesT, size_t ArraySizeT>1885void InitStoreTilesTableColor_Half1(1886PFN_STORE_TILES (&table)[NumTileModesT][ArraySizeT])1887{1888table[TTileMode][R32G32B32A32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, 
R32G32B32A32_FLOAT>::Store;1889table[TTileMode][R32G32B32A32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_SINT>::Store;1890table[TTileMode][R32G32B32A32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_UINT>::Store;1891table[TTileMode][R32G32B32X32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32X32_FLOAT>::Store;1892table[TTileMode][R32G32B32A32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_SSCALED>::Store;1893table[TTileMode][R32G32B32A32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_USCALED>::Store;1894table[TTileMode][R32G32B32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_FLOAT>::Store;1895table[TTileMode][R32G32B32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_SINT>::Store;1896table[TTileMode][R32G32B32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_UINT>::Store;1897table[TTileMode][R32G32B32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_SSCALED>::Store;1898table[TTileMode][R32G32B32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_USCALED>::Store;1899table[TTileMode][R16G16B16A16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_UNORM>::Store;1900table[TTileMode][R16G16B16A16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SNORM>::Store;1901table[TTileMode][R16G16B16A16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SINT>::Store;1902table[TTileMode][R16G16B16A16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_UINT>::Store;1903table[TTileMode][R16G16B16A16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, 
R16G16B16A16_FLOAT>::Store;1904table[TTileMode][R32G32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_FLOAT>::Store;1905table[TTileMode][R32G32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_SINT>::Store;1906table[TTileMode][R32G32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_UINT>::Store;1907table[TTileMode][R32_FLOAT_X8X24_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32_FLOAT_X8X24_TYPELESS>::Store;1908table[TTileMode][X32_TYPELESS_G8X24_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, X32_TYPELESS_G8X24_UINT>::Store;1909table[TTileMode][R16G16B16X16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16X16_UNORM>::Store;1910table[TTileMode][R16G16B16X16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16X16_FLOAT>::Store;1911table[TTileMode][R16G16B16A16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SSCALED>::Store;1912table[TTileMode][R16G16B16A16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_USCALED>::Store;1913table[TTileMode][R32G32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_SSCALED>::Store;1914table[TTileMode][R32G32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_USCALED>::Store;1915table[TTileMode][B8G8R8A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8A8_UNORM>::Store;1916table[TTileMode][B8G8R8A8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB>::Store;1917table[TTileMode][R10G10B10A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_UNORM>::StoreGeneric;1918table[TTileMode][R10G10B10A2_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, 
R10G10B10A2_UNORM_SRGB>::StoreGeneric;1919table[TTileMode][R10G10B10A2_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_UINT>::StoreGeneric;1920table[TTileMode][R8G8B8A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UNORM>::Store;1921table[TTileMode][R8G8B8A8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB>::Store;1922table[TTileMode][R8G8B8A8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SNORM>::Store;1923table[TTileMode][R8G8B8A8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SINT>::Store;1924table[TTileMode][R8G8B8A8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UINT>::Store;1925table[TTileMode][R16G16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_UNORM>::Store;1926table[TTileMode][R16G16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SNORM>::Store;1927table[TTileMode][R16G16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SINT>::Store;1928table[TTileMode][R16G16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_UINT>::Store;1929table[TTileMode][R16G16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_FLOAT>::Store;1930table[TTileMode][B10G10R10A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UNORM>::StoreGeneric;1931table[TTileMode][B10G10R10A2_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UNORM_SRGB>::StoreGeneric;1932table[TTileMode][R11G11B10_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R11G11B10_FLOAT>::StoreGeneric;1933table[TTileMode][R10G10B10_FLOAT_A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, 
R10G10B10_FLOAT_A2_UNORM>::StoreGeneric;1934table[TTileMode][R32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_SINT>::Store;1935table[TTileMode][R32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_UINT>::Store;1936table[TTileMode][R32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_FLOAT>::Store;1937table[TTileMode][R24_UNORM_X8_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R24_UNORM_X8_TYPELESS>::StoreGeneric;1938table[TTileMode][X24_TYPELESS_G8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, X24_TYPELESS_G8_UINT>::StoreGeneric;1939table[TTileMode][A32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, A32_FLOAT>::Store;1940table[TTileMode][B8G8R8X8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8X8_UNORM>::Store;1941table[TTileMode][B8G8R8X8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB>::Store;1942table[TTileMode][R8G8B8X8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8X8_UNORM>::Store;1943table[TTileMode][R8G8B8X8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB>::Store;1944}19451946template <SWR_TILE_MODE TTileMode, size_t NumTileModesT, size_t ArraySizeT>1947void InitStoreTilesTableColor_Half2(1948PFN_STORE_TILES(&table)[NumTileModesT][ArraySizeT])1949{1950table[TTileMode][R9G9B9E5_SHAREDEXP] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R9G9B9E5_SHAREDEXP>::StoreGeneric;1951table[TTileMode][B10G10R10X2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10X2_UNORM>::StoreGeneric;1952table[TTileMode][R10G10B10X2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10X2_USCALED>::StoreGeneric;1953table[TTileMode][R8G8B8A8_SSCALED] = 
StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SSCALED>::Store;1954table[TTileMode][R8G8B8A8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_USCALED>::Store;1955table[TTileMode][R16G16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SSCALED>::Store;1956table[TTileMode][R16G16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_USCALED>::Store;1957table[TTileMode][R32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_SSCALED>::Store;1958table[TTileMode][R32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_USCALED>::Store;1959table[TTileMode][B5G6R5_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G6R5_UNORM>::Store;1960table[TTileMode][B5G6R5_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G6R5_UNORM_SRGB>::StoreGeneric;1961table[TTileMode][B5G5R5A1_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5A1_UNORM>::StoreGeneric;1962table[TTileMode][B5G5R5A1_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5A1_UNORM_SRGB>::StoreGeneric;1963table[TTileMode][B4G4R4A4_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B4G4R4A4_UNORM>::StoreGeneric;1964table[TTileMode][B4G4R4A4_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B4G4R4A4_UNORM_SRGB>::StoreGeneric;1965table[TTileMode][R8G8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_UNORM>::Store;1966table[TTileMode][R8G8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SNORM>::Store;1967table[TTileMode][R8G8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SINT>::Store;1968table[TTileMode][R8G8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, 
R8G8_UINT>::Store;1969table[TTileMode][R16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_UNORM>::Store;1970table[TTileMode][R16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SNORM>::Store;1971table[TTileMode][R16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SINT>::Store;1972table[TTileMode][R16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_UINT>::Store;1973table[TTileMode][R16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_FLOAT>::Store;1974table[TTileMode][A16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A16_UNORM>::Store;1975table[TTileMode][A16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A16_FLOAT>::Store;1976table[TTileMode][B5G5R5X1_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5X1_UNORM>::StoreGeneric;1977table[TTileMode][B5G5R5X1_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5X1_UNORM_SRGB>::StoreGeneric;1978table[TTileMode][R8G8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SSCALED>::Store;1979table[TTileMode][R8G8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_USCALED>::Store;1980table[TTileMode][R16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SSCALED>::Store;1981table[TTileMode][R16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_USCALED>::Store;1982table[TTileMode][A1B5G5R5_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A1B5G5R5_UNORM>::StoreGeneric;1983table[TTileMode][A4B4G4R4_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A4B4G4R4_UNORM>::StoreGeneric;1984table[TTileMode][R8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_UNORM>::Store;1985table[TTileMode][R8_SNORM] = 
StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SNORM>::Store;1986table[TTileMode][R8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SINT>::Store;1987table[TTileMode][R8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_UINT>::Store;1988table[TTileMode][A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, A8_UNORM>::Store;1989table[TTileMode][R8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SSCALED>::Store;1990table[TTileMode][R8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_USCALED>::Store;1991table[TTileMode][R8G8B8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UNORM>::Store;1992table[TTileMode][R8G8B8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SNORM>::Store;1993table[TTileMode][R8G8B8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SSCALED>::Store;1994table[TTileMode][R8G8B8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_USCALED>::Store;1995table[TTileMode][R16G16B16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_FLOAT>::Store;1996table[TTileMode][R16G16B16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_UNORM>::Store;1997table[TTileMode][R16G16B16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SNORM>::Store;1998table[TTileMode][R16G16B16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SSCALED>::Store;1999table[TTileMode][R16G16B16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_USCALED>::Store;2000table[TTileMode][R8G8B8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UNORM_SRGB>::Store;2001table[TTileMode][R16G16B16_UINT] = 
StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_UINT>::Store;2002table[TTileMode][R16G16B16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SINT>::Store;2003table[TTileMode][R10G10B10A2_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SNORM>::StoreGeneric;2004table[TTileMode][R10G10B10A2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_USCALED>::StoreGeneric;2005table[TTileMode][R10G10B10A2_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SSCALED>::StoreGeneric;2006table[TTileMode][R10G10B10A2_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SINT>::StoreGeneric;2007table[TTileMode][B10G10R10A2_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SNORM>::StoreGeneric;2008table[TTileMode][B10G10R10A2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_USCALED>::StoreGeneric;2009table[TTileMode][B10G10R10A2_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SSCALED>::StoreGeneric;2010table[TTileMode][B10G10R10A2_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UINT>::StoreGeneric;2011table[TTileMode][B10G10R10A2_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SINT>::StoreGeneric;2012table[TTileMode][R8G8B8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UINT>::Store;2013table[TTileMode][R8G8B8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SINT>::Store;2014}20152016//////////////////////////////////////////////////////////////////////////2017/// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.2018template <SWR_TILE_MODE TTileMode, size_t NumTileModes, size_t ArraySizeT>2019void 
InitStoreTilesTableDepth(2020PFN_STORE_TILES(&table)[NumTileModes][ArraySizeT])2021{2022table[TTileMode][R32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32_FLOAT>::Store;2023table[TTileMode][R32_FLOAT_X8X24_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT, R32_FLOAT_X8X24_TYPELESS>::Store;2024table[TTileMode][R24_UNORM_X8_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R24_UNORM_X8_TYPELESS>::Store;2025table[TTileMode][R16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32_FLOAT, R16_UNORM>::Store;2026}20272028template <SWR_TILE_MODE TTileMode, size_t NumTileModes, size_t ArraySizeT>2029void InitStoreTilesTableStencil(2030PFN_STORE_TILES(&table)[NumTileModes][ArraySizeT])2031{2032table[TTileMode][R8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R8_UINT, R8_UINT>::Store;2033}203420352036//////////////////////////////////////////////////////////////////////////2037/// @brief Deswizzles and stores a full hottile to a render surface2038/// @param hPrivateContext - Handle to private DC2039/// @param srcFormat - Format for hot tile.2040/// @param renderTargetIndex - Index to destination render target2041/// @param x, y - Coordinates to raster tile.2042/// @param pSrcHotTile - Pointer to Hot Tile2043void SwrStoreHotTileToSurface(2044HANDLE hWorkerPrivateData,2045SWR_SURFACE_STATE *pDstSurface,2046BucketManager* pBucketMgr,2047SWR_FORMAT srcFormat,2048SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,2049uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,2050uint8_t *pSrcHotTile);205120522053