Path: blob/master/thirdparty/cvtt/ConvectionKernels.h
9903 views
/*
Convection Texture Tools
Copyright (c) 2018 Eric Lasota

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#ifndef __CVTT_CONVECTION_KERNELS__
#define __CVTT_CONVECTION_KERNELS__

#include <stddef.h>
#include <stdint.h>

namespace cvtt
{
    // Bit flags that can be OR-ed together and stored in Options::flags.
    namespace Flags
    {
        // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
        const uint32_t BC7_FastIndexing = 0x008;

        // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks)
        const uint32_t BC7_TrySingleColor = 0x010;

        // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other
        const uint32_t BC7_RespectPunchThrough = 0x020;

        // Use fast indexing in HDR formats (faster, worse quality)
        const uint32_t BC6H_FastIndexing = 0x040;

        // Exhaustive search RGB orderings when encoding BC1-BC3 (much slower, better quality)
        const uint32_t S3TC_Exhaustive = 0x080;

        // Penalize distant endpoints, improving quality on inaccurate GPU decoders
        const uint32_t S3TC_Paranoid = 0x100;

        // Uniform color channel importance
        const uint32_t Uniform = 0x200;

        // Use fake BT.709 color space for etc2comp compatibility (slower)
        const uint32_t ETC_UseFakeBT709 = 0x400;

        // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
        const uint32_t ETC_FakeBT709Accurate = 0x800;

        // Misc useful default flag combinations
        // (Fastest/Faster and Fast/Default currently expand to the same bit sets.)
        const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
        const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
        const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
        const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
        const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
        const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
    }

    // Number of blocks each encode kernel consumes and produces per call
    // (see the note on the Kernels encode functions).
    const unsigned int NumParallelBlocks = 8;

    // Encoder settings passed to every Encode* kernel.
    // The default constructor selects Flags::Default and expresses the red and
    // blue weights relative to a green weight of 1.0.
    struct Options
    {
        uint32_t flags;         // Bitmask of cvtt::Flags values
        float threshold;        // Alpha test threshold for BC1
        float redWeight;        // Red channel importance
        float greenWeight;      // Green channel importance
        float blueWeight;       // Blue channel importance
        float alphaWeight;      // Alpha channel importance

        int refineRoundsBC7;    // Number of refine rounds for BC7
        int refineRoundsBC6H;   // Number of refine rounds for BC6H (max 3)
        int refineRoundsIIC;    // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
        int refineRoundsS3TC;   // Number of refine rounds for S3TC RGB

        int seedPoints;         // Number of seed points (min 1, max 4)

        Options()
            : flags(Flags::Default)
            , threshold(0.5f)
            , redWeight(0.2125f / 0.7154f)
            , greenWeight(1.0f)
            , blueWeight(0.0721f / 0.7154f)
            , alphaWeight(1.0f)
            , refineRoundsBC7(2)
            , refineRoundsBC6H(3)
            , refineRoundsIIC(8)
            , refineRoundsS3TC(2)
            , seedPoints(4)
        {
        }
    };

    // Per-mode seed point counts used as input to
    // Kernels::ConfigureBC7EncodingPlanFromFineTuningParams.
    // The default constructor sets every entry to 4.
    struct BC7FineTuningParams
    {
        // Seed point counts for each mode+configuration combination
        uint8_t mode0SP[16];
        uint8_t mode1SP[64];
        uint8_t mode2SP[64];
        uint8_t mode3SP[64];
        uint8_t mode4SP[4][2];
        uint8_t mode5SP[4];
        uint8_t mode6SP;
        uint8_t mode7SP[64];

        BC7FineTuningParams()
        {
            for (int i = 0; i < 16; i++)
                this->mode0SP[i] = 4;

            // Modes 1, 2, 3 and 7 all have 64-entry tables, so one loop fills them together.
            for (int i = 0; i < 64; i++)
            {
                this->mode1SP[i] = 4;
                this->mode2SP[i] = 4;
                this->mode3SP[i] = 4;
                this->mode7SP[i] = 4;
            }

            for (int i = 0; i < 4; i++)
            {
                for (int j = 0; j < 2; j++)
                    this->mode4SP[i][j] = 4;

                this->mode5SP[i] = 4;
            }

            this->mode6SP = 4;
        }
    };

    // Describes which BC7 modes, partitions and shapes EncodeBC7 should evaluate.
    // The default constructor enables every partition bit and mode 6, lists every
    // shape in index order, and sets every seed point count to 4.
    struct BC7EncodingPlan
    {
        // Sizes of the shape tables below; both fit in the uint8_t counters.
        static const int kNumRGBAShapes = 129;
        static const int kNumRGBShapes = 243;

        // Per-mode enable masks (one bit per partition).
        uint64_t mode1PartitionEnabled;
        uint64_t mode2PartitionEnabled;
        uint64_t mode3PartitionEnabled;
        uint16_t mode0PartitionEnabled;
        uint64_t mode7RGBAPartitionEnabled;
        uint64_t mode7RGBPartitionEnabled;
        uint8_t mode4SP[4][2];
        uint8_t mode5SP[4];
        bool mode6Enabled;

        uint8_t seedPointsForShapeRGB[kNumRGBShapes];
        uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];

        uint8_t rgbaShapeList[kNumRGBAShapes];
        uint8_t rgbaNumShapesToEvaluate;

        uint8_t rgbShapeList[kNumRGBShapes];
        uint8_t rgbNumShapesToEvaluate;

        BC7EncodingPlan()
        {
            for (int i = 0; i < kNumRGBShapes; i++)
            {
                // i is always below kNumRGBShapes (243), so the cast cannot truncate.
                this->rgbShapeList[i] = static_cast<uint8_t>(i);
                this->seedPointsForShapeRGB[i] = 4;
            }
            this->rgbNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBShapes);

            for (int i = 0; i < kNumRGBAShapes; i++)
            {
                // i is always below kNumRGBAShapes (129), so the cast cannot truncate.
                this->rgbaShapeList[i] = static_cast<uint8_t>(i);
                this->seedPointsForShapeRGBA[i] = 4;
            }
            this->rgbaNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBAShapes);

            this->mode0PartitionEnabled = 0xffff;
            this->mode1PartitionEnabled = 0xffffffffffffffffULL;
            this->mode2PartitionEnabled = 0xffffffffffffffffULL;
            this->mode3PartitionEnabled = 0xffffffffffffffffULL;
            this->mode6Enabled = true;
            this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
            this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;

            for (int i = 0; i < 4; i++)
            {
                for (int j = 0; j < 2; j++)
                    this->mode4SP[i][j] = 4;

                this->mode5SP[i] = 4;
            }
        }
    };

    // RGBA input block for unsigned 8-bit formats
    struct PixelBlockU8
    {
        uint8_t m_pixels[16][4];
    };

    // RGBA input block for signed 8-bit formats
    struct PixelBlockS8
    {
        int8_t m_pixels[16][4];
    };

    // Single-value-per-pixel block of 16 signed 16-bit entries (input to EncodeETC2Alpha11)
    struct PixelBlockScalarS16
    {
        int16_t m_pixels[16];
    };

    // RGBA input block for half-precision float formats (bit-cast to int16_t)
    struct PixelBlockF16
    {
        int16_t m_pixels[16][4];
    };

    // Opaque ETC2 scratch-storage type. The constructor is protected, so instances
    // are obtained through Kernels::AllocETC2Data rather than constructed directly.
    class ETC2CompressionData
    {
    protected:
        ETC2CompressionData() {}
    };

    // Opaque ETC1 scratch-storage type. The constructor is protected, so instances
    // are obtained through Kernels::AllocETC1Data rather than constructed directly.
    class ETC1CompressionData
    {
    protected:
        ETC1CompressionData() {}
    };

    namespace Kernels
    {
        // Allocation callbacks used by the ETC data alloc/release functions.
        // 'context' is an opaque pointer passed through to the callback.
        typedef void* allocFunc_t(void *context, size_t size);
        typedef void freeFunc_t(void *context, void* ptr, size_t size);

        // NOTE: All functions accept and output NumParallelBlocks blocks at once
        // pBC receives the encoded output; pBlocks supplies the source pixels.
        void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
        void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
        void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
        void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
        void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
        void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
        void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
        void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
        void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
        void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan);
        void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData);
        void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData);
        void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
        void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);

        void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options);
        void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options);

        // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
        void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);

        // Generates a BC7 encoding plan from fine-tuning parameters.
        // NOTE(review): the meaning of the bool result is not visible in this header;
        // presumably it reports whether 'params' was accepted - confirm against the implementation.
        bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);

        // ETC compression requires temporary storage that normally consumes a large amount of stack space.
        // To allocate and release it, use one of these functions.
        ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
        void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);

        ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context);
        void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);

        // Decoders: pBlocks receives the decoded pixels, pBC supplies the compressed data.
        // NOTE(review): whether these also operate on NumParallelBlocks blocks per call
        // is not visible in this header - confirm against the implementation.
        void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC);
        void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);
        void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC);
    }
}

#endif
