Path: blob/master/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
9905 views
// basisu_astc_helpers.h1// Be sure to define ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.2#pragma once3#ifndef BASISU_ASTC_HELPERS_HEADER4#define BASISU_ASTC_HELPERS_HEADER56#include <stdlib.h>7#include <stdint.h>8#include <math.h>9#include <fenv.h>1011namespace astc_helpers12{13const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]14const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid15const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels16const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values17const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;1819static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;20extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];2122// The Color Endpoint Modes (CEM's)23enum cems24{25CEM_LDR_LUM_DIRECT = 0,26CEM_LDR_LUM_BASE_PLUS_OFS = 1,27CEM_HDR_LUM_LARGE_RANGE = 2,28CEM_HDR_LUM_SMALL_RANGE = 3,29CEM_LDR_LUM_ALPHA_DIRECT = 4,30CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,31CEM_LDR_RGB_BASE_SCALE = 6,32CEM_HDR_RGB_BASE_SCALE = 7,33CEM_LDR_RGB_DIRECT = 8,34CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,35CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,36CEM_HDR_RGB = 11,37CEM_LDR_RGBA_DIRECT = 12,38CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,39CEM_HDR_RGB_LDR_ALPHA = 14,40CEM_HDR_RGB_HDR_ALPHA = 1541};4243// All Bounded Integer Sequence Coding (BISE or ISE) ranges.44// Weights: Ranges [0,11] are valid.45// Endpoints: Ranges [4,20] are valid.46enum bise_levels47{48BISE_2_LEVELS = 0,49BISE_3_LEVELS = 1,50BISE_4_LEVELS = 2,51BISE_5_LEVELS = 3,52BISE_6_LEVELS = 4,53BISE_8_LEVELS = 5,54BISE_10_LEVELS = 6,55BISE_12_LEVELS = 7,56BISE_16_LEVELS = 8,57BISE_20_LEVELS = 9,58BISE_24_LEVELS = 10,59BISE_32_LEVELS = 11,60BISE_40_LEVELS = 12,61BISE_48_LEVELS = 13,62BISE_64_LEVELS = 14,63BISE_80_LEVELS = 15,64BISE_96_LEVELS = 16,65BISE_128_LEVELS = 17,66BISE_160_LEVELS = 18,67BISE_192_LEVELS = 19,68BISE_256_LEVELS = 2069};7071const uint32_t TOTAL_ISE_RANGES = 21;7273// Valid endpoint ISE ranges74const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 475const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 2076const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;7778// Valid weight ISE ranges79const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 080const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 1181const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;8283// The ISE range table.84extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)8586// Possible Color Component Select values, used in dual plane mode.87// The CCS component will be interpolated using the 2nd weight plane.88enum ccs89{90CCS_GBA_R = 0,91CCS_RBA_G = 1,92CCS_RGA_B = 2,93CCS_RGB_A = 394};9596struct astc_block97{98uint32_t m_vals[4];99};100101const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode102const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode103const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)104const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block105106struct log_astc_block107{108bool m_error_flag;109110bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;111112uint8_t m_user_mode; // user defined value, not used in this module113114// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr115uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block116117bool m_dual_plane;118119uint8_t m_weight_ise_range; // 0-11120uint8_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking121122uint8_t m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode123124uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)125uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1126127uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's128129union130{131// ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.132uint8_t m_weights[MAX_GRID_WEIGHTS];133uint16_t m_solid_color[4];134};135136// ISE endpoint values137// Endpoint order examples:138// 1 subset LA : LL0 LH0 AL0 AH0139// 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0140// 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0141// 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1142// 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1143// 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1144uint8_t m_endpoints[MAX_ENDPOINTS];145146void clear()147{148memset(this, 0, sizeof(*this));149}150};151152// Open interval153inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }154inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }155156inline uint32_t get_bits(uint32_t val, int low, int high)157{158const int num_bits = (high - low) + 1;159assert((num_bits >= 1) && (num_bits <= 32));160161val >>= low;162if (num_bits != 32)163val &= ((1u << num_bits) - 1);164165return val;166}167168// Returns the number of levels in the given ISE range.169inline uint32_t get_ise_levels(uint32_t ise_range)170{171assert(ise_range < TOTAL_ISE_RANGES);172return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];173}174175inline int get_ise_sequence_bits(int count, int range)176{177// See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)178int total_bits = g_ise_range_table[range][0] * count;179total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;180total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;181return total_bits;182}183184inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)185{186assert(w <= MAX_WEIGHT_VALUE);187return (l * (64 - w) + h * w + 32) >> 6;188}189190void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);191192struct pack_stats193{194uint32_t m_header_bits;195uint32_t m_endpoint_bits;196uint32_t m_weight_bits;197198inline pack_stats() { clear(); }199inline void clear() { memset(this, 0, sizeof(*this)); }200};201202// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.203bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr);204205// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.206void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);207208// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)209void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);210211// These helpers are all quite slow, but are useful for table preparation.212213// Dequantizes ISE encoded endpoint val to [0,255]214uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11215216// Dequantizes ISE encoded weight val to [0,64]217uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10218219uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);220uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);221222void create_quant_tables(223uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]224uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]225uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]226uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]227uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights228bool weight_flag); // false if block endpoints, true if weights229230// True if the CEM is LDR.231bool is_cem_ldr(uint32_t mode);232inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }233234// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).235bool is_valid_block_size(uint32_t w, uint32_t h);236237bool block_has_any_hdr_cems(const log_astc_block& log_blk);238bool block_has_any_ldr_cems(const log_astc_block& log_blk);239240// Returns the # of endpoint values for the given CEM.241inline uint32_t get_num_cem_values(uint32_t cem) { assert(cem <= 15); return 2 + 2 * (cem >> 2); }242243struct dequant_table244{245basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]246basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]247basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]248basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]249250void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)251{252m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256);253m_ISE_to_val.resize(num_levels);254if (init_rank_tabs)255{256m_ISE_to_rank.resize(num_levels);257m_rank_to_ISE.resize(num_levels);258}259}260};261262struct dequant_tables263{264dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];265dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];266267const dequant_table& get_weight_tab(uint32_t range) const268{269assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));270return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];271}272273dequant_table& get_weight_tab(uint32_t range)274{275assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));276return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];277}278279const dequant_table& get_endpoint_tab(uint32_t range) const280{281assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));282return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];283}284285dequant_table& get_endpoint_tab(uint32_t range)286{287assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));288return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];289}290291void init(bool init_rank_tabs)292{293for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)294{295const uint32_t num_levels = get_ise_levels(range);296dequant_table& tab = get_weight_tab(range);297298tab.init(true, num_levels, init_rank_tabs);299300create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);301}302303for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)304{305const uint32_t num_levels = get_ise_levels(range);306dequant_table& tab = get_endpoint_tab(range);307308tab.init(false, num_levels, init_rank_tabs);309310create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);311}312}313};314315extern dequant_tables g_dequant_tables;316void init_tables(bool init_rank_tabs);317318struct weighted_sample319{320uint8_t m_src_x;321uint8_t m_src_y;322uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8323};324325void compute_upsample_weights(326int block_width, int block_height,327int weight_grid_width, int weight_grid_height,328weighted_sample* pWeights); // there will be block_width * block_height bilinear samples329330void upsample_weight_grid(331uint32_t bx, uint32_t by, // destination/to dimension332uint32_t wx, uint32_t wy, // source/from dimension333const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]334uint8_t* pDst_weights); // [by][bx]335336// Procedurally returns the texel partition/subset index given the block coordinate and config.337int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);338339void blue_contract(340int r, int g, int b, int a,341int& dr, int& dg, int& db, int& da);342343void bit_transfer_signed(int& a, int& b);344345void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);346347typedef uint16_t half_float;348half_float float_to_half(float val, bool toward_zero);349float half_to_float(half_float hval);350351// Notes:352// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)353// However, this is not lossless in the general sense.354inline half_float qlog16_to_half(int k)355{356assert((k >= 0) && (k <= 0xFFFF));357358int E = (k & 0xF800) >> 11;359int M = k & 0x7FF;360361int Mt;362if (M < 512)363Mt = 3 * M;364else if (M >= 1536)365Mt = 5 * M - 2048;366else367Mt = 4 * M - 512;368369return (half_float)((E << 10) + (Mt >> 3));370}371372const int MAX_RGB9E5 = 0xff80;373void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);374uint32_t pack_rgb9e5(float r, float g, float b);375376enum decode_mode377{378cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks379cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks380cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks381cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt382};383384// Decodes logical block to output pixels.385// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)386bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);387388void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);389390// Unpack a physical ASTC encoded GPU texture block to a logical block description.391bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);392393} // namespace astc_helpers394395#endif // BASISU_ASTC_HELPERS_HEADER396397//------------------------------------------------------------------398399#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION400401namespace astc_helpers402{403template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; }404template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; }405406const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {407{ 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },408{ 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },409{ 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },410{ 12, 10 }, { 12, 12 }411};412413const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =414{415//b t q416//2 3 5 // rng ise_index notes417{ 1, 0, 0 }, // 0..1 0418{ 0, 1, 0 }, // 0..2 1419{ 2, 0, 0 }, // 0..3 2420{ 0, 0, 1 }, // 0..4 3421{ 1, 1, 0 }, // 0..5 4 min endpoint ISE index422{ 3, 0, 0 }, // 0..7 5423{ 1, 0, 1 }, // 0..9 6424{ 2, 1, 0 }, // 0..11 7425{ 4, 0, 0 }, // 0..15 8426{ 2, 0, 1 }, // 0..19 9427{ 3, 1, 0 }, // 0..23 10428{ 5, 0, 0 }, // 0..31 11 max weight ISE index429{ 3, 0, 1 }, // 0..39 12430{ 4, 1, 0 }, // 0..47 13431{ 6, 0, 0 }, // 0..63 14432{ 4, 0, 1 }, // 0..79 15433{ 5, 1, 0 }, // 0..95 16434{ 7, 0, 0 }, // 0..127 17435{ 5, 0, 1 }, // 0..159 18436{ 6, 1, 0 }, // 0..191 19437{ 8, 0, 0 }, // 0..255 20438};439440static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)441{442uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);443444assert(codesize <= 9);445if (codesize)446{447uint32_t byte_bit_offset = bit_offset & 7;448uint32_t val = code << byte_bit_offset;449450uint32_t index = bit_offset >> 3;451pBuf[index] |= (uint8_t)val;452453if (codesize > (8 - byte_bit_offset))454pBuf[index + 1] |= (uint8_t)(val >> 8);455456bit_offset += codesize;457}458}459460static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)461{462return (bits >> low) & ((1 << (high - low + 1)) - 1);463}464465// Writes bits to output in an endian safe way466static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)467{468assert(total_bits <= 31);469assert(value < (1u << total_bits));470471uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);472473while (total_bits)474{475const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7));476477pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7));478479bit_pos += bits_to_write;480total_bits -= bits_to_write;481value >>= bits_to_write;482}483}484485static const uint8_t g_astc_quint_encode[125] =486{4870, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,48858, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,489105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,490126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/491};492493// Encodes 3 values to output, usable for any range that uses quints and bits494static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats)495{496// First extract the quints and the bits from the 3 input values497int quints = 0, bits[3];498const uint32_t bit_mask = (1 << n) - 1;499for (int i = 0; i < 3; i++)500{501static const int s_muls[3] = { 1, 5, 25 };502503const int t = pValues[i] >> n;504505quints += t * s_muls[i];506bits[i] = pValues[i] & bit_mask;507}508509// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.510// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding511512assert(quints < 125);513const int T = g_astc_quint_encode[quints];514515// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.516astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |517(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);518519if (pStats)520*pStats += n * 3 + 7;521}522523static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,52443, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,525131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,526208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,527225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,528191, 223, 124, 125, 126 };529530// Encodes 5 values to output, usable for any range that uses trits and bits531static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats)532{533// First extract the trits and the bits from the 5 input values534int trits = 0, bits[5];535const uint32_t bit_mask = (1 << n) - 1;536for (int i = 0; i < 5; i++)537{538static const int s_muls[5] = { 1, 3, 9, 27, 81 };539540const int t = pValues[i] >> n;541542trits += t * s_muls[i];543bits[i] = pValues[i] & bit_mask;544}545546// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.547// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding548549assert(trits < 243);550const int T = g_astc_trit_encode[trits];551552// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.553astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);554555astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |556(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);557558if (pStats)559*pStats += n * 5 + 8;560}561562// Packs values using ASTC's BISE to output buffer.563void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats)564{565uint32_t temp[5] = { 0 };566567const int num_bits = g_ise_range_table[range][0];568569int group_size = 0;570if (g_ise_range_table[range][1])571group_size = 5;572else if (g_ise_range_table[range][2])573group_size = 3;574575#ifndef NDEBUG576const uint32_t num_levels = get_ise_levels(range);577for (int i = 0; i < num_vals; i++)578{579assert(pSrc_vals[i] < num_levels);580}581#endif582583if (group_size)584{585// Range has trits or quints - pack each group of 5 or 3 values586const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);587588for (int group_index = 0; group_index < total_groups; group_index++)589{590uint8_t vals[5] = { 0 };591592const int limit = my_min(group_size, num_vals - group_index * group_size);593for (int i = 0; i < limit; i++)594vals[i] = pSrc_vals[group_index * group_size + i];595596// Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed.597// get_ise_sequence_bits() returns the # of bits that must be written for proper decoding.598if (group_size == 5)599astc_encode_trits(temp, vals, bit_pos, num_bits, pStats);600else601astc_encode_quints(temp, vals, bit_pos, num_bits, pStats);602}603}604else605{606for (int i = 0; i < num_vals; i++)607astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);608609if (pStats)610*pStats += num_vals * num_bits;611}612613pDst[0] |= temp[0]; pDst[1] |= temp[1];614pDst[2] |= temp[2]; pDst[3] |= temp[3];615}616617inline uint32_t rev_dword(uint32_t bits)618{619uint32_t v = (bits << 16) | (bits >> 16);620v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8); v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);621v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2); v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);622return v;623}624625static inline bool is_packable(int value, int num_bits) { assert((num_bits >= 1) && (num_bits < 31)); return (value >= 0) && (value < (1 << num_bits)); }626627static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)628{629config_bits = 0;630631const int W = log_block.m_grid_width, H = log_block.m_grid_height;632633const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision634const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits635636// See Tables 81-82637// Compute p from weight range638uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);639640// Rearrange p's bits to p0 p2 p1641p = (p >> 1) + ((p & 1) << 2);642643// Try encoding each row of table 82.644645// W+4 H+2646if (is_packable(W - 4, 2) && is_packable(H - 2, 2))647{648config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);649return true;650}651652// W+8 H+2653if (is_packable(W - 8, 2) && is_packable(H - 2, 2))654{655config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);656return true;657}658659// W+2 H+8660if (is_packable(W - 2, 2) && is_packable(H - 8, 2))661{662config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);663return true;664}665666// W+2 H+6667if (is_packable(W - 2, 2) && is_packable(H - 6, 1))668{669config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);670return true;671}672673// W+2 H+2674if (is_packable(W - 2, 1) && is_packable(H - 2, 2))675{676config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);677return true;678}679680// 12 H+2681if ((W == 12) && is_packable(H - 2, 2))682{683config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);684return true;685}686687// W+2 12688if ((H == 12) && is_packable(W - 2, 2))689{690config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);691return true;692}693694// 6 10695if ((W == 6) && (H == 10))696{697config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);698return true;699}700701// 10 6702if ((W == 10) && (H == 6))703{704config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);705return true;706}707708// W+6 H+6 (no dual plane or high prec)709if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))710{711config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);712return true;713}714715// Failed: unsupported weight grid dimensions or config.716return false;717}718719bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats)720{721memset(&phys_block, 0, sizeof(phys_block));722723if (pExpected_endpoint_range)724*pExpected_endpoint_range = -1;725726assert(!log_block.m_error_flag);727if (log_block.m_error_flag)728return false;729730if (log_block.m_solid_color_flag_ldr)731{732pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);733return true;734}735else if (log_block.m_solid_color_flag_hdr)736{737pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);738return true;739}740741if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))742return false;743744// Max usable weight range is 11745if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)746return false;747748// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints749if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))750return false;751752if (log_block.m_color_component_selector > 3)753return false;754755// TODO: sanity check grid width/height vs. block's physical width/height756757uint32_t config_bits = 0;758if (!get_config_bits(log_block, config_bits))759return false;760761uint32_t bit_pos = 0;762astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);763if (pStats)764pStats->m_header_bits += 11;765766const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);767const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);768769// 18.24 Illegal Encodings770if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))771return false;772773uint32_t total_extra_bits = 0;774775astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);776if (pStats)777pStats->m_header_bits += 2;778779if (log_block.m_num_partitions > 1)780{781if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)782return false;783784astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);785if (pStats)786pStats->m_header_bits += 10;787788uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;789for (uint32_t j = 0; j < log_block.m_num_partitions; j++)790{791highest_cem = my_max<uint32_t>(highest_cem, log_block.m_color_endpoint_modes[j]);792lowest_cem = my_min<uint32_t>(lowest_cem, log_block.m_color_endpoint_modes[j]);793}794795if (highest_cem > 15)796return false;797798// Ensure CEM range is contiguous799if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))800return false;801802// See tables 79/80803uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2;804if (lowest_cem != highest_cem)805{806encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));807808// See tables at 23.11 Color Endpoint Mode809for (uint32_t j = 0; j < log_block.m_num_partitions; j++)810{811const int M = log_block.m_color_endpoint_modes[j] & 3;812813const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1);814if ((C & 1) != C)815return false;816817encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j));818}819820total_extra_bits = 3 * log_block.m_num_partitions - 4;821822if ((total_weight_bits + total_extra_bits) > 128)823return false;824825uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;826astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);827if (pStats)828pStats->m_header_bits += total_extra_bits;829}830831astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);832if (pStats)833pStats->m_header_bits += 6;834}835else836{837if (log_block.m_partition_id)838return false;839if (log_block.m_color_endpoint_modes[0] > 15)840return false;841842astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);843if (pStats)844pStats->m_header_bits += 4;845}846847if (log_block.m_dual_plane)848{849if (log_block.m_num_partitions > 3)850return false;851852total_extra_bits += 2;853854uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;855astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);856if (pStats)857pStats->m_header_bits += 2;858}859860const uint32_t total_config_bits = bit_pos + total_extra_bits;861const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;862if (num_remaining_bits < 0)863return false;864865uint32_t total_cem_vals = 0;866for (uint32_t j = 0; j < log_block.m_num_partitions; j++)867total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2);868869if (total_cem_vals > MAX_ENDPOINTS)870return false;871872int endpoint_ise_range = -1;873for (int k = 20; k > 0; k--)874{875int bits = get_ise_sequence_bits(total_cem_vals, k);876if (bits <= num_remaining_bits)877{878endpoint_ise_range = k;879break;880}881}882883// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints884if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)885return false;886887// Ensure the caller utilized the right endpoint ISE range.888if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)889{890if (pExpected_endpoint_range)891*pExpected_endpoint_range = endpoint_ise_range;892return false;893}894895if (pStats)896{897pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range);898pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);899}900901// Pack endpoints forwards902encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);903904// Pack weights backwards905uint32_t weight_data[4] = { 0 };906encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);907908for (uint32_t i = 0; i < 4; i++)909phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);910911return true;912}913914static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)915{916assert(num_src_bits <= num_dst_bits);917assert((src & ((1 << num_src_bits) - 1)) == src);918919uint32_t dst = 0;920for (int shift = num_dst_bits - num_src_bits; shift > -num_src_bits; shift -= num_src_bits)921dst |= (shift >= 0) ? (src << shift) : (src >> -shift);922923return dst;924}925926uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)927{928assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));929assert(val < get_ise_levels(ise_range));930931uint32_t u = 0;932933switch (ise_range)934{935case 5:936{937u = bit_replication_scale(val, 3, 8);938break;939}940case 8:941{942u = bit_replication_scale(val, 4, 8);943break;944}945case 11:946{947u = bit_replication_scale(val, 5, 8);948break;949}950case 14:951{952u = bit_replication_scale(val, 6, 8);953break;954}955case 17:956{957u = bit_replication_scale(val, 7, 8);958break;959}960case 20:961{962u = val;963break;964}965case 4:966case 6:967case 7:968case 9:969case 10:970case 12:971case 13:972case 15:973case 16:974case 18:975case 19:976{977const uint32_t num_bits = g_ise_range_table[ise_range][0];978const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);979const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);980981// compute Table 103 row index982const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;983984assert(range_index >= 0 && range_index <= 10);985986uint32_t bits = val & ((1 << num_bits) - 1);987uint32_t tval = val >> num_bits;988989assert(tval < (num_trits ? 3U : 5U));990991uint32_t a = bits & 1;992uint32_t b = (bits >> 1) & 1;993uint32_t c = (bits >> 2) & 1;994uint32_t d = (bits >> 3) & 1;995uint32_t e = (bits >> 4) & 1;996uint32_t f = (bits >> 5) & 1;997998uint32_t A = a ? 511 : 0;999uint32_t B = 0;10001001switch (range_index)1002{1003case 2:1004{1005// 8765432101006// b000b0bb01007B = (b << 1) | (b << 2) | (b << 4) | (b << 8);1008break;1009}1010case 3:1011{1012// 8765432101013// b0000bb001014B = (b << 2) | (b << 3) | (b << 8);1015break;1016}1017case 4:1018{1019// 8765432101020// cb000cbcb1021B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);1022break;1023}1024case 5:1025{1026// 8765432101027// cb0000cbc1028B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);1029break;1030}1031case 6:1032{1033// 8765432101034// dcb000dcb1035B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);1036break;1037}1038case 7:1039{1040// 8765432101041// dcb0000dc1042B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);1043break;1044}1045case 8:1046{1047// 8765432101048// edcb000ed1049B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);1050break;1051}1052case 9:1053{1054// 8765432101055// edcb0000e1056B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);1057break;1058}1059case 10:1060{1061// 8765432101062// fedcb000f1063B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);1064break;1065}1066default:1067break;1068}10691070static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };1071uint32_t C = C_vals[range_index];1072uint32_t D = tval;10731074u = D * C + B;1075u = u ^ A;1076u = (A & 0x80) | (u >> 2);10771078break;1079}1080default:1081{1082assert(0);1083break;1084}1085}10861087return u;1088}10891090uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)1091{1092assert(val < get_ise_levels(ise_range));10931094uint32_t u = 0;1095switch (ise_range)1096{1097case 0:1098{1099u = val ? 63 : 0;1100break;1101}1102case 1: // 0-21103{1104const uint8_t s_tab_0_2[3] = { 0, 32, 63 };1105u = s_tab_0_2[val];1106break;1107}1108case 2: // 0-31109{1110u = bit_replication_scale(val, 2, 6);1111break;1112}1113case 3: // 0-41114{1115const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };1116u = s_tab_0_4[val];1117break;1118}1119case 5: // 0-71120{1121u = bit_replication_scale(val, 3, 6);1122break;1123}1124case 8: // 0-151125{1126u = bit_replication_scale(val, 4, 6);1127break;1128}1129case 11: // 0-311130{1131u = bit_replication_scale(val, 5, 6);1132break;1133}1134case 4: // 0-51135case 6: // 0-91136case 7: // 0-111137case 9: // 0-191138case 10: // 0-231139{1140const uint32_t num_bits = g_ise_range_table[ise_range][0];1141const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);1142const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);11431144// compute Table 103 row index1145const int range_index = num_bits * 2 + (num_quints ? 1 : 0);11461147// Extract bits and tris/quints from value1148const uint32_t bits = val & ((1u << num_bits) - 1);1149const uint32_t D = val >> num_bits;11501151assert(D < (num_trits ? 3U : 5U));11521153// Now dequantize1154// See Table 103. ASTC weight unquantization parameters1155static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };11561157const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;11581159const uint32_t A = (a == 0) ? 0 : 0x7F;11601161uint32_t B = 0;1162if (range_index == 4)1163B = ((b << 6) | (b << 2) | (b << 0));1164else if (range_index == 5)1165B = ((b << 6) | (b << 1));1166else if (range_index == 6)1167B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));11681169const uint32_t C = C_table[range_index - 2];11701171u = D * C + B;1172u = u ^ A;1173u = (A & 0x20) | (u >> 2);1174break;1175}1176default:1177assert(0);1178break;1179}11801181if (u > 32)1182u++;11831184return u;1185}11861187// Returns the nearest ISE symbol given a [0,255] endpoint value.1188uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)1189{1190assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);11911192const uint32_t total_levels = get_ise_levels(ise_range);1193int best_e = INT_MAX, best_index = 0;1194for (uint32_t i = 0; i < total_levels; i++)1195{1196const int qv = dequant_bise_endpoint(i, ise_range);1197int e = labs(v - qv);1198if (e < best_e)1199{1200best_e = e;1201best_index = i;1202if (!best_e)1203break;1204}1205}1206return best_index;1207}12081209// Returns the nearest ISE weight given a [0,64] endpoint value.1210uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)1211{1212assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);1213assert(v <= (int)MAX_WEIGHT_VALUE);12141215const uint32_t total_levels = get_ise_levels(ise_range);1216int best_e = INT_MAX, best_index = 0;1217for (uint32_t i = 0; i < total_levels; i++)1218{1219const int qv = dequant_bise_weight(i, ise_range);1220int e = labs(v - qv);1221if (e < best_e)1222{1223best_e = e;1224best_index = i;1225if (!best_e)1226break;1227}1228}1229return best_index;1230}12311232void create_quant_tables(1233uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]1234uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]1235uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]1236uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]1237uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights1238bool weight_flag) // false if block endpoints, true if weights1239{1240const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;12411242for (uint32_t i = 0; i < num_dequant_vals; i++)1243{1244uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);12451246if (pVal_to_ise)1247pVal_to_ise[i] = (uint8_t)bise_index;12481249if (pISE_to_val)1250pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);1251}12521253if (pISE_to_rank || pRank_to_ISE)1254{1255const uint32_t num_levels = get_ise_levels(ise_range);12561257if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])1258{1259// Only bits1260for (uint32_t i = 0; i < num_levels; i++)1261{1262if (pISE_to_rank)1263pISE_to_rank[i] = (uint8_t)i;12641265if (pRank_to_ISE)1266pRank_to_ISE[i] = (uint8_t)i;1267}1268}1269else1270{1271// Range has trits or quints1272uint32_t vals[256];1273for (uint32_t i = 0; i < num_levels; i++)1274{1275uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);12761277// Low=ISE value1278// High=dequantized value1279vals[i] = (v << 16) | i;1280}12811282// Sorts by dequantized value1283std::sort(vals, vals + num_levels);12841285for (uint32_t rank = 0; rank < num_levels; rank++)1286{1287uint32_t ise_val = (uint8_t)vals[rank];12881289if (pISE_to_rank)1290pISE_to_rank[ise_val] = (uint8_t)rank;12911292if (pRank_to_ISE)1293pRank_to_ISE[rank] = (uint8_t)ise_val;1294}1295}1296}1297}12981299void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats)1300{1301uint8_t* pDst = (uint8_t*)&blk.m_vals[0];1302memset(pDst, 0xFF, 16);13031304pDst[0] = 0b11111100;1305pDst[1] = 0b11111101;13061307pDst[8] = (uint8_t)rh;1308pDst[9] = (uint8_t)(rh >> 8);1309pDst[10] = (uint8_t)gh;1310pDst[11] = (uint8_t)(gh >> 8);1311pDst[12] = (uint8_t)bh;1312pDst[13] = (uint8_t)(bh >> 8);1313pDst[14] = (uint8_t)ah;1314pDst[15] = (uint8_t)(ah >> 8);13151316if (pStats)1317pStats->m_header_bits += 128;1318}13191320// rh-ah are half-floats1321void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats)1322{1323uint8_t* pDst = (uint8_t*)&blk.m_vals[0];1324memset(pDst, 0xFF, 16);13251326pDst[0] = 0b11111100;13271328pDst[8] = (uint8_t)rh;1329pDst[9] = (uint8_t)(rh >> 8);1330pDst[10] = (uint8_t)gh;1331pDst[11] = (uint8_t)(gh >> 8);1332pDst[12] = (uint8_t)bh;1333pDst[13] = (uint8_t)(bh >> 8);1334pDst[14] = (uint8_t)ah;1335pDst[15] = (uint8_t)(ah >> 8);13361337if (pStats)1338pStats->m_header_bits += 128;1339}13401341bool is_cem_ldr(uint32_t mode)1342{1343switch (mode)1344{1345case CEM_LDR_LUM_DIRECT:1346case CEM_LDR_LUM_BASE_PLUS_OFS:1347case CEM_LDR_LUM_ALPHA_DIRECT:1348case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:1349case CEM_LDR_RGB_BASE_SCALE:1350case CEM_LDR_RGB_DIRECT:1351case CEM_LDR_RGB_BASE_PLUS_OFFSET:1352case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:1353case CEM_LDR_RGBA_DIRECT:1354case CEM_LDR_RGBA_BASE_PLUS_OFFSET:1355return true;1356default:1357break;1358}13591360return false;1361}13621363bool is_valid_block_size(uint32_t w, uint32_t h)1364{1365assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));1366assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));13671368#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;1369SIZECHK(4, 4);1370SIZECHK(5, 4);13711372SIZECHK(5, 5);13731374SIZECHK(6, 5);1375SIZECHK(6, 6);13761377SIZECHK(8, 5);1378SIZECHK(8, 6);1379SIZECHK(10, 5);1380SIZECHK(10, 6);13811382SIZECHK(8, 8);1383SIZECHK(10, 8);1384SIZECHK(10, 10);13851386SIZECHK(12, 10);1387SIZECHK(12, 12);1388#undef SIZECHK13891390return false;1391}13921393bool block_has_any_hdr_cems(const log_astc_block& log_blk)1394{1395assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));13961397for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)1398if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))1399return true;14001401return false;1402}14031404bool block_has_any_ldr_cems(const log_astc_block& log_blk)1405{1406assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));14071408for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)1409if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))1410return true;14111412return false;1413}14141415dequant_tables g_dequant_tables;14161417void precompute_texel_partitions_4x4();1418void precompute_texel_partitions_6x6();14191420void init_tables(bool init_rank_tabs)1421{1422g_dequant_tables.init(init_rank_tabs);14231424precompute_texel_partitions_4x4();1425precompute_texel_partitions_6x6();1426}14271428void compute_upsample_weights(1429int block_width, int block_height,1430int weight_grid_width, int weight_grid_height,1431weighted_sample* pWeights) // there will be block_width * block_height bilinear samples1432{1433const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);1434const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);14351436for (int texelY = 0; texelY < block_height; texelY++)1437{1438for (int texelX = 0; texelX < block_width; texelX++)1439{1440const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;1441const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;1442const uint32_t jX = gX >> 4;1443const uint32_t jY = gY >> 4;1444const uint32_t fX = gX & 0xf;1445const uint32_t fY = gY & 0xf;1446const uint32_t w11 = (fX * fY + 8) >> 4;1447const uint32_t w10 = fY - w11;1448const uint32_t w01 = fX - w11;1449const uint32_t w00 = 16 - fX - fY + w11;14501451weighted_sample& s = pWeights[texelX + texelY * block_width];1452s.m_src_x = (uint8_t)jX;1453s.m_src_y = (uint8_t)jY;1454s.m_weights[0][0] = (uint8_t)w00;1455s.m_weights[0][1] = (uint8_t)w01;1456s.m_weights[1][0] = (uint8_t)w10;1457s.m_weights[1][1] = (uint8_t)w11;1458}1459}1460}14611462// Should be dequantized [0,64] weights1463void upsample_weight_grid(1464uint32_t bx, uint32_t by, // destination/to dimension1465uint32_t wx, uint32_t wy, // source/from dimension1466const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]1467uint8_t* pDst_weights) // [by][bx]1468{1469assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));1470assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));14711472const uint32_t total_src_weights = wx * wy;1473const uint32_t total_dst_weights = bx * by;14741475if (total_src_weights == total_dst_weights)1476{1477memcpy(pDst_weights, pSrc_weights, total_src_weights);1478return;1479}14801481weighted_sample weights[12 * 12];1482compute_upsample_weights(bx, by, wx, wy, weights);14831484const weighted_sample* pS = weights;14851486for (uint32_t y = 0; y < by; y++)1487{1488for (uint32_t x = 0; x < bx; x++, ++pS)1489{1490const uint32_t w00 = pS->m_weights[0][0];1491const uint32_t w01 = pS->m_weights[0][1];1492const uint32_t w10 = pS->m_weights[1][0];1493const uint32_t w11 = pS->m_weights[1][1];14941495assert(w00 || w01 || w10 || w11);14961497const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;14981499uint32_t total = 8;1500if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;1501if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;1502if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;1503if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;15041505pDst_weights[x + y * bx] = (uint8_t)(total >> 4);1506}1507}1508}15091510inline uint32_t hash52(uint32_t v)1511{1512uint32_t p = v;1513p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;1514p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;1515p ^= p << 6; p ^= p >> 17;1516return p;1517}15181519// small_block = num_blk_pixels < 311520int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)1521{1522assert(zIn == 0);15231524const uint32_t x = small_block ? xIn << 1 : xIn;1525const uint32_t y = small_block ? yIn << 1 : yIn;1526const uint32_t z = small_block ? zIn << 1 : zIn;1527const uint32_t seed = seedIn + 1024 * (num_partitions - 1);1528const uint32_t rnum = hash52(seed);15291530uint8_t seed1 = (uint8_t)(rnum & 0xf);1531uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);1532uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);1533uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);1534uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);1535uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);1536uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);1537uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);1538uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);1539uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);1540uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);1541uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);15421543seed1 = (uint8_t)(seed1 * seed1);1544seed2 = (uint8_t)(seed2 * seed2);1545seed3 = (uint8_t)(seed3 * seed3);1546seed4 = (uint8_t)(seed4 * seed4);1547seed5 = (uint8_t)(seed5 * seed5);1548seed6 = (uint8_t)(seed6 * seed6);1549seed7 = (uint8_t)(seed7 * seed7);1550seed8 = (uint8_t)(seed8 * seed8);1551seed9 = (uint8_t)(seed9 * seed9);1552seed10 = (uint8_t)(seed10 * seed10);1553seed11 = (uint8_t)(seed11 * seed11);1554seed12 = (uint8_t)(seed12 * seed12);15551556const int shA = (seed & 2) != 0 ? 4 : 5;1557const int shB = (num_partitions == 3) ? 6 : 5;1558const int sh1 = (seed & 1) != 0 ? shA : shB;1559const int sh2 = (seed & 1) != 0 ? shB : shA;1560const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;15611562seed1 = (uint8_t)(seed1 >> sh1);1563seed2 = (uint8_t)(seed2 >> sh2);1564seed3 = (uint8_t)(seed3 >> sh1);1565seed4 = (uint8_t)(seed4 >> sh2);1566seed5 = (uint8_t)(seed5 >> sh1);1567seed6 = (uint8_t)(seed6 >> sh2);1568seed7 = (uint8_t)(seed7 >> sh1);1569seed8 = (uint8_t)(seed8 >> sh2);1570seed9 = (uint8_t)(seed9 >> sh3);1571seed10 = (uint8_t)(seed10 >> sh3);1572seed11 = (uint8_t)(seed11 >> sh3);1573seed12 = (uint8_t)(seed12 >> sh3);15741575const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));1576const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));1577const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;1578const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;15791580return (a >= b && a >= c && a >= d) ? 01581: (b >= c && b >= d) ? 11582: (c >= d) ? 21583: 3;1584}15851586// 4x4, 2 and 3 subsets1587static uint32_t g_texel_partitions_4x4[1024][2];15881589// 6x6, 2 and 3 subsets (2 subsets low 4 bits, 3 subsets high 4 bits)1590static uint8_t g_texel_partitions_6x6[1024][6 * 6];15911592void precompute_texel_partitions_4x4()1593{1594for (uint32_t p = 0; p < 1024; p++)1595{1596uint32_t v2 = 0, v3 = 0;15971598for (uint32_t y = 0; y < 4; y++)1599{1600for (uint32_t x = 0; x < 4; x++)1601{1602const uint32_t shift = x * 2 + y * 8;1603v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);1604v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);1605}1606}16071608g_texel_partitions_4x4[p][0] = v2;1609g_texel_partitions_4x4[p][1] = v3;1610}1611}16121613void precompute_texel_partitions_6x6()1614{1615for (uint32_t p = 0; p < 1024; p++)1616{1617for (uint32_t y = 0; y < 6; y++)1618{1619for (uint32_t x = 0; x < 6; x++)1620{1621const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false);1622const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false);16231624assert((p2 <= 1) && (p3 <= 2));1625g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2);1626}1627}1628}1629}16301631static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)1632{1633assert(g_texel_partitions_4x4[1][0]);1634assert(seed < 1024);1635assert((x <= 3) && (y <= 3));1636assert((num_partitions >= 2) && (num_partitions <= 3));16371638const uint32_t shift = x * 2 + y * 8;1639return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;1640}16411642static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)1643{1644assert(g_texel_partitions_6x6[0][0]);1645assert(seed < 1024);1646assert((x <= 5) && (y <= 5));1647assert((num_partitions >= 2) && (num_partitions <= 3));16481649const uint32_t shift = (num_partitions == 3) ? 4 : 0;1650return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3;1651}16521653void blue_contract(1654int r, int g, int b, int a,1655int &dr, int &dg, int &db, int &da)1656{1657dr = (r + b) >> 1;1658dg = (g + b) >> 1;1659db = b;1660da = a;1661}16621663inline void bit_transfer_signed(int& a, int& b)1664{1665b >>= 1;1666b |= (a & 0x80);1667a >>= 1;1668a &= 0x3F;1669if ((a & 0x20) != 0)1670a -= 0x40;1671}16721673static inline int clamp(int a, int l, int h)1674{1675if (a < l)1676a = l;1677else if (a > h)1678a = h;1679return a;1680}16811682static inline float clampf(float a, float l, float h)1683{1684if (a < l)1685a = l;1686else if (a > h)1687a = h;1688return a;1689}16901691inline int sign_extend(int src, int num_src_bits)1692{1693assert((num_src_bits >= 2) && (num_src_bits <= 31));16941695const bool negative = (src & (1 << (num_src_bits - 1))) != 0;1696if (negative)1697return src | ~((1 << num_src_bits) - 1);1698else1699return src & ((1 << num_src_bits) - 1);1700}17011702// endpoints is [4][2]1703void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)1704{1705assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);17061707int v0 = pE[0], v1 = pE[1];17081709int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];1710int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];17111712switch (cem_index)1713{1714case CEM_LDR_LUM_DIRECT:1715{1716e0_r = v0; e1_r = v1;1717e0_g = v0; e1_g = v1;1718e0_b = v0; e1_b = v1;1719e0_a = 0xFF; e1_a = 0xFF;1720break;1721}1722case CEM_LDR_LUM_BASE_PLUS_OFS:1723{1724int l0 = (v0 >> 2) | (v1 & 0xc0);1725int l1 = l0 + (v1 & 0x3f);17261727if (l1 > 0xFF)1728l1 = 0xFF;17291730e0_r = l0; e1_r = l1;1731e0_g = l0; e1_g = l1;1732e0_b = l0; e1_b = l1;1733e0_a = 0xFF; e1_a = 0xFF;1734break;1735}1736case CEM_LDR_LUM_ALPHA_DIRECT:1737{1738int v2 = pE[2], v3 = pE[3];17391740e0_r = v0; e1_r = v1;1741e0_g = v0; e1_g = v1;1742e0_b = v0; e1_b = v1;1743e0_a = v2; e1_a = v3;1744break;1745}1746case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:1747{1748int v2 = pE[2], v3 = pE[3];17491750bit_transfer_signed(v1, v0);1751bit_transfer_signed(v3, v2);17521753e0_r = v0; e1_r = v0 + v1;1754e0_g = v0; e1_g = v0 + v1;1755e0_b = v0; e1_b = v0 + v1;1756e0_a = v2; e1_a = v2 + v3;17571758for (uint32_t c = 0; c < 4; c++)1759{1760pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);1761pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);1762}17631764break;1765}1766case CEM_LDR_RGB_BASE_SCALE:1767{1768int v2 = pE[2], v3 = pE[3];17691770e0_r = (v0 * v3) >> 8; e1_r = v0;1771e0_g = (v1 * v3) >> 8; e1_g = v1;1772e0_b = (v2 * v3) >> 8; e1_b = v2;1773e0_a = 0xFF; e1_a = 0xFF;17741775break;1776}1777case CEM_LDR_RGB_DIRECT:1778{1779int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];17801781if ((v1 + v3 + v5) >= (v0 + v2 + v4))1782{1783e0_r = v0; e1_r = v1;1784e0_g = v2; e1_g = v3;1785e0_b = v4; e1_b = v5;1786e0_a = 0xFF; e1_a = 0xFF;1787}1788else1789{1790blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);1791blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);1792}17931794break;1795}1796case CEM_LDR_RGB_BASE_PLUS_OFFSET:1797{1798int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];17991800bit_transfer_signed(v1, v0);1801bit_transfer_signed(v3, v2);1802bit_transfer_signed(v5, v4);18031804if ((v1 + v3 + v5) >= 0)1805{1806e0_r = v0; e1_r = v0 + v1;1807e0_g = v2; e1_g = v2 + v3;1808e0_b = v4; e1_b = v4 + v5;1809e0_a = 0xFF; e1_a = 0xFF;1810}1811else1812{1813blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);1814blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);1815}18161817for (uint32_t c = 0; c < 4; c++)1818{1819pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);1820pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);1821}18221823break;1824}1825case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:1826{1827int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];18281829e0_r = (v0 * v3) >> 8; e1_r = v0;1830e0_g = (v1 * v3) >> 8; e1_g = v1;1831e0_b = (v2 * v3) >> 8; e1_b = v2;1832e0_a = v4; e1_a = v5;18331834break;1835}1836case CEM_LDR_RGBA_DIRECT:1837{1838int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];18391840if ((v1 + v3 + v5) >= (v0 + v2 + v4))1841{1842e0_r = v0; e1_r = v1;1843e0_g = v2; e1_g = v3;1844e0_b = v4; e1_b = v5;1845e0_a = v6; e1_a = v7;1846}1847else1848{1849blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);1850blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);1851}18521853break;1854}1855case CEM_LDR_RGBA_BASE_PLUS_OFFSET:1856{1857int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];18581859bit_transfer_signed(v1, v0);1860bit_transfer_signed(v3, v2);1861bit_transfer_signed(v5, v4);1862bit_transfer_signed(v7, v6);18631864if ((v1 + v3 + v5) >= 0)1865{1866e0_r = v0; e1_r = v0 + v1;1867e0_g = v2; e1_g = v2 + v3;1868e0_b = v4; e1_b = v4 + v5;1869e0_a = v6; e1_a = v6 + v7;1870}1871else1872{1873blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);1874blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);1875}18761877for (uint32_t c = 0; c < 4; c++)1878{1879pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);1880pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);1881}18821883break;1884}1885case CEM_HDR_LUM_LARGE_RANGE:1886{1887int y0, y1;1888if (v1 >= v0)1889{1890y0 = (v0 << 4);1891y1 = (v1 << 4);1892}1893else1894{1895y0 = (v1 << 4) + 8;1896y1 = (v0 << 4) - 8;1897}18981899e0_r = y0; e1_r = y1;1900e0_g = y0; e1_g = y1;1901e0_b = y0; e1_b = y1;1902e0_a = 0x780; e1_a = 0x780;19031904break;1905}1906case CEM_HDR_LUM_SMALL_RANGE:1907{1908int y0, y1, d;19091910if ((v0 & 0x80) != 0)1911{1912y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);1913d = (v1 & 0x1F) << 2;1914}1915else1916{1917y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);1918d = (v1 & 0x0F) << 1;1919}19201921y1 = y0 + d;1922if (y1 > 0xFFF)1923y1 = 0xFFF;19241925e0_r = y0; e1_r = y1;1926e0_g = y0; e1_g = y1;1927e0_b = y0; e1_b = y1;1928e0_a = 0x780; e1_a = 0x780;19291930break;1931}1932case CEM_HDR_RGB_BASE_SCALE:1933{1934int v2 = pE[2], v3 = pE[3];19351936int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);19371938int majcomp, mode;1939if ((modeval & 0xC) != 0xC)1940{1941majcomp = modeval >> 2;1942mode = modeval & 3;1943}1944else if (modeval != 0xF)1945{1946majcomp = modeval & 3;1947mode = 4;1948}1949else1950{1951majcomp = 0;1952mode = 5;1953}19541955int red = v0 & 0x3f;1956int green = v1 & 0x1f;1957int blue = v2 & 0x1f;1958int scale = v3 & 0x1f;19591960int x0 = (v1 >> 6) & 1;1961int x1 = (v1 >> 5) & 1;1962int x2 = (v2 >> 6) & 1;1963int x3 = (v2 >> 5) & 1;1964int x4 = (v3 >> 7) & 1;1965int x5 = (v3 >> 6) & 1;1966int x6 = (v3 >> 5) & 1;19671968int ohm = 1 << mode;1969if (ohm & 0x30) green |= x0 << 6;1970if (ohm & 0x3A) green |= x1 << 5;1971if (ohm & 0x30) blue |= x2 << 6;1972if (ohm & 0x3A) blue |= x3 << 5;1973if (ohm & 0x3D) scale |= x6 << 5;1974if (ohm & 0x2D) scale |= x5 << 6;1975if (ohm & 0x04) scale |= x4 << 7;1976if (ohm & 0x3B) red |= x4 << 6;1977if (ohm & 0x04) red |= x3 << 6;1978if (ohm & 0x10) red |= x5 << 7;1979if (ohm & 0x0F) red |= x2 << 7;1980if (ohm & 0x05) red |= x1 << 8;1981if (ohm & 0x0A) red |= x0 << 8;1982if (ohm & 0x05) red |= x0 << 9;1983if (ohm & 0x02) red |= x6 << 9;1984if (ohm & 0x01) red |= x3 << 10;1985if (ohm & 0x02) red |= x5 << 10;19861987static const int s_shamts[6] = { 1,1,2,3,4,5 };19881989const int shamt = s_shamts[mode];1990red <<= shamt;1991green <<= shamt;1992blue <<= shamt;1993scale <<= shamt;19941995if (mode != 5)1996{1997green = red - green;1998blue = red - blue;1999}20002001if (majcomp == 1)2002std::swap(red, green);20032004if (majcomp == 2)2005std::swap(red, blue);20062007e1_r = clamp(red, 0, 0xFFF);2008e1_g = clamp(green, 0, 0xFFF);2009e1_b = clamp(blue, 0, 0xFFF);2010e1_a = 0x780;20112012e0_r = clamp(red - scale, 0, 0xFFF);2013e0_g = clamp(green - scale, 0, 0xFFF);2014e0_b = clamp(blue - scale, 0, 0xFFF);2015e0_a = 0x780;20162017break;2018}2019case CEM_HDR_RGB_HDR_ALPHA:2020case CEM_HDR_RGB_LDR_ALPHA:2021case CEM_HDR_RGB:2022{2023int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];20242025int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);20262027e0_a = 0x780;2028e1_a = 0x780;20292030if (majcomp == 3)2031{2032e0_r = v0 << 4;2033e0_g = v2 << 4;2034e0_b = (v4 & 0x7f) << 5;20352036e1_r = v1 << 4;2037e1_g = v3 << 4;2038e1_b = (v5 & 0x7f) << 5;2039}2040else2041{2042int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);2043int va = v0 | ((v1 & 0x40) << 2);2044int vb0 = v2 & 0x3f;2045int vb1 = v3 & 0x3f;2046int vc = v1 & 0x3f;2047int vd0 = v4 & 0x7f;2048int vd1 = v5 & 0x7f;20492050static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };2051vd0 = sign_extend(vd0, s_dbitstab[mode]);2052vd1 = sign_extend(vd1, s_dbitstab[mode]);20532054int x0 = (v2 >> 6) & 1;2055int x1 = (v3 >> 6) & 1;2056int x2 = (v4 >> 6) & 1;2057int x3 = (v5 >> 6) & 1;2058int x4 = (v4 >> 5) & 1;2059int x5 = (v5 >> 5) & 1;20602061int ohm = 1 << mode;2062if (ohm & 0xA4) va |= x0 << 9;2063if (ohm & 0x08) va |= x2 << 9;2064if (ohm & 0x50) va |= x4 << 9;2065if (ohm & 0x50) va |= x5 << 10;2066if (ohm & 0xA0) va |= x1 << 10;2067if (ohm & 0xC0) va |= x2 << 11;2068if (ohm & 0x04) vc |= x1 << 6;2069if (ohm & 0xE8) vc |= x3 << 6;2070if (ohm & 0x20) vc |= x2 << 7;2071if (ohm & 0x5B) vb0 |= x0 << 6;2072if (ohm & 0x5B) vb1 |= x1 << 6;2073if (ohm & 0x12) vb0 |= x2 << 7;2074if (ohm & 0x12) vb1 |= x3 << 7;20752076int shamt = (mode >> 1) ^ 3;2077va = (uint32_t)va << shamt;2078vb0 = (uint32_t)vb0 << shamt;2079vb1 = (uint32_t)vb1 << shamt;2080vc = (uint32_t)vc << shamt;2081vd0 = (uint32_t)vd0 << shamt;2082vd1 = (uint32_t)vd1 << shamt;20832084e1_r = clamp(va, 0, 0xFFF);2085e1_g = clamp(va - vb0, 0, 0xFFF);2086e1_b = clamp(va - vb1, 0, 0xFFF);20872088e0_r = clamp(va - vc, 0, 0xFFF);2089e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);2090e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);20912092if (majcomp == 1)2093{2094std::swap(e0_r, e0_g);2095std::swap(e1_r, e1_g);2096}2097else if (majcomp == 2)2098{2099std::swap(e0_r, e0_b);2100std::swap(e1_r, e1_b);2101}2102}21032104if (cem_index == CEM_HDR_RGB_LDR_ALPHA)2105{2106int v6 = pE[6], v7 = pE[7];21072108e0_a = v6;2109e1_a = v7;2110}2111else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)2112{2113int v6 = pE[6], v7 = pE[7];21142115// Extract mode bits2116int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);2117v6 &= 0x7F;2118v7 &= 0x7F;21192120if (mode == 3)2121{2122e0_a = v6 << 5;2123e1_a = v7 << 5;2124}2125else2126{2127v6 |= (v7 << (mode + 1)) & 0x780;2128v7 &= (0x3F >> mode);2129v7 ^= (0x20 >> mode);2130v7 -= (0x20 >> mode);2131v6 <<= (4 - mode);2132v7 <<= (4 - mode);21332134v7 += v6;2135v7 = clamp(v7, 0, 0xFFF);2136e0_a = v6;2137e1_a = v7;2138}2139}21402141break;2142}2143default:2144{2145assert(0);2146for (uint32_t c = 0; c < 4; c++)2147{2148pEndpoints[c][0] = 0;2149pEndpoints[c][1] = 0;2150}2151break;2152}2153}2154}21552156static inline bool is_half_inf_or_nan(half_float v)2157{2158return get_bits(v, 10, 14) == 31;2159}21602161// This float->half conversion matches how "F32TO16" works on Intel GPU's.2162half_float float_to_half(float val, bool toward_zero)2163{2164union { float f; int32_t i; uint32_t u; } fi = { val };2165const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;2166int s = flt_s, e = 0, m = 0;21672168// inf/NaN2169if (flt_e == 0xff)2170{2171e = 31;2172if (flt_m != 0) // NaN2173m = 1;2174}2175// not zero or denormal2176else if (flt_e != 0)2177{2178int new_exp = flt_e - 127;2179if (new_exp > 15)2180e = 31;2181else if (new_exp < -14)2182{2183if (toward_zero)2184m = (int)truncf((1 << 24) * fabsf(fi.f));2185else2186m = lrintf((1 << 24) * fabsf(fi.f));2187}2188else2189{2190e = new_exp + 15;2191if (toward_zero)2192m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));2193else2194m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));2195}2196}21972198assert((0 <= m) && (m <= 1024));2199if (m == 1024)2200{2201e++;2202m = 0;2203}22042205assert((s >= 0) && (s <= 1));2206assert((e >= 0) && (e <= 31));2207assert((m >= 0) && (m <= 1023));22082209half_float result = (half_float)((s << 15) | (e << 10) | m);2210return result;2211}22122213float half_to_float(half_float hval)2214{2215union { float f; uint32_t u; } x = { 0 };22162217uint32_t s = ((uint32_t)hval >> 15) & 1;2218uint32_t e = ((uint32_t)hval >> 10) & 0x1F;2219uint32_t m = (uint32_t)hval & 0x3FF;22202221if (!e)2222{2223if (!m)2224{2225// +- 02226x.u = s << 31;2227return x.f;2228}2229else2230{2231// denormalized2232while (!(m & 0x00000400))2233{2234m <<= 1;2235--e;2236}22372238++e;2239m &= ~0x00000400;2240}2241}2242else if (e == 31)2243{2244if (m == 0)2245{2246// +/- INF2247x.u = (s << 31) | 0x7f800000;2248return x.f;2249}2250else2251{2252// +/- NaN2253x.u = (s << 31) | 0x7f800000 | (m << 13);2254return x.f;2255}2256}22572258e = e + (127 - 15);2259m = m << 13;22602261assert(s <= 1);2262assert(m <= 0x7FFFFF);2263assert(e <= 255);22642265x.u = m | (e << 23) | (s << 31);2266return x.f;2267}22682269// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt2270const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;2271const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);2272const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);2273const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);2274//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));2275const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));22762277void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)2278{2279int x = packed & 511;2280int y = (packed >> 9) & 511;2281int z = (packed >> 18) & 511;2282int w = (packed >> 27) & 31;22832284const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));22852286r = x * scale;2287g = y * scale;2288b = z * scale;2289}22902291// floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases.2292static inline int floor_log2(float x)2293{2294union float7542295{2296unsigned int raw;2297float value;2298};22992300float754 f;2301f.value = x;2302// Extract float exponent2303return ((f.raw >> 23) & 0xFF) - 127;2304}23052306static inline int maximumi(int a, int b) { return (a > b) ? a : b; }2307static inline float maximumf(float a, float b) { return (a > b) ? a : b; }23082309uint32_t pack_rgb9e5(float r, float g, float b)2310{2311r = clampf(r, 0.0f, MAX_RGB9E5);2312g = clampf(g, 0.0f, MAX_RGB9E5);2313b = clampf(b, 0.0f, MAX_RGB9E5);23142315float maxrgb = maximumf(maximumf(r, g), b);2316int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;2317assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));23182319float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));23202321int maxm = (int)floorf((maxrgb / denom) + 0.5f);2322if (maxm == (MAX_RGB9E5_MANTISSA + 1))2323{2324denom *= 2;2325exp_shared += 1;2326assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);2327}2328else2329{2330assert(maxm <= MAX_RGB9E5_MANTISSA);2331}23322333int rm = (int)floorf((r / denom) + 0.5f);2334int gm = (int)floorf((g / denom) + 0.5f);2335int bm = (int)floorf((b / denom) + 0.5f);23362337assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));2338assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));2339assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));23402341return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);2342}23432344static inline int clz17(uint32_t x)2345{2346assert(x <= 0x1FFFF);2347x &= 0x1FFFF;23482349if (!x)2350return 17;23512352uint32_t n = 0;2353while ((x & 0x10000) == 0)2354{2355x <<= 1u;2356n++;2357}23582359return n;2360}23612362static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)2363{2364int lz = clz17(Cr | Cg | Cb | 1);2365if (Cr == 65535) { Cr = 65536; lz = 0; }2366if (Cg == 65535) { Cg = 65536; lz = 0; }2367if (Cb == 65535) { Cb = 65536; lz = 0; }2368Cr <<= lz; Cg <<= lz; Cb <<= lz;2369Cr = (Cr >> 8) & 0x1FF;2370Cg = (Cg >> 8) & 0x1FF;2371Cb = (Cb >> 8) & 0x1FF;2372uint32_t exponent = 16 - lz;2373uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;2374return texel;2375}23762377static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)2378{2379if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff;2380if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff;2381if (Cb > 0x7c00) Cb = 0; else if (Cb == 0x7c00) Cb = 0x7bff;2382int Re = (Cr >> 10) & 0x1F;2383int Ge = (Cg >> 10) & 0x1F;2384int Be = (Cb >> 10) & 0x1F;2385int Rex = (Re == 0) ? 1 : Re;2386int Gex = (Ge == 0) ? 1 : Ge;2387int Bex = (Be == 0) ? 1 : Be;2388int Xm = ((Cr | Cg | Cb) & 0x200) >> 9;2389int Xe = Re | Ge | Be;2390uint32_t rshift, gshift, bshift, expo;23912392if (Xe == 0)2393{2394expo = rshift = gshift = bshift = Xm;2395}2396else if (Re >= Ge && Re >= Be)2397{2398expo = Rex + 1;2399rshift = 2;2400gshift = Rex - Gex + 2;2401bshift = Rex - Bex + 2;2402}2403else if (Ge >= Be)2404{2405expo = Gex + 1;2406rshift = Gex - Rex + 2;2407gshift = 2;2408bshift = Gex - Bex + 2;2409}2410else2411{2412expo = Bex + 1;2413rshift = Bex - Rex + 2;2414gshift = Bex - Gex + 2;2415bshift = 2;2416}24172418int Rm = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400);2419int Gm = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400);2420int Bm = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400);2421Rm = (Rm >> rshift) & 0x1FF;2422Gm = (Gm >> gshift) & 0x1FF;2423Bm = (Bm >> bshift) & 0x1FF;24242425uint32_t texel = (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0);2426return texel;2427}24282429// Important: pPixels is either 32-bit/texel or 64-bit/texel.2430bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)2431{2432assert(is_valid_block_size(blk_width, blk_height));24332434assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());2435if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())2436return false;24372438const uint32_t num_blk_pixels = blk_width * blk_height;24392440// Write block error color2441if (dec_mode == cDecodeModeHDR16)2442{2443// NaN's2444memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);2445}2446else if (dec_mode == cDecodeModeRGB9E5)2447{2448const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);24492450for (uint32_t i = 0; i < num_blk_pixels; i++)2451((uint32_t*)pPixels)[i] = purple_9e5;2452}2453else2454{2455for (uint32_t i = 0; i < num_blk_pixels; i++)2456((uint32_t*)pPixels)[i] = 0xFFFF00FF;2457}24582459if (log_blk.m_error_flag)2460{2461// Should this return false? It's not an invalid logical block config, though.2462return false;2463}24642465// Handle solid color blocks2466if (log_blk.m_solid_color_flag_ldr)2467{2468// LDR solid block2469if (dec_mode == cDecodeModeHDR16)2470{2471// Convert LDR pixels to half-float2472half_float h[4];2473for (uint32_t c = 0; c < 4; c++)2474h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);24752476for (uint32_t i = 0; i < num_blk_pixels; i++)2477memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);2478}2479else if (dec_mode == cDecodeModeRGB9E5)2480{2481float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));2482float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));2483float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));24842485const uint32_t packed = pack_rgb9e5(r, g, b);24862487for (uint32_t i = 0; i < num_blk_pixels; i++)2488((uint32_t*)pPixels)[i] = packed;2489}2490else2491{2492// Convert LDR pixels to 8-bits2493for (uint32_t i = 0; i < num_blk_pixels; i++)2494for (uint32_t c = 0; c < 4; c++)2495((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);2496}24972498return true;2499}2500else if (log_blk.m_solid_color_flag_hdr)2501{2502// HDR solid block, decode mode must be half-float or RGB9E52503if (dec_mode == cDecodeModeHDR16)2504{2505for (uint32_t i = 0; i < num_blk_pixels; i++)2506memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);2507}2508else if (dec_mode == cDecodeModeRGB9E5)2509{2510float r = half_to_float(log_blk.m_solid_color[0]);2511float g = half_to_float(log_blk.m_solid_color[1]);2512float b = half_to_float(log_blk.m_solid_color[2]);25132514const uint32_t packed = pack_rgb9e5(r, g, b);25152516for (uint32_t i = 0; i < num_blk_pixels; i++)2517((uint32_t*)pPixels)[i] = packed;2518}2519else2520{2521return false;2522}25232524return true;2525}25262527// Sanity check block's config2528if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))2529return false;2530if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))2531return false;25322533if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))2534return false;2535if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))2536return false;2537if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))2538return false;2539if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))2540return false;2541if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)2542return false;2543if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))2544return false;2545if (log_blk.m_color_component_selector > 3)2546return false;25472548const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);2549const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);25502551bool is_ldr_endpoints[MAX_PARTITIONS];25522553// Check CEM's2554uint32_t total_cem_vals = 0;2555for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)2556{2557if (log_blk.m_color_endpoint_modes[i] > 15)2558return false;25592560total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);25612562is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);2563}25642565if (total_cem_vals > MAX_ENDPOINTS)2566return false;25672568const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);2569const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();25702571// Dequantized endpoints to [0,255]2572uint8_t dequantized_endpoints[MAX_ENDPOINTS];2573for (uint32_t i = 0; i < total_cem_vals; i++)2574{2575if (log_blk.m_endpoints[i] >= total_endpoint_levels)2576return false;2577dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];2578}25792580// Dequantize weights to [0,64]2581uint8_t dequantized_weights[2][12 * 12];25822583const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);2584const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();25852586const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;2587for (uint32_t i = 0; i < total_weight_vals; i++)2588{2589if (log_blk.m_weights[i] >= total_weight_levels)2590return false;25912592const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;2593const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;25942595dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];2596}25972598// Upsample weight grid. [0,64] weights2599uint8_t upsampled_weights[2][12 * 12];26002601upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);2602if (log_blk.m_dual_plane)2603upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);26042605// Decode CEM's2606int endpoints[4][4][2]; // [subset][comp][l/h]26072608uint32_t endpoint_val_index = 0;2609for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)2610{2611const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];26122613decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);26142615endpoint_val_index += get_num_cem_values(cem_index);2616}26172618// Decode texels2619const bool small_block = num_blk_pixels < 31;2620const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);2621const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);2622const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;26232624bool success = true;26252626if (dec_mode == cDecodeModeRGB9E5)2627{2628// returns uint32_t's2629for (uint32_t y = 0; y < blk_height; y++)2630{2631for (uint32_t x = 0; x < blk_width; x++)2632{2633const uint32_t pixel_index = x + y * blk_width;26342635uint32_t subset = 0;2636if (log_blk.m_num_partitions > 1)2637{2638if (use_precomputed_texel_partitions_4x4)2639subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2640else if (use_precomputed_texel_partitions_6x6)2641subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2642else2643subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);2644}26452646int comp[3];26472648for (uint32_t c = 0; c < 3; c++)2649{2650const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];26512652if (is_ldr_endpoints[subset])2653{2654assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));2655assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));26562657int le = endpoints[subset][c][0];2658int he = endpoints[subset][c][1];26592660le = (le << 8) | le;2661he = (he << 8) | he;26622663int k = weight_interpolate(le, he, w);2664assert((k >= 0) && (k <= 0xFFFF));26652666comp[c] = k; // 1.02667}2668else2669{2670assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));2671assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));26722673int le = endpoints[subset][c][0] << 4;2674int he = endpoints[subset][c][1] << 4;26752676int qlog16 = weight_interpolate(le, he, w);26772678comp[c] = qlog16_to_half(qlog16);26792680if (is_half_inf_or_nan((half_float)comp[c]))2681comp[c] = 0x7BFF;2682}26832684} // c26852686uint32_t packed;2687if (is_ldr_endpoints[subset])2688packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);2689else2690packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);26912692((uint32_t*)pPixels)[pixel_index] = packed;26932694} // x2695} // y2696}2697else if (dec_mode == cDecodeModeHDR16)2698{2699// Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)27002701// returns half floats2702for (uint32_t y = 0; y < blk_height; y++)2703{2704for (uint32_t x = 0; x < blk_width; x++)2705{2706const uint32_t pixel_index = x + y * blk_width;27072708uint32_t subset = 0;2709if (log_blk.m_num_partitions > 1)2710{2711if (use_precomputed_texel_partitions_4x4)2712subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2713else if (use_precomputed_texel_partitions_6x6)2714subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2715else2716subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);2717}27182719for (uint32_t c = 0; c < 4; c++)2720{2721const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];27222723half_float o;27242725if ( (is_ldr_endpoints[subset]) ||2726((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )2727{2728assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));2729assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));27302731int le = endpoints[subset][c][0];2732int he = endpoints[subset][c][1];27332734le = (le << 8) | le;2735he = (he << 8) | he;27362737int k = weight_interpolate(le, he, w);2738assert((k >= 0) && (k <= 0xFFFF));27392740if (k == 0xFFFF)2741o = 0x3C00; // 1.02742else2743o = float_to_half((float)k * (1.0f / 65536.0f), true);2744}2745else2746{2747assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));2748assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));27492750int le = endpoints[subset][c][0] << 4;2751int he = endpoints[subset][c][1] << 4;27522753int qlog16 = weight_interpolate(le, he, w);27542755o = qlog16_to_half(qlog16);27562757if (is_half_inf_or_nan(o))2758o = 0x7BFF;2759}27602761((half_float*)pPixels)[pixel_index * 4 + c] = o;2762}27632764} // x2765} // y2766}2767else2768{2769// returns uint8_t's2770for (uint32_t y = 0; y < blk_height; y++)2771{2772for (uint32_t x = 0; x < blk_width; x++)2773{2774const uint32_t pixel_index = x + y * blk_width;27752776uint32_t subset = 0;2777if (log_blk.m_num_partitions > 1)2778{2779if (use_precomputed_texel_partitions_4x4)2780subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2781else if (use_precomputed_texel_partitions_6x6)2782subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);2783else2784subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);2785}27862787if (!is_ldr_endpoints[subset])2788{2789((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;2790success = false;2791}2792else2793{2794for (uint32_t c = 0; c < 4; c++)2795{2796const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];27972798int le = endpoints[subset][c][0];2799int he = endpoints[subset][c][1];28002801// FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder2802//if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))2803// See https://github.com/ARM-software/astc-encoder/issues/4472804if (dec_mode == cDecodeModeSRGB8)2805{2806le = (le << 8) | 0x80;2807he = (he << 8) | 0x80;2808}2809else2810{2811le = (le << 8) | le;2812he = (he << 8) | he;2813}28142815uint32_t k = weight_interpolate(le, he, w);28162817// FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does2818// See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.2819// It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.2820((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);2821}2822}28232824} // x2825} // y2826}28272828return success;2829}28302831//------------------------------------------------2832// Physical to logical block decoding28332834// unsigned 128-bit int, with some signed helpers2835class uint1282836{2837uint64_t m_lo, m_hi;28382839public:2840uint128() = default;2841inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }2842inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }2843inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }28442845inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }2846inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }28472848inline explicit operator uint8_t () const { return (uint8_t)m_lo; }2849inline explicit operator uint16_t () const { return (uint16_t)m_lo; }2850inline explicit operator uint32_t () const { return (uint32_t)m_lo; }2851inline explicit operator uint64_t () const { return m_lo; }28522853inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }2854inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }28552856inline uint64_t get_low() const { return m_lo; }2857inline uint64_t& get_low() { return m_lo; }28582859inline uint64_t get_high() const { return m_hi; }2860inline uint64_t& get_high() { return m_hi; }28612862inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }2863inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }28642865inline bool operator< (const uint128& rhs) const2866{2867if (m_hi < rhs.m_hi)2868return true;28692870if (m_hi == rhs.m_hi)2871{2872if (m_lo < rhs.m_lo)2873return true;2874}28752876return false;2877}28782879inline bool operator> (const uint128& rhs) const { return (rhs < *this); }28802881inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }2882inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }28832884inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }2885inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }2886inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }2887inline explicit operator bool() const { return is_non_zero(); }2888inline bool is_signed() const { return ((int64_t)m_hi) < 0; }28892890inline bool signed_less(const uint128& rhs) const2891{2892const bool l_signed = is_signed(), r_signed = rhs.is_signed();28932894if (l_signed == r_signed)2895return *this < rhs;28962897if (l_signed && !r_signed)2898return true;28992900assert(!l_signed && r_signed);2901return false;2902}29032904inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }2905inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }2906inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }29072908double get_double() const2909{2910double res = 0;29112912if (m_hi)2913res = (double)m_hi * pow(2.0f, 64.0f);29142915res += (double)m_lo;29162917return res;2918}29192920double get_signed_double() const2921{2922if (is_signed())2923return -(uint128(*this).abs().get_double());2924else2925return get_double();2926}29272928inline uint128 abs() const2929{2930uint128 res(*this);2931if (res.is_signed())2932res = -res;2933return res;2934}29352936inline uint128& operator<<= (int shift)2937{2938assert(shift >= 0);2939if (shift < 0)2940return *this;29412942m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);29432944if ((shift) && (shift < 64))2945m_hi |= (m_lo >> (64 - shift));29462947m_lo = (shift >= 64) ? 0 : (m_lo << shift);29482949return *this;2950}29512952inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }29532954inline uint128& operator>>= (int shift)2955{2956assert(shift >= 0);2957if (shift < 0)2958return *this;29592960m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);29612962if ((shift) && (shift < 64))2963m_lo |= (m_hi << (64 - shift));29642965m_hi = (shift >= 64) ? 0 : (m_hi >> shift);29662967return *this;2968}29692970inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }29712972inline uint128 signed_shift_right(int shift) const2973{2974uint128 res(*this);2975res >>= shift;29762977if (is_signed())2978{2979uint128 x(0U);2980x = ~x;2981x >>= shift;2982res |= (~x);2983}29842985return res;2986}29872988inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }2989inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }29902991inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }2992inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }29932994inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }2995inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }29962997inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }29982999inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }30003001// prefix3002inline uint128 operator ++()3003{3004if (++m_lo == 0)3005++m_hi;3006return *this;3007}30083009// postfix3010inline uint128 operator ++(int)3011{3012uint128 res(*this);3013if (++m_lo == 0)3014++m_hi;3015return res;3016}30173018// prefix3019inline uint128 operator --()3020{3021const uint64_t t = m_lo;3022if (--m_lo > t)3023--m_hi;3024return *this;3025}30263027// postfix3028inline uint128 operator --(int)3029{3030const uint64_t t = m_lo;3031uint128 res(*this);3032if (--m_lo > t)3033--m_hi;3034return res;3035}30363037inline uint128& operator+= (const uint128& rhs)3038{3039const uint64_t t = m_lo + rhs.m_lo;3040m_hi = m_hi + rhs.m_hi + (t < m_lo);3041m_lo = t;3042return *this;3043}30443045inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }30463047inline uint128& operator-= (const uint128& rhs)3048{3049const uint64_t t = m_lo - rhs.m_lo;3050m_hi = m_hi - rhs.m_hi - (t > m_lo);3051m_lo = t;3052return *this;3053}30543055inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }30563057// computes bit by bit, very slow3058uint128& operator*=(const uint128& rhs)3059{3060uint128 temp(*this), result(0U);30613062for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)3063if (bitmask.get_low() & 1)3064result += temp;30653066*this = result;3067return *this;3068}30693070uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }30713072// computes bit by bit, very slow3073friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)3074{3075remainder = 0;30763077if (!divisor)3078{3079assert(0);3080return ~uint128(0U);3081}30823083uint128 quotient(0), one(1);30843085for (int i = 127; i >= 0; i--)3086{3087remainder = (remainder << 1) | ((dividend >> i) & one);3088if (remainder >= divisor)3089{3090remainder -= divisor;3091quotient |= (one << i);3092}3093}30943095return quotient;3096}30973098uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }3099uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }31003101uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }3102uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }31033104void print_hex(FILE* pFile) const3105{3106fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);3107}31083109void format_unsigned(std::string& res) const3110{3111basisu::vector<uint8_t> digits;3112digits.reserve(39 + 1);31133114uint128 k(*this), ten(10);3115do3116{3117uint128 r;3118k = divide(k, ten, r);3119digits.push_back((uint8_t)r);3120} while (k);31213122for (int i = (int)digits.size() - 1; i >= 0; i--)3123res += ('0' + digits[i]);3124}31253126void format_signed(std::string& res) const3127{3128uint128 val(*this);31293130if (val.is_signed())3131{3132res.push_back('-');3133val = -val;3134}31353136val.format_unsigned(res);3137}31383139void print_unsigned(FILE* pFile)3140{3141std::string str;3142format_unsigned(str);3143fprintf(pFile, "%s", str.c_str());3144}31453146void print_signed(FILE* pFile)3147{3148std::string str;3149format_signed(str);3150fprintf(pFile, "%s", str.c_str());3151}31523153uint128 get_reversed_bits() const3154{3155uint128 res;31563157const uint32_t* pSrc = (const uint32_t*)this;3158uint32_t* pDst = (uint32_t*)&res;31593160pDst[0] = rev_dword(pSrc[3]);3161pDst[1] = rev_dword(pSrc[2]);3162pDst[2] = rev_dword(pSrc[1]);3163pDst[3] = rev_dword(pSrc[0]);31643165return res;3166}31673168uint128 get_byteswapped() const3169{3170uint128 res;31713172const uint8_t* pSrc = (const uint8_t*)this;3173uint8_t* pDst = (uint8_t*)&res;31743175for (uint32_t i = 0; i < 16; i++)3176pDst[i] = pSrc[15 - i];31773178return res;3179}31803181inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const3182{3183assert(bit_ofs < 128);3184assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));31853186uint128 res(*this);3187res >>= bit_ofs;31883189const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);3190return res.get_low() & bitmask;3191}31923193inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const3194{3195assert(bit_len <= 32);3196return (uint32_t)get_bits64(bit_ofs, bit_len);3197}31983199inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const3200{3201assert(len && (len <= 32));3202uint32_t x = get_bits(bit_ofs, len);3203bit_ofs += len;3204return x;3205}32063207inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)3208{3209assert(bit_ofs < 128);3210assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));32113212uint128 bitmask(1);3213bitmask = (bitmask << num_bits) - 1;3214assert(uint128(val) <= bitmask);32153216bitmask <<= bit_ofs;3217*this &= ~bitmask;32183219*this = *this | (uint128(val) << bit_ofs);3220return *this;3221}3222};32233224static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)3225{3226if (bits.get_bits(10, 2) != 0b11)3227return false;32283229uint32_t bit_ofs = 12;3230const uint32_t min_s = bits.next_bits(bit_ofs, 13);3231const uint32_t max_s = bits.next_bits(bit_ofs, 13);3232const uint32_t min_t = bits.next_bits(bit_ofs, 13);3233const uint32_t max_t = bits.next_bits(bit_ofs, 13);3234assert(bit_ofs == 64);32353236const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);32373238if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))3239return false;32403241const bool hdr_flag = bits.get_bits(9, 1) != 0;32423243if (hdr_flag)3244log_blk.m_solid_color_flag_hdr = true;3245else3246log_blk.m_solid_color_flag_ldr = true;32473248log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);3249log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);3250log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);3251log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);32523253if (log_blk.m_solid_color_flag_hdr)3254{3255for (uint32_t c = 0; c < 4; c++)3256if (is_half_inf_or_nan(log_blk.m_solid_color[c]))3257return false;3258}32593260return true;3261}32623263struct astc_dec_row3264{3265int8_t Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;3266};32673268static const astc_dec_row s_dec_rows[10] =3269{3270// Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;3271{ 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 23272{ 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 23273{ 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 83274{ 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 632753276{ 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 23277{ 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 23278{ 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 123279{ 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 1032803281{ 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 63282{ -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 63283};32843285static bool decode_config(const uint128& bits, log_astc_block& log_blk)3286{3287// Reserved3288if (bits.get_bits(0, 4) == 0)3289return false;32903291// Reserved3292if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))3293{3294if (bits.get_bits(2, 4) != 0b1111)3295return false;3296}32973298// Void extent3299if (bits.get_bits(0, 9) == 0b111111100)3300return decode_void_extent(bits, log_blk);33013302// Check rows3303const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);3304const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);3305const uint32_t x7_2 = bits.get_bits(7, 2);33063307int row_index = -1;3308if (x0_2 == 0)3309{3310if (x7_2 == 0b00)3311row_index = 5;3312else if (x7_2 == 0b01)3313row_index = 6;3314else if (x5_4 == 0b1100)3315row_index = 7;3316else if (x5_4 == 0b1101)3317row_index = 8;3318else if (x7_2 == 0b10)3319row_index = 9;3320}3321else3322{3323if (x2_2 == 0b00)3324row_index = 0;3325else if (x2_2 == 0b01)3326row_index = 1;3327else if (x2_2 == 0b10)3328row_index = 2;3329else if ((x2_2 == 0b11) && (x8_1 == 0))3330row_index = 3;3331else if ((x2_2 == 0b11) && (x8_1 == 1))3332row_index = 4;3333}3334if (row_index < 0)3335return false;33363337const astc_dec_row& r = s_dec_rows[row_index];33383339bool P = false, Dp = false;3340uint32_t W = r.W_bias, H = r.H_bias;33413342if (r.P_ofs >= 0)3343P = bits.get_bits(r.P_ofs, 1) != 0;33443345if (r.Dp_ofs >= 0)3346Dp = bits.get_bits(r.Dp_ofs, 1) != 0;33473348if (r.W_size)3349W += bits.get_bits(r.W_ofs, r.W_size);33503351if (r.H_size)3352H += bits.get_bits(r.H_ofs, r.H_size);33533354assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));3355assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));33563357int p0 = bits.get_bits(r.p0_ofs, 1);3358int p1 = bits.get_bits(r.p1_ofs, 1);3359int p2 = bits.get_bits(r.p2_ofs, 1);33603361uint32_t p = p0 | (p1 << 1) | (p2 << 2);3362if (p < 2)3363return false;33643365log_blk.m_grid_width = (uint8_t)W;3366log_blk.m_grid_height = (uint8_t)H;33673368log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS));3369assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);33703371log_blk.m_dual_plane = Dp;33723373return true;3374}33753376static inline uint32_t read_le_dword(const uint8_t* pBytes)3377{3378return (pBytes[0]) | (pBytes[1] << 8U) | (pBytes[2] << 16U) | (pBytes[3] << 24U);3379}33803381// See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs.3382static const uint8_t s_trit_decode[256][5] =3383{3384{0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},3385{0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},3386{0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},3387{0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},3388{0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},3389{0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},3390{0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},3391{0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},3392{0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},3393{0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},3394{0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},3395{0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},3396{0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},3397{0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},3398{0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},3399{0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},3400{0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},3401{0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},3402{0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},3403{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},3404{0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},3405{0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},3406{0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},3407{0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},3408{0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},3409{0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},3410{0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},3411{0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},3412{0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},3413{0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},3414{0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},3415{0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}3416};34173418static const uint8_t s_quint_decode[128][3] =3419{3420{0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},3421{0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},3422{0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},3423{0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},3424{0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},3425{0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},3426{0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},3427{0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},3428{0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},3429{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},3430{0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},3431{0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},3432{0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},3433{0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},3434{0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},3435{0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}3436};34373438static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)3439{3440assert((num_vals >= 1) && (num_vals <= 5));3441uint32_t m[5] = { 0 }, T = 0;34423443static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };34443445for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)3446{3447if (bits_per_val)3448m[c] = bits.next_bits(bit_ofs, bits_per_val);3449T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);3450T_ofs += s_t_bits[c];3451}34523453const uint8_t (&p_trits)[5] = s_trit_decode[T];34543455for (uint32_t i = 0; i < num_vals; i++)3456pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);3457}34583459static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)3460{3461assert((num_vals >= 1) && (num_vals <= 3));3462uint32_t m[3] = { 0 }, T = 0;34633464static const uint8_t s_t_bits[3] = { 3, 2, 2 };34653466for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)3467{3468if (bits_per_val)3469m[c] = bits.next_bits(bit_ofs, bits_per_val);3470T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);3471T_ofs += s_t_bits[c];3472}34733474const uint8_t (&p_quints)[3] = s_quint_decode[T];34753476for (uint32_t i = 0; i < num_vals; i++)3477pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);3478}34793480static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)3481{3482assert(num_vals && (ise_range < TOTAL_ISE_RANGES));34833484const uint32_t bits_per_val = g_ise_range_table[ise_range][0];34853486if (g_ise_range_table[ise_range][1])3487{3488// Trits+bits, 5 vals per block, 7 bits extra per block3489const uint32_t total_blocks = (num_vals + 4) / 5;3490for (uint32_t b = 0; b < total_blocks; b++)3491{3492const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);3493decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);3494}3495}3496else if (g_ise_range_table[ise_range][2])3497{3498// Quints+bits, 3 vals per block, 8 bits extra per block3499const uint32_t total_blocks = (num_vals + 2) / 3;3500for (uint32_t b = 0; b < total_blocks; b++)3501{3502const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);3503decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);3504}3505}3506else3507{3508assert(bits_per_val);35093510// Only bits3511for (uint32_t i = 0; i < num_vals; i++)3512pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);3513}3514}35153516void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)3517{3518const uint128 bits(3519(uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),3520(uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));35213522return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);3523}35243525// Decodes a physical ASTC block to a logical ASTC block.3526// blk_width/blk_height are only used to validate the weight grid's dimensions.3527bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)3528{3529assert(is_valid_block_size(blk_width, blk_height));35303531const uint8_t* pS = (uint8_t*)pASTC_block;35323533log_blk.clear();3534log_blk.m_error_flag = true;35353536const uint128 bits(3537(uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),3538(uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));35393540const uint128 rev_bits(bits.get_reversed_bits());35413542if (!decode_config(bits, log_blk))3543return false;35443545if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)3546{3547// Void extent3548log_blk.m_error_flag = false;3549return true;3550}35513552// Check grid dimensions3553if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))3554return false;35553556// Now we have the grid width/height, dual plane, weight ISE range35573558const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);3559const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);35603561// 18.24 Illegal Encodings3562if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))3563return false;35643565const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;35663567uint32_t total_extra_bits = 0;35683569// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.35703571log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1);3572if (log_blk.m_num_partitions == 1)3573log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits3574else3575{3576// 2 or more partitions3577if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))3578return false;35793580log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10);35813582uint32_t cem_bits = bits.get_bits(23, 6);35833584if ((cem_bits & 3) == 0)3585{3586// All CEM's the same3587for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)3588log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2);3589}3590else3591{3592// CEM's different, but within up to 2 adjacent classes3593const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;35943595total_extra_bits = 3 * log_blk.m_num_partitions - 4;35963597if ((total_weight_bits + total_extra_bits) > 128)3598return false;35993600uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;36013602uint32_t c[4] = { 0 }, m[4] = { 0 };36033604cem_bits >>= 2;3605for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)3606c[i] = cem_bits & 1;36073608switch (log_blk.m_num_partitions)3609{3610case 2:3611{3612m[0] = cem_bits & 3;3613m[1] = bits.next_bits(cem_bit_pos, 2);3614break;3615}3616case 3:3617{3618m[0] = cem_bits & 1;3619m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);3620m[1] = bits.next_bits(cem_bit_pos, 2);3621m[2] = bits.next_bits(cem_bit_pos, 2);3622break;3623}3624case 4:3625{3626for (uint32_t i = 0; i < 4; i++)3627m[i] = bits.next_bits(cem_bit_pos, 2);3628break;3629}3630default:3631{3632assert(0);3633break;3634}3635}36363637assert(cem_bit_pos == end_of_weight_bit_ofs);36383639for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)3640{3641log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]);3642assert(log_blk.m_color_endpoint_modes[i] <= 15);3643}3644}3645}36463647// Now we have all the CEM indices.36483649if (log_blk.m_dual_plane)3650{3651// Read CCS bits, beneath any CEM bits3652total_extra_bits += 2;36533654if (total_extra_bits > end_of_weight_bit_ofs)3655return false;36563657uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;3658log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2));3659}36603661uint32_t config_bit_pos = 11 + 2; // config+num_parts3662if (log_blk.m_num_partitions == 1)3663config_bit_pos += 4; // CEM bits3664else3665config_bit_pos += 10 + 6; // part_id+CEM bits36663667// config+num_parts+total_extra_bits (CEM extra+CCS)3668uint32_t total_config_bits = config_bit_pos + total_extra_bits;36693670// Compute number of remaining bits in block3671const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;3672if (num_remaining_bits < 0)3673return false;36743675// Compute total number of ISE encoded color endpoint mode values3676uint32_t total_cem_vals = 0;3677for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)3678total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);36793680if (total_cem_vals > MAX_ENDPOINTS)3681return false;36823683// Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block3684int endpoint_ise_range = -1;3685for (int k = 20; k > 0; k--)3686{3687int b = get_ise_sequence_bits(total_cem_vals, k);3688if (b <= num_remaining_bits)3689{3690endpoint_ise_range = k;3691break;3692}3693}36943695// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints3696if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)3697return false;36983699log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;37003701// Decode endpoints forwards in block3702decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);37033704// Decode grid weights backwards in block3705decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);37063707log_blk.m_error_flag = false;37083709return true;3710}37113712} // namespace astc_helpers37133714#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION371537163717