// Path: blob/master/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
// 9905 views
// basisu_transcoder_internal.h - Universal texture format transcoder library.1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing4//5// Licensed under the Apache License, Version 2.0 (the "License");6// you may not use this file except in compliance with the License.7// You may obtain a copy of the License at8//9// http://www.apache.org/licenses/LICENSE-2.010//11// Unless required by applicable law or agreed to in writing, software12// distributed under the License is distributed on an "AS IS" BASIS,13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14// See the License for the specific language governing permissions and15// limitations under the License.16#pragma once1718#ifdef _MSC_VER19#pragma warning (disable: 4127) // conditional expression is constant20#endif2122// v1.50: Added UASTC HDR 4x4 support23// v1.60: Added RDO ASTC HDR 6x6 and intermediate support24#define BASISD_LIB_VERSION 16025#define BASISD_VERSION_STRING "01.60"2627#ifdef _DEBUG28#define BASISD_BUILD_DEBUG29#else30#define BASISD_BUILD_RELEASE31#endif3233#include "basisu.h"3435#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))3637namespace basisu38{39extern bool g_debug_printf;40}4142namespace basist43{44// Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).45// You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices.46enum class block_format47{48cETC1, // ETC1S RGB49cETC2_RGBA, // full ETC2 EAC RGBA8 block50cBC1, // DXT1 RGB51cBC3, // BC4 block followed by a four color BC1 block52cBC4, // DXT5A (alpha block only)53cBC5, // two BC4 blocks54cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp55cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA56cBC7, // Full BC7 block, any mode57cBC7_M5_COLOR, // RGB BC7 mode 5 color 
(writes an opaque mode 5 block)58cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.)59cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format)60cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC61// data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking.6263cATC_RGB,64cATC_RGBA_INTERPOLATED_ALPHA,65cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size6667cPVRTC2_4_RGB,68cPVRTC2_4_RGBA,6970cETC2_EAC_R11,71cETC2_EAC_RG11,7273cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits)7475cRGB32, // Writes RGB components to 32bpp output pixels76cRGBA32, // Writes RGB255 components to 32bpp output pixels77cA32, // Writes alpha component to 32bpp output pixels7879cRGB565,80cBGR565,8182cRGBA4444_COLOR,83cRGBA4444_ALPHA,84cRGBA4444_COLOR_OPAQUE,85cRGBA4444,86cRGBA_HALF,87cRGB_HALF,88cRGB_9E5,8990cUASTC_4x4, // LDR, universal91cUASTC_HDR_4x4, // HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed92cBC6H,93cASTC_HDR_4x4,94cASTC_HDR_6x6,9596cTotalBlockFormats97};9899inline uint32_t get_block_width(block_format fmt)100{101switch (fmt)102{103case block_format::cFXT1_RGB:104return 8;105case block_format::cASTC_HDR_6x6:106return 6;107default:108break;109}110return 4;111}112113inline uint32_t get_block_height(block_format fmt)114{115switch (fmt)116{117case block_format::cASTC_HDR_6x6:118return 6;119default:120break;121}122return 4;123}124125const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31;126const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21;127const int COLOR5_PAL2_PREV_HI = 
31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9;128const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3;129130const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1;131const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1;132const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3;133const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4;134135const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds);136const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1;137const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS;138const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64;139const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;140const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;141const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);142143uint16_t crc16(const void *r, size_t size, uint16_t crc);144145class huffman_decoding_table146{147friend class bitwise_decoder;148149public:150huffman_decoding_table()151{152}153154void clear()155{156basisu::clear_vector(m_code_sizes);157basisu::clear_vector(m_lookup);158basisu::clear_vector(m_tree);159}160161bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)162{163if (!total_syms)164{165clear();166return true;167}168169m_code_sizes.resize(total_syms);170memcpy(&m_code_sizes[0], pCode_sizes, total_syms);171172const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;173174m_lookup.resize(0);175m_lookup.resize(huffman_fast_lookup_size);176177m_tree.resize(0);178m_tree.resize(total_syms * 2);179180uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];181basisu::clear_obj(syms_using_codesize);182for (uint32_t i = 0; i < total_syms; i++)183{184if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize)185return false;186syms_using_codesize[pCode_sizes[i]]++;187}188189uint32_t 
next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];190next_code[0] = next_code[1] = 0;191192uint32_t used_syms = 0, total = 0;193for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++)194{195used_syms += syms_using_codesize[i];196next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1));197}198199if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U))200return false;201202for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index)203{204uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index];205if (!code_size)206continue;207208cur_code = next_code[code_size]++;209210for (l = code_size; l > 0; l--, cur_code >>= 1)211rev_code = (rev_code << 1) | (cur_code & 1);212213if (code_size <= fast_lookup_bits)214{215uint32_t k = (code_size << 16) | sym_index;216while (rev_code < huffman_fast_lookup_size)217{218if (m_lookup[rev_code] != 0)219{220// Supplied codesizes can't create a valid prefix code.221return false;222}223224m_lookup[rev_code] = k;225rev_code += (1 << code_size);226}227continue;228}229230int tree_cur;231if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))232{233const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);234if (m_lookup[idx] != 0)235{236// Supplied codesizes can't create a valid prefix code.237return false;238}239240m_lookup[idx] = tree_next;241tree_cur = tree_next;242tree_next -= 2;243}244245if (tree_cur >= 0)246{247// Supplied codesizes can't create a valid prefix code.248return false;249}250251rev_code >>= (fast_lookup_bits - 1);252253for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)254{255tree_cur -= ((rev_code >>= 1) & 1);256257const int idx = -tree_cur - 1;258if (idx < 0)259return false;260else if (idx >= (int)m_tree.size())261m_tree.resize(idx + 1);262263if (!m_tree[idx])264{265m_tree[idx] = (int16_t)tree_next;266tree_cur = tree_next;267tree_next -= 2;268}269else270{271tree_cur = 
m_tree[idx];272if (tree_cur >= 0)273{274// Supplied codesizes can't create a valid prefix code.275return false;276}277}278}279280tree_cur -= ((rev_code >>= 1) & 1);281282const int idx = -tree_cur - 1;283if (idx < 0)284return false;285else if (idx >= (int)m_tree.size())286m_tree.resize(idx + 1);287288if (m_tree[idx] != 0)289{290// Supplied codesizes can't create a valid prefix code.291return false;292}293294m_tree[idx] = (int16_t)sym_index;295}296297return true;298}299300const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }301const basisu::int_vec &get_lookup() const { return m_lookup; }302const basisu::int16_vec &get_tree() const { return m_tree; }303304bool is_valid() const { return m_code_sizes.size() > 0; }305306private:307basisu::uint8_vec m_code_sizes;308basisu::int_vec m_lookup;309basisu::int16_vec m_tree;310};311312class bitwise_decoder313{314public:315bitwise_decoder() :316m_buf_size(0),317m_pBuf(nullptr),318m_pBuf_start(nullptr),319m_pBuf_end(nullptr),320m_bit_buf(0),321m_bit_buf_size(0)322{323}324325void clear()326{327m_buf_size = 0;328m_pBuf = nullptr;329m_pBuf_start = nullptr;330m_pBuf_end = nullptr;331m_bit_buf = 0;332m_bit_buf_size = 0;333}334335bool init(const uint8_t *pBuf, uint32_t buf_size)336{337if ((!pBuf) && (buf_size))338return false;339340m_buf_size = buf_size;341m_pBuf = pBuf;342m_pBuf_start = pBuf;343m_pBuf_end = pBuf + buf_size;344m_bit_buf = 0;345m_bit_buf_size = 0;346return true;347}348349void stop()350{351}352353inline uint32_t peek_bits(uint32_t num_bits)354{355if (!num_bits)356return 0;357358assert(num_bits <= 25);359360while (m_bit_buf_size < num_bits)361{362uint32_t c = 0;363if (m_pBuf < m_pBuf_end)364c = *m_pBuf++;365366m_bit_buf |= (c << m_bit_buf_size);367m_bit_buf_size += 8;368assert(m_bit_buf_size <= 32);369}370371return m_bit_buf & ((1 << num_bits) - 1);372}373374void remove_bits(uint32_t num_bits)375{376assert(m_bit_buf_size >= num_bits);377378m_bit_buf >>= num_bits;379m_bit_buf_size -= 
num_bits;380}381382uint32_t get_bits(uint32_t num_bits)383{384if (num_bits > 25)385{386assert(num_bits <= 32);387388const uint32_t bits0 = peek_bits(25);389m_bit_buf >>= 25;390m_bit_buf_size -= 25;391num_bits -= 25;392393const uint32_t bits = peek_bits(num_bits);394m_bit_buf >>= num_bits;395m_bit_buf_size -= num_bits;396397return bits0 | (bits << 25);398}399400const uint32_t bits = peek_bits(num_bits);401402m_bit_buf >>= num_bits;403m_bit_buf_size -= num_bits;404405return bits;406}407408uint32_t decode_truncated_binary(uint32_t n)409{410assert(n >= 2);411412const uint32_t k = basisu::floor_log2i(n);413const uint32_t u = (1 << (k + 1)) - n;414415uint32_t result = get_bits(k);416417if (result >= u)418result = ((result << 1) | get_bits(1)) - u;419420return result;421}422423uint32_t decode_rice(uint32_t m)424{425assert(m);426427uint32_t q = 0;428for (;;)429{430uint32_t k = peek_bits(16);431432uint32_t l = 0;433while (k & 1)434{435l++;436k >>= 1;437}438439q += l;440441remove_bits(l);442443if (l < 16)444break;445}446447return (q << m) + (get_bits(m + 1) >> 1);448}449450inline uint32_t decode_vlc(uint32_t chunk_bits)451{452assert(chunk_bits);453454const uint32_t chunk_size = 1 << chunk_bits;455const uint32_t chunk_mask = chunk_size - 1;456457uint32_t v = 0;458uint32_t ofs = 0;459460for ( ; ; )461{462uint32_t s = get_bits(chunk_bits + 1);463v |= ((s & chunk_mask) << ofs);464ofs += chunk_bits;465466if ((s & chunk_size) == 0)467break;468469if (ofs >= 32)470{471assert(0);472break;473}474}475476return v;477}478479inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)480{481assert(ct.m_code_sizes.size());482483const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;484485while (m_bit_buf_size < 16)486{487uint32_t c = 0;488if (m_pBuf < m_pBuf_end)489c = *m_pBuf++;490491m_bit_buf |= (c << m_bit_buf_size);492m_bit_buf_size += 8;493assert(m_bit_buf_size <= 32);494}495496int code_len;497498int sym;499if ((sym = 
ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)500{501code_len = sym >> 16;502sym &= 0xFFFF;503}504else505{506code_len = fast_lookup_bits;507do508{509sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1510} while (sym < 0);511}512513m_bit_buf >>= code_len;514m_bit_buf_size -= code_len;515516return sym;517}518519bool read_huffman_table(huffman_decoding_table &ct)520{521ct.clear();522523const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2);524525if (!total_used_syms)526return true;527if (total_used_syms > basisu::cHuffmanMaxSyms)528return false;529530uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes];531basisu::clear_obj(code_length_code_sizes);532533const uint32_t num_codelength_codes = get_bits(5);534if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes))535return false;536537for (uint32_t i = 0; i < num_codelength_codes; i++)538code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3));539540huffman_decoding_table code_length_table;541if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes))542return false;543544if (!code_length_table.is_valid())545return false;546547basisu::uint8_vec code_sizes(total_used_syms);548549uint32_t cur = 0;550while (cur < total_used_syms)551{552int c = decode_huffman(code_length_table);553554if (c <= 16)555code_sizes[cur++] = static_cast<uint8_t>(c);556else if (c == basisu::cHuffmanSmallZeroRunCode)557cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin;558else if (c == basisu::cHuffmanBigZeroRunCode)559cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin;560else561{562if (!cur)563return false;564565uint32_t l;566if (c == basisu::cHuffmanSmallRepeatCode)567l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin;568else569l = 
get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin;570571const uint8_t prev = code_sizes[cur - 1];572if (prev == 0)573return false;574do575{576if (cur >= total_used_syms)577return false;578code_sizes[cur++] = prev;579} while (--l > 0);580}581}582583if (cur != total_used_syms)584return false;585586return ct.init(total_used_syms, &code_sizes[0]);587}588589size_t get_bits_remaining() const590{591size_t total_bytes_remaining = m_pBuf_end - m_pBuf;592return total_bytes_remaining * 8 + m_bit_buf_size;593}594595private:596uint32_t m_buf_size;597const uint8_t *m_pBuf;598const uint8_t *m_pBuf_start;599const uint8_t *m_pBuf_end;600601uint32_t m_bit_buf;602uint32_t m_bit_buf_size;603};604605inline uint32_t basisd_rand(uint32_t seed)606{607if (!seed)608seed++;609uint32_t z = seed;610BASISD_znew;611return z;612}613614// Returns random number in [0,limit). Max limit is 0xFFFF.615inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit)616{617seed = basisd_rand(seed);618return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16;619}620621class approx_move_to_front622{623public:624approx_move_to_front(uint32_t n)625{626init(n);627}628629void init(uint32_t n)630{631m_values.resize(n);632m_rover = n / 2;633}634635const basisu::int_vec& get_values() const { return m_values; }636basisu::int_vec& get_values() { return m_values; }637638uint32_t size() const { return (uint32_t)m_values.size(); }639640const int& operator[] (uint32_t index) const { return m_values[index]; }641int operator[] (uint32_t index) { return m_values[index]; }642643void add(int new_value)644{645m_values[m_rover++] = new_value;646if (m_rover == m_values.size())647m_rover = (uint32_t)m_values.size() / 2;648}649650void use(uint32_t index)651{652if (index)653{654//std::swap(m_values[index / 2], m_values[index]);655int x = m_values[index / 2];656int y = m_values[index];657m_values[index / 2] = y;658m_values[index] = x;659}660}661662// returns -1 if not found663int find(int value) 
const664{665for (uint32_t i = 0; i < m_values.size(); i++)666if (m_values[i] == value)667return i;668return -1;669}670671void reset()672{673const uint32_t n = (uint32_t)m_values.size();674675m_values.clear();676677init(n);678}679680private:681basisu::int_vec m_values;682uint32_t m_rover;683};684685struct decoder_etc_block;686687inline uint8_t clamp255(int32_t i)688{689return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);690}691692enum eNoClamp693{694cNoClamp = 0695};696697struct color32698{699union700{701struct702{703uint8_t r;704uint8_t g;705uint8_t b;706uint8_t a;707};708709uint8_t c[4];710711uint32_t m;712};713714color32() { }715716color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }717color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); }718719void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); }720721void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); }722void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }723724void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); }725726uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; }727uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; }728729bool operator== (const color32&rhs) const { return m == rhs.m; }730731static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }732static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, 
basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }733};734735struct endpoint736{737color32 m_color5;738uint8_t m_inten5;739bool operator== (const endpoint& rhs) const740{741return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5);742}743bool operator!= (const endpoint& rhs) const { return !(*this == rhs); }744};745746struct selector747{748// Plain selectors (2-bits per value)749uint8_t m_selectors[4];750751// ETC1 selectors752uint8_t m_bytes[4];753754uint8_t m_lo_selector, m_hi_selector;755uint8_t m_num_unique_selectors;756bool operator== (const selector& rhs) const757{758return (m_selectors[0] == rhs.m_selectors[0]) &&759(m_selectors[1] == rhs.m_selectors[1]) &&760(m_selectors[2] == rhs.m_selectors[2]) &&761(m_selectors[3] == rhs.m_selectors[3]);762}763bool operator!= (const selector& rhs) const764{765return !(*this == rhs);766}767768void init_flags()769{770uint32_t hist[4] = { 0, 0, 0, 0 };771for (uint32_t y = 0; y < 4; y++)772{773for (uint32_t x = 0; x < 4; x++)774{775uint32_t s = get_selector(x, y);776hist[s]++;777}778}779780m_lo_selector = 3;781m_hi_selector = 0;782m_num_unique_selectors = 0;783784for (uint32_t i = 0; i < 4; i++)785{786if (hist[i])787{788m_num_unique_selectors++;789if (i < m_lo_selector) m_lo_selector = static_cast<uint8_t>(i);790if (i > m_hi_selector) m_hi_selector = static_cast<uint8_t>(i);791}792}793}794795// Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.796inline uint32_t get_selector(uint32_t x, uint32_t y) const797{798assert((x < 4) && (y < 4));799return (m_selectors[y] >> (x * 2)) & 3;800}801802void set_selector(uint32_t x, uint32_t y, uint32_t val)803{804static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };805806assert((x | y | val) < 4);807808m_selectors[y] &= ~(3 << (x * 2));809m_selectors[y] |= (val << (x * 2));810811const uint32_t 
etc1_bit_index = x * 4 + y;812813uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)];814815const uint32_t byte_bit_ofs = etc1_bit_index & 7;816const uint32_t mask = 1 << byte_bit_ofs;817818const uint32_t etc1_val = s_selector_index_to_etc1[val];819820const uint32_t lsb = etc1_val & 1;821const uint32_t msb = etc1_val >> 1;822823p[0] &= ~mask;824p[0] |= (lsb << byte_bit_ofs);825826p[-2] &= ~mask;827p[-2] |= (msb << byte_bit_ofs);828}829};830831bool basis_block_format_is_uncompressed(block_format tex_type);832833//------------------------------------834835typedef uint16_t half_float;836837const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number838const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number839const double MAX_HALF_FLOAT = 65504.0; // largest normal number840const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF; // the half float rep for 65504.0841842inline uint32_t get_bits(uint32_t val, int low, int high)843{844const int num_bits = (high - low) + 1;845assert((num_bits >= 1) && (num_bits <= 32));846847val >>= low;848if (num_bits != 32)849val &= ((1u << num_bits) - 1);850851return val;852}853854inline bool is_half_inf_or_nan(half_float v)855{856return get_bits(v, 10, 14) == 31;857}858859inline bool is_half_denorm(half_float v)860{861int e = (v >> 10) & 31;862return !e;863}864865inline int get_half_exp(half_float v)866{867int e = ((v >> 10) & 31);868return e ? (e - 15) : -14;869}870871inline int get_half_mantissa(half_float v)872{873if (is_half_denorm(v))874return v & 0x3FF;875return (v & 0x3FF) | 0x400;876}877878inline float get_half_mantissaf(half_float v)879{880return ((float)get_half_mantissa(v)) / 1024.0f;881}882883inline int get_half_sign(half_float v)884{885return v ? ((v & 0x8000) ? 
-1 : 1) : 0;886}887888inline bool half_is_signed(half_float v)889{890return (v & 0x8000) != 0;891}892893#if 0894int hexp = get_half_exp(Cf);895float hman = get_half_mantissaf(Cf);896int hsign = get_half_sign(Cf);897float k = powf(2.0f, hexp) * hman * hsign;898if (is_half_inf_or_nan(Cf))899k = std::numeric_limits<float>::quiet_NaN();900#endif901902half_float float_to_half(float val);903904inline float half_to_float(half_float hval)905{906union { float f; uint32_t u; } x = { 0 };907908uint32_t s = ((uint32_t)hval >> 15) & 1;909uint32_t e = ((uint32_t)hval >> 10) & 0x1F;910uint32_t m = (uint32_t)hval & 0x3FF;911912if (!e)913{914if (!m)915{916// +- 0917x.u = s << 31;918return x.f;919}920else921{922// denormalized923while (!(m & 0x00000400))924{925m <<= 1;926--e;927}928929++e;930m &= ~0x00000400;931}932}933else if (e == 31)934{935if (m == 0)936{937// +/- INF938x.u = (s << 31) | 0x7f800000;939return x.f;940}941else942{943// +/- NaN944x.u = (s << 31) | 0x7f800000 | (m << 13);945return x.f;946}947}948949e = e + (127 - 15);950m = m << 13;951952assert(s <= 1);953assert(m <= 0x7FFFFF);954assert(e <= 255);955956x.u = m | (e << 23) | (s << 31);957return x.f;958}959960// Originally from bc6h_enc.h961962void bc6h_enc_init();963964const uint32_t MAX_BLOG16_VAL = 0xFFFF;965966// BC6H internals967const uint32_t NUM_BC6H_MODES = 14;968const uint32_t BC6H_LAST_MODE_INDEX = 13;969const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights970const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;971972extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b973974struct bc6h_bit_layout975{976int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)977int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)978int8_t m_last_bit;979int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if 
reversed980};981982const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;983extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];984985extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]986987extern const uint8_t g_bc6h_weight3[8];988extern const uint8_t g_bc6h_weight4[16];989990extern const int8_t g_bc6h_mode_lookup[32];991992// Converts b16 to half float993inline half_float bc6h_blog16_to_half(uint32_t comp)994{995assert(comp <= 0xFFFF);996997// scale the magnitude by 31/64998comp = (comp * 31u) >> 6u;999return (half_float)comp;1000}10011002const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;10031004// Inverts bc6h_blog16_to_half().1005// Returns the nearest blog16 given a half value.1006inline uint32_t bc6h_half_to_blog16(half_float h)1007{1008assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);1009return (h * 64 + 30) / 31;1010}10111012// Suboptimal, but very close.1013inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)1014{1015assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);1016return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));1017}10181019struct bc6h_block1020{1021uint8_t m_bytes[16];1022};10231024void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);1025void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);1026void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);1027void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);1028void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]1029void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t 
common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]1030bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);10311032struct bc6h_logical_block1033{1034uint32_t m_mode;1035uint32_t m_partition_pattern; // must be 0 if 1 subset1036uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed1037uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 010381039void clear()1040{1041basisu::clear_obj(*this);1042}1043};10441045void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk);10461047namespace bc7_mode_5_encoder1048{1049void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode);1050}10511052} // namespace basist105310541055105610571058