CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/ext/basis_universal/basisu_transcoder_internal.h
Views: 1401
// basisu_transcoder_internal.h - Universal texture format transcoder library.1// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.2//3// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing4//5// Licensed under the Apache License, Version 2.0 (the "License");6// you may not use this file except in compliance with the License.7// You may obtain a copy of the License at8//9// http://www.apache.org/licenses/LICENSE-2.010//11// Unless required by applicable law or agreed to in writing, software12// distributed under the License is distributed on an "AS IS" BASIS,13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.14// See the License for the specific language governing permissions and15// limitations under the License.16#pragma once1718#ifdef _MSC_VER19#pragma warning (disable: 4127) // conditional expression is constant20#endif2122#define BASISD_LIB_VERSION 11623#define BASISD_VERSION_STRING "01.16"2425#ifdef _DEBUG26#define BASISD_BUILD_DEBUG27#else28#define BASISD_BUILD_RELEASE29#endif3031#include "basisu.h"3233#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))3435namespace basisu36{37extern bool g_debug_printf;38}3940namespace basist41{42// Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).43// You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices.44enum class block_format45{46cETC1, // ETC1S RGB47cETC2_RGBA, // full ETC2 EAC RGBA8 block48cBC1, // DXT1 RGB49cBC3, // BC4 block followed by a four color BC1 block50cBC4, // DXT5A (alpha block only)51cBC5, // two BC4 blocks52cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp53cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA54cBC7, // Full BC7 block, any mode55cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block)56cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR 
output data must have been written to the output buffer first to set the mode/rot fields etc.)57cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format)58cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC59// data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking.6061cATC_RGB,62cATC_RGBA_INTERPOLATED_ALPHA,63cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size6465cPVRTC2_4_RGB,66cPVRTC2_4_RGBA,6768cETC2_EAC_R11,69cETC2_EAC_RG11,7071cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits)7273cRGB32, // Writes RGB components to 32bpp output pixels74cRGBA32, // Writes RGB255 components to 32bpp output pixels75cA32, // Writes alpha component to 32bpp output pixels7677cRGB565,78cBGR565,7980cRGBA4444_COLOR,81cRGBA4444_ALPHA,82cRGBA4444_COLOR_OPAQUE,83cRGBA4444,8485cUASTC_4x4,8687cTotalBlockFormats88};8990const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31;91const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21;92const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9;93const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3;9495const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1;96const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1;97const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3;98const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4;99100const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds);101const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1;102const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS;103const uint32_t 
MAX_SELECTOR_HISTORY_BUF_SIZE = 64;104const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;105const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;106const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);107108uint16_t crc16(const void *r, size_t size, uint16_t crc);109110class huffman_decoding_table111{112friend class bitwise_decoder;113114public:115huffman_decoding_table()116{117}118119void clear()120{121basisu::clear_vector(m_code_sizes);122basisu::clear_vector(m_lookup);123basisu::clear_vector(m_tree);124}125126bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)127{128if (!total_syms)129{130clear();131return true;132}133134m_code_sizes.resize(total_syms);135memcpy(&m_code_sizes[0], pCode_sizes, total_syms);136137const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;138139m_lookup.resize(0);140m_lookup.resize(huffman_fast_lookup_size);141142m_tree.resize(0);143m_tree.resize(total_syms * 2);144145uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];146basisu::clear_obj(syms_using_codesize);147for (uint32_t i = 0; i < total_syms; i++)148{149if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize)150return false;151syms_using_codesize[pCode_sizes[i]]++;152}153154uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];155next_code[0] = next_code[1] = 0;156157uint32_t used_syms = 0, total = 0;158for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++)159{160used_syms += syms_using_codesize[i];161next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1));162}163164if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U))165return false;166167for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index)168{169uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index];170if 
(!code_size)171continue;172173cur_code = next_code[code_size]++;174175for (l = code_size; l > 0; l--, cur_code >>= 1)176rev_code = (rev_code << 1) | (cur_code & 1);177178if (code_size <= fast_lookup_bits)179{180uint32_t k = (code_size << 16) | sym_index;181while (rev_code < huffman_fast_lookup_size)182{183if (m_lookup[rev_code] != 0)184{185// Supplied codesizes can't create a valid prefix code.186return false;187}188189m_lookup[rev_code] = k;190rev_code += (1 << code_size);191}192continue;193}194195int tree_cur;196if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))197{198const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);199if (m_lookup[idx] != 0)200{201// Supplied codesizes can't create a valid prefix code.202return false;203}204205m_lookup[idx] = tree_next;206tree_cur = tree_next;207tree_next -= 2;208}209210if (tree_cur >= 0)211{212// Supplied codesizes can't create a valid prefix code.213return false;214}215216rev_code >>= (fast_lookup_bits - 1);217218for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)219{220tree_cur -= ((rev_code >>= 1) & 1);221222const int idx = -tree_cur - 1;223if (idx < 0)224return false;225else if (idx >= (int)m_tree.size())226m_tree.resize(idx + 1);227228if (!m_tree[idx])229{230m_tree[idx] = (int16_t)tree_next;231tree_cur = tree_next;232tree_next -= 2;233}234else235{236tree_cur = m_tree[idx];237if (tree_cur >= 0)238{239// Supplied codesizes can't create a valid prefix code.240return false;241}242}243}244245tree_cur -= ((rev_code >>= 1) & 1);246247const int idx = -tree_cur - 1;248if (idx < 0)249return false;250else if (idx >= (int)m_tree.size())251m_tree.resize(idx + 1);252253if (m_tree[idx] != 0)254{255// Supplied codesizes can't create a valid prefix code.256return false;257}258259m_tree[idx] = (int16_t)sym_index;260}261262return true;263}264265const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }266const basisu::int_vec get_lookup() const { return m_lookup; }267const 
basisu::int16_vec get_tree() const { return m_tree; }268269bool is_valid() const { return m_code_sizes.size() > 0; }270271private:272basisu::uint8_vec m_code_sizes;273basisu::int_vec m_lookup;274basisu::int16_vec m_tree;275};276277class bitwise_decoder278{279public:280bitwise_decoder() :281m_buf_size(0),282m_pBuf(nullptr),283m_pBuf_start(nullptr),284m_pBuf_end(nullptr),285m_bit_buf(0),286m_bit_buf_size(0)287{288}289290void clear()291{292m_buf_size = 0;293m_pBuf = nullptr;294m_pBuf_start = nullptr;295m_pBuf_end = nullptr;296m_bit_buf = 0;297m_bit_buf_size = 0;298}299300bool init(const uint8_t *pBuf, uint32_t buf_size)301{302if ((!pBuf) && (buf_size))303return false;304305m_buf_size = buf_size;306m_pBuf = pBuf;307m_pBuf_start = pBuf;308m_pBuf_end = pBuf + buf_size;309m_bit_buf = 0;310m_bit_buf_size = 0;311return true;312}313314void stop()315{316}317318inline uint32_t peek_bits(uint32_t num_bits)319{320if (!num_bits)321return 0;322323assert(num_bits <= 25);324325while (m_bit_buf_size < num_bits)326{327uint32_t c = 0;328if (m_pBuf < m_pBuf_end)329c = *m_pBuf++;330331m_bit_buf |= (c << m_bit_buf_size);332m_bit_buf_size += 8;333assert(m_bit_buf_size <= 32);334}335336return m_bit_buf & ((1 << num_bits) - 1);337}338339void remove_bits(uint32_t num_bits)340{341assert(m_bit_buf_size >= num_bits);342343m_bit_buf >>= num_bits;344m_bit_buf_size -= num_bits;345}346347uint32_t get_bits(uint32_t num_bits)348{349if (num_bits > 25)350{351assert(num_bits <= 32);352353const uint32_t bits0 = peek_bits(25);354m_bit_buf >>= 25;355m_bit_buf_size -= 25;356num_bits -= 25;357358const uint32_t bits = peek_bits(num_bits);359m_bit_buf >>= num_bits;360m_bit_buf_size -= num_bits;361362return bits0 | (bits << 25);363}364365const uint32_t bits = peek_bits(num_bits);366367m_bit_buf >>= num_bits;368m_bit_buf_size -= num_bits;369370return bits;371}372373uint32_t decode_truncated_binary(uint32_t n)374{375assert(n >= 2);376377const uint32_t k = basisu::floor_log2i(n);378const uint32_t u = (1 << (k + 1)) 
- n;379380uint32_t result = get_bits(k);381382if (result >= u)383result = ((result << 1) | get_bits(1)) - u;384385return result;386}387388uint32_t decode_rice(uint32_t m)389{390assert(m);391392uint32_t q = 0;393for (;;)394{395uint32_t k = peek_bits(16);396397uint32_t l = 0;398while (k & 1)399{400l++;401k >>= 1;402}403404q += l;405406remove_bits(l);407408if (l < 16)409break;410}411412return (q << m) + (get_bits(m + 1) >> 1);413}414415inline uint32_t decode_vlc(uint32_t chunk_bits)416{417assert(chunk_bits);418419const uint32_t chunk_size = 1 << chunk_bits;420const uint32_t chunk_mask = chunk_size - 1;421422uint32_t v = 0;423uint32_t ofs = 0;424425for ( ; ; )426{427uint32_t s = get_bits(chunk_bits + 1);428v |= ((s & chunk_mask) << ofs);429ofs += chunk_bits;430431if ((s & chunk_size) == 0)432break;433434if (ofs >= 32)435{436assert(0);437break;438}439}440441return v;442}443444inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)445{446assert(ct.m_code_sizes.size());447448const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;449450while (m_bit_buf_size < 16)451{452uint32_t c = 0;453if (m_pBuf < m_pBuf_end)454c = *m_pBuf++;455456m_bit_buf |= (c << m_bit_buf_size);457m_bit_buf_size += 8;458assert(m_bit_buf_size <= 32);459}460461int code_len;462463int sym;464if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)465{466code_len = sym >> 16;467sym &= 0xFFFF;468}469else470{471code_len = fast_lookup_bits;472do473{474sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1475} while (sym < 0);476}477478m_bit_buf >>= code_len;479m_bit_buf_size -= code_len;480481return sym;482}483484bool read_huffman_table(huffman_decoding_table &ct)485{486ct.clear();487488const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2);489490if (!total_used_syms)491return true;492if (total_used_syms > basisu::cHuffmanMaxSyms)493return false;494495uint8_t 
code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes];496basisu::clear_obj(code_length_code_sizes);497498const uint32_t num_codelength_codes = get_bits(5);499if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes))500return false;501502for (uint32_t i = 0; i < num_codelength_codes; i++)503code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3));504505huffman_decoding_table code_length_table;506if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes))507return false;508509if (!code_length_table.is_valid())510return false;511512basisu::uint8_vec code_sizes(total_used_syms);513514uint32_t cur = 0;515while (cur < total_used_syms)516{517int c = decode_huffman(code_length_table);518519if (c <= 16)520code_sizes[cur++] = static_cast<uint8_t>(c);521else if (c == basisu::cHuffmanSmallZeroRunCode)522cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin;523else if (c == basisu::cHuffmanBigZeroRunCode)524cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin;525else526{527if (!cur)528return false;529530uint32_t l;531if (c == basisu::cHuffmanSmallRepeatCode)532l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin;533else534l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin;535536const uint8_t prev = code_sizes[cur - 1];537if (prev == 0)538return false;539do540{541if (cur >= total_used_syms)542return false;543code_sizes[cur++] = prev;544} while (--l > 0);545}546}547548if (cur != total_used_syms)549return false;550551return ct.init(total_used_syms, &code_sizes[0]);552}553554private:555uint32_t m_buf_size;556const uint8_t *m_pBuf;557const uint8_t *m_pBuf_start;558const uint8_t *m_pBuf_end;559560uint32_t m_bit_buf;561uint32_t m_bit_buf_size;562};563564inline uint32_t basisd_rand(uint32_t seed)565{566if 
(!seed)567seed++;568uint32_t z = seed;569BASISD_znew;570return z;571}572573// Returns random number in [0,limit). Max limit is 0xFFFF.574inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit)575{576seed = basisd_rand(seed);577return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16;578}579580class approx_move_to_front581{582public:583approx_move_to_front(uint32_t n)584{585init(n);586}587588void init(uint32_t n)589{590m_values.resize(n);591m_rover = n / 2;592}593594const basisu::int_vec& get_values() const { return m_values; }595basisu::int_vec& get_values() { return m_values; }596597uint32_t size() const { return (uint32_t)m_values.size(); }598599const int& operator[] (uint32_t index) const { return m_values[index]; }600int operator[] (uint32_t index) { return m_values[index]; }601602void add(int new_value)603{604m_values[m_rover++] = new_value;605if (m_rover == m_values.size())606m_rover = (uint32_t)m_values.size() / 2;607}608609void use(uint32_t index)610{611if (index)612{613//std::swap(m_values[index / 2], m_values[index]);614int x = m_values[index / 2];615int y = m_values[index];616m_values[index / 2] = y;617m_values[index] = x;618}619}620621// returns -1 if not found622int find(int value) const623{624for (uint32_t i = 0; i < m_values.size(); i++)625if (m_values[i] == value)626return i;627return -1;628}629630void reset()631{632const uint32_t n = (uint32_t)m_values.size();633634m_values.clear();635636init(n);637}638639private:640basisu::int_vec m_values;641uint32_t m_rover;642};643644struct decoder_etc_block;645646inline uint8_t clamp255(int32_t i)647{648return (uint8_t)((i & 0xFFFFFF00U) ? 
(~(i >> 31)) : i);649}650651enum eNoClamp652{653cNoClamp = 0654};655656struct color32657{658union659{660struct661{662uint8_t r;663uint8_t g;664uint8_t b;665uint8_t a;666};667668uint8_t c[4];669670uint32_t m;671};672673color32() { }674675color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }676color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); }677678void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); }679680void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); }681void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }682683void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); }684685uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; }686uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; }687688bool operator== (const color32&rhs) const { return m == rhs.m; }689690static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }691static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }692};693694struct endpoint695{696color32 m_color5;697uint8_t m_inten5;698bool operator== (const endpoint& rhs) const699{700return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5);701}702bool operator!= (const 
endpoint& rhs) const { return !(*this == rhs); }703};704705struct selector706{707// Plain selectors (2-bits per value)708uint8_t m_selectors[4];709710// ETC1 selectors711uint8_t m_bytes[4];712713uint8_t m_lo_selector, m_hi_selector;714uint8_t m_num_unique_selectors;715bool operator== (const selector& rhs) const716{717return (m_selectors[0] == rhs.m_selectors[0]) &&718(m_selectors[1] == rhs.m_selectors[1]) &&719(m_selectors[2] == rhs.m_selectors[2]) &&720(m_selectors[3] == rhs.m_selectors[3]);721}722bool operator!= (const selector& rhs) const723{724return !(*this == rhs);725}726727void init_flags()728{729uint32_t hist[4] = { 0, 0, 0, 0 };730for (uint32_t y = 0; y < 4; y++)731{732for (uint32_t x = 0; x < 4; x++)733{734uint32_t s = get_selector(x, y);735hist[s]++;736}737}738739m_lo_selector = 3;740m_hi_selector = 0;741m_num_unique_selectors = 0;742743for (uint32_t i = 0; i < 4; i++)744{745if (hist[i])746{747m_num_unique_selectors++;748if (i < m_lo_selector) m_lo_selector = static_cast<uint8_t>(i);749if (i > m_hi_selector) m_hi_selector = static_cast<uint8_t>(i);750}751}752}753754// Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.755inline uint32_t get_selector(uint32_t x, uint32_t y) const756{757assert((x < 4) && (y < 4));758return (m_selectors[y] >> (x * 2)) & 3;759}760761void set_selector(uint32_t x, uint32_t y, uint32_t val)762{763static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };764765assert((x | y | val) < 4);766767m_selectors[y] &= ~(3 << (x * 2));768m_selectors[y] |= (val << (x * 2));769770const uint32_t etc1_bit_index = x * 4 + y;771772uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)];773774const uint32_t byte_bit_ofs = etc1_bit_index & 7;775const uint32_t mask = 1 << byte_bit_ofs;776777const uint32_t etc1_val = s_selector_index_to_etc1[val];778779const uint32_t lsb = etc1_val & 1;780const uint32_t msb = etc1_val >> 1;781782p[0] &= ~mask;783p[0] |= (lsb << byte_bit_ofs);784785p[-2] &= ~mask;786p[-2] |= (msb 
<< byte_bit_ofs);787}788};789790bool basis_block_format_is_uncompressed(block_format tex_type);791792} // namespace basist793794795796797798