Path: blob/master/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
9902 views
// basisu_gpu_texture.cpp1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14#include "basisu_gpu_texture.h"15#include "basisu_enc.h"16#include "basisu_pvrtc1_4.h"17#include "3rdparty/android_astc_decomp.h"18#include "basisu_bc7enc.h"19#include "../transcoder/basisu_astc_hdr_core.h"2021#define BASISU_USE_GOOGLE_ASTC_DECODER (1)2223namespace basisu24{25//------------------------------------------------------------------------------------------------26// ETC2 EAC2728void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)29{30static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");3132const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);3334const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];3536const uint64_t selector_bits = pBlock->get_selector_bits();3738const int32_t base = pBlock->m_base;39const int32_t mul = pBlock->m_multiplier;4041pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);42pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);43pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);44pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);4546pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);47pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);48pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);49pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);5051pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);52pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);53pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);54pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);5556pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);57pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);58pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);59pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);60}6162//------------------------------------------------------------------------------------------------63// BC164struct bc1_block65{66enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };6768uint8_t m_low_color[cTotalEndpointBytes];69uint8_t m_high_color[cTotalEndpointBytes];70uint8_t m_selectors[cTotalSelectorBytes];7172inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }73inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }7475static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)76{77r = (c >> 11) & 31;78g = (c >> 5) & 63;79b = c & 31;8081r = (r << 3) | (r >> 2);82g = (g << 2) | (g >> 4);83b = (b << 3) | (b >> 2);84}8586inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * 2)) & 3; }87};8889// Returns true if the block uses 3 color punchthrough alpha mode.90bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)91{92static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");9394const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);9596const uint32_t l = pBlock->get_low_color();97const uint32_t h = pBlock->get_high_color();9899color_rgba c[4];100101uint32_t r0, g0, b0, r1, g1, b1;102bc1_block::unpack_color(l, r0, g0, b0);103bc1_block::unpack_color(h, r1, g1, b1);104105c[0].set_noclamp_rgba(r0, g0, b0, 255);106c[1].set_noclamp_rgba(r1, g1, b1, 255);107108bool used_punchthrough = false;109110if (l > h)111{112c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);113c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);114}115else116{117c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);118c[3].set_noclamp_rgba(0, 0, 0, 0);119used_punchthrough = true;120}121122if (set_alpha)123{124for (uint32_t y = 0; y < 4; y++, pPixels += 4)125{126pPixels[0] = c[pBlock->get_selector(0, y)];127pPixels[1] = c[pBlock->get_selector(1, y)];128pPixels[2] = c[pBlock->get_selector(2, y)];129pPixels[3] = c[pBlock->get_selector(3, y)];130}131}132else133{134for (uint32_t y = 0; y < 4; y++, pPixels += 4)135{136pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);137pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);138pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);139pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);140}141}142143return used_punchthrough;144}145146bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)147{148static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");149150const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);151152const uint32_t l = pBlock->get_low_color();153const uint32_t h = pBlock->get_high_color();154155color_rgba c[4];156157int r0 = (l >> 11) & 31;158int g0 = (l >> 5) & 63;159int b0 = l & 31;160int r1 = (h >> 11) & 31;161int g1 = (h >> 5) & 63;162int b1 = h & 31;163164c[0].b = (uint8_t)((3 * b0 * 22) / 8);165c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));166c[0].r = (uint8_t)((3 * r0 * 22) / 8);167c[0].a = 0xFF;168169c[1].r = (uint8_t)((3 * r1 * 22) / 8);170c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));171c[1].b = (uint8_t)((3 * b1 * 22) / 8);172c[1].a = 0xFF;173174int gdiff = c[1].g - c[0].g;175176bool used_punchthrough = false;177178if (l > h)179{180c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);181c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));182c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);183c[2].a = 0xFF;184185c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);186c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);187c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);188c[3].a = 0xFF;189}190else191{192c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);193c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);194c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);195c[2].a = 0xFF;196197c[3].set_noclamp_rgba(0, 0, 0, 0);198used_punchthrough = true;199}200201if (set_alpha)202{203for (uint32_t y = 0; y < 4; y++, pPixels += 4)204{205pPixels[0] = c[pBlock->get_selector(0, y)];206pPixels[1] = c[pBlock->get_selector(1, y)];207pPixels[2] = c[pBlock->get_selector(2, y)];208pPixels[3] = c[pBlock->get_selector(3, y)];209}210}211else212{213for (uint32_t y = 0; y < 4; y++, pPixels += 4)214{215pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);216pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);217pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);218pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);219}220}221222return used_punchthrough;223}224225static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; }226static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; }227228bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)229{230const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);231232const uint32_t l = pBlock->get_low_color();233const uint32_t h = pBlock->get_high_color();234235color_rgba c[4];236237uint32_t r0, g0, b0, r1, g1, b1;238bc1_block::unpack_color(l, r0, g0, b0);239bc1_block::unpack_color(h, r1, g1, b1);240241c[0].set_noclamp_rgba(r0, g0, b0, 255);242c[1].set_noclamp_rgba(r1, g1, b1, 255);243244bool used_punchthrough = false;245246if (l > h)247{248c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);249c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);250}251else252{253c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);254c[3].set_noclamp_rgba(0, 0, 0, 0);255used_punchthrough = true;256}257258if (set_alpha)259{260for (uint32_t y = 0; y < 4; y++, pPixels += 4)261{262pPixels[0] = c[pBlock->get_selector(0, y)];263pPixels[1] = c[pBlock->get_selector(1, y)];264pPixels[2] = c[pBlock->get_selector(2, y)];265pPixels[3] = c[pBlock->get_selector(3, y)];266}267}268else269{270for (uint32_t y = 0; y < 4; y++, pPixels += 4)271{272pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);273pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);274pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);275pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);276}277}278279return used_punchthrough;280}281282//------------------------------------------------------------------------------------------------283// BC3-5284285struct bc4_block286{287enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };288uint8_t m_endpoints[2];289290uint8_t m_selectors[cTotalSelectorBytes];291292inline uint32_t get_low_alpha() const { return m_endpoints[0]; }293inline uint32_t get_high_alpha() const { return m_endpoints[1]; }294inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }295296inline uint64_t get_selector_bits() const297{298return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) |299(((uint64_t)m_selectors[4]) << 32U) |300(((uint64_t)m_selectors[5]) << 40U);301}302303inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const304{305assert((x < 4U) && (y < 4U));306return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);307}308309static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)310{311pDst[0] = static_cast<uint8_t>(l);312pDst[1] = static_cast<uint8_t>(h);313pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);314pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);315pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);316pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);317pDst[6] = 0;318pDst[7] = 255;319return 6;320}321322static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)323{324pDst[0] = static_cast<uint8_t>(l);325pDst[1] = static_cast<uint8_t>(h);326pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);327pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);328pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);329pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);330pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);331pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);332return 8;333}334335static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)336{337if (l > h)338return get_block_values8(pDst, l, h);339else340return get_block_values6(pDst, l, h);341}342};343344void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)345{346static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");347348const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);349350uint8_t sel_values[8];351bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());352353const uint64_t selector_bits = pBlock->get_selector_bits();354355for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))356{357pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];358pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];359pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];360pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];361}362}363364// Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.365bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)366{367bool success = true;368369if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))370success = false;371372unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));373374return success;375}376377// writes RG378void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)379{380unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));381unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));382}383384//------------------------------------------------------------------------------------------------385// ATC isn't officially documented, so I'm assuming these references:386// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf387// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c388// The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.389void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)390{391const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);392393const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);394const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);395uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);396397const bool mode = (color0 & 0x8000) != 0;398399color_rgba c[4];400401c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);402c[0].r = (c[0].r << 3) | (c[0].r >> 2);403c[0].g = (c[0].g << 3) | (c[0].g >> 2);404c[0].b = (c[0].b << 3) | (c[0].b >> 2);405406c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);407c[3].r = (c[3].r << 3) | (c[3].r >> 2);408c[3].g = (c[3].g << 2) | (c[3].g >> 4);409c[3].b = (c[3].b << 3) | (c[3].b >> 2);410411if (mode)412{413c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255);414c[2] = c[0];415c[0].set(0, 0, 0, 255);416}417else418{419c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;420c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;421c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;422423c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;424c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;425c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;426}427428for (uint32_t i = 0; i < 16; i++)429{430const uint32_t s = sels & 3;431432pPixels[i] = c[s];433434sels >>= 2;435}436}437438//------------------------------------------------------------------------------------------------439// BC7 mode 0-7 decompression.440// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.441442static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; }443static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; }444445static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; }446static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; }447static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; }448static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)449{450assert(l <= 255 && h <= 255);451switch (bits)452{453case 2: return bc7_interp2(l, h, w);454case 3: return bc7_interp3(l, h, w);455case 4: return bc7_interp4(l, h, w);456default:457break;458}459return 0;460}461462bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)463{464//const uint32_t SUBSETS = 3;465const uint32_t ENDPOINTS = 6;466const uint32_t COMPS = 3;467const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;468const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;469const uint32_t PBITS = (mode == 0) ? 6 : 0;470const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;471472uint32_t bit_offset = 0;473const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);474475if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;476477const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);478479color_rgba endpoints[ENDPOINTS];480for (uint32_t c = 0; c < COMPS; c++)481for (uint32_t e = 0; e < ENDPOINTS; e++)482endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);483484uint32_t pbits[6];485for (uint32_t p = 0; p < PBITS; p++)486pbits[p] = read_bits32(pBuf, bit_offset, 1);487488uint32_t weights[16];489for (uint32_t i = 0; i < 16; i++)490weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);491492assert(bit_offset == 128);493494for (uint32_t e = 0; e < ENDPOINTS; e++)495for (uint32_t c = 0; c < 4; c++)496endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));497498color_rgba block_colors[3][8];499for (uint32_t s = 0; s < 3; s++)500for (uint32_t i = 0; i < WEIGHT_VALS; i++)501{502for (uint32_t c = 0; c < 3; c++)503block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);504block_colors[s][i][3] = 255;505}506507for (uint32_t i = 0; i < 16; i++)508pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]];509510return true;511}512513bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)514{515//const uint32_t SUBSETS = 2;516const uint32_t ENDPOINTS = 4;517const uint32_t COMPS = (mode == 7) ? 4 : 3;518const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;519const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);520const uint32_t PBITS = (mode == 1) ? 2 : 4;521const uint32_t SHARED_PBITS = (mode == 1) ? true : false;522const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;523524uint32_t bit_offset = 0;525const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);526527if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;528529const uint32_t part = read_bits32(pBuf, bit_offset, 6);530531color_rgba endpoints[ENDPOINTS];532for (uint32_t c = 0; c < COMPS; c++)533for (uint32_t e = 0; e < ENDPOINTS; e++)534endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);535536uint32_t pbits[4];537for (uint32_t p = 0; p < PBITS; p++)538pbits[p] = read_bits32(pBuf, bit_offset, 1);539540uint32_t weights[16];541for (uint32_t i = 0; i < 16; i++)542weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);543544assert(bit_offset == 128);545546for (uint32_t e = 0; e < ENDPOINTS; e++)547for (uint32_t c = 0; c < 4; c++)548endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));549550color_rgba block_colors[2][8];551for (uint32_t s = 0; s < 2; s++)552for (uint32_t i = 0; i < WEIGHT_VALS; i++)553{554for (uint32_t c = 0; c < COMPS; c++)555block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);556block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];557}558559for (uint32_t i = 0; i < 16; i++)560pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]];561562return true;563}564565bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)566{567const uint32_t ENDPOINTS = 2;568const uint32_t COMPS = 4;569const uint32_t WEIGHT_BITS = 2;570const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;571const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;572const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;573//const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;574//const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;575576uint32_t bit_offset = 0;577const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);578579if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;580581const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);582const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;583584color_rgba endpoints[ENDPOINTS];585for (uint32_t c = 0; c < COMPS; c++)586for (uint32_t e = 0; e < ENDPOINTS; e++)587endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);588589const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };590591uint32_t weights[16], a_weights[16];592593for (uint32_t i = 0; i < 16; i++)594(index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));595596for (uint32_t i = 0; i < 16; i++)597(index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));598599assert(bit_offset == 128);600601for (uint32_t e = 0; e < ENDPOINTS; e++)602for (uint32_t c = 0; c < 4; c++)603endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);604605color_rgba block_colors[8];606for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)607for (uint32_t c = 0; c < 3; c++)608block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);609610for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)611block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);612613for (uint32_t i = 0; i < 16; i++)614{615pPixels[i] = block_colors[weights[i]];616pPixels[i].a = block_colors[a_weights[i]].a;617if (comp_rot >= 1)618std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);619}620621return true;622}623624struct bc7_mode_6625{626struct627{628uint64_t m_mode : 7;629uint64_t m_r0 : 7;630uint64_t m_r1 : 7;631uint64_t m_g0 : 7;632uint64_t m_g1 : 7;633uint64_t m_b0 : 7;634uint64_t m_b1 : 7;635uint64_t m_a0 : 7;636uint64_t m_a1 : 7;637uint64_t m_p0 : 1;638} m_lo;639640union641{642struct643{644uint64_t m_p1 : 1;645uint64_t m_s00 : 3;646uint64_t m_s10 : 4;647uint64_t m_s20 : 4;648uint64_t m_s30 : 4;649650uint64_t m_s01 : 4;651uint64_t m_s11 : 4;652uint64_t m_s21 : 4;653uint64_t m_s31 : 4;654655uint64_t m_s02 : 4;656uint64_t m_s12 : 4;657uint64_t m_s22 : 4;658uint64_t m_s32 : 4;659660uint64_t m_s03 : 4;661uint64_t m_s13 : 4;662uint64_t m_s23 : 4;663uint64_t m_s33 : 4;664665} m_hi;666667uint64_t m_hi_bits;668};669};670671bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)672{673static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");674675const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);676677if (block.m_lo.m_mode != (1 << 6))678return false;679680const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);681const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);682const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);683const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);684const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);685const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);686const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);687const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);688689color_rgba vals[16];690for (uint32_t i = 0; i < 16; i++)691{692const uint32_t w = basist::g_bc7_weights4[i];693const uint32_t iw = 64 - w;694vals[i].set_noclamp_rgba(695(r0 * iw + r1 * w + 32) >> 6,696(g0 * iw + g1 * w + 32) >> 6,697(b0 * iw + b1 * w + 32) >> 6,698(a0 * iw + a1 * w + 32) >> 6);699}700701pPixels[0] = vals[block.m_hi.m_s00];702pPixels[1] = vals[block.m_hi.m_s10];703pPixels[2] = vals[block.m_hi.m_s20];704pPixels[3] = vals[block.m_hi.m_s30];705706pPixels[4] = vals[block.m_hi.m_s01];707pPixels[5] = vals[block.m_hi.m_s11];708pPixels[6] = vals[block.m_hi.m_s21];709pPixels[7] = vals[block.m_hi.m_s31];710711pPixels[8] = vals[block.m_hi.m_s02];712pPixels[9] = vals[block.m_hi.m_s12];713pPixels[10] = vals[block.m_hi.m_s22];714pPixels[11] = vals[block.m_hi.m_s32];715716pPixels[12] = vals[block.m_hi.m_s03];717pPixels[13] = vals[block.m_hi.m_s13];718pPixels[14] = vals[block.m_hi.m_s23];719pPixels[15] = vals[block.m_hi.m_s33];720721return true;722}723724bool unpack_bc7(const void *pBlock, color_rgba *pPixels)725{726const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];727728for (uint32_t mode = 0; mode <= 7; mode++)729{730if (first_byte & (1U << mode))731{732switch (mode)733{734case 0:735case 2:736return unpack_bc7_mode0_2(mode, pBlock, pPixels);737case 1:738case 3:739case 7:740return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);741case 4:742case 5:743return unpack_bc7_mode4_5(mode, pBlock, pPixels);744case 6:745return unpack_bc7_mode6(pBlock, pPixels);746default:747break;748}749}750}751752return false;753}754755static inline int bc6h_sign_extend(int val, int bits)756{757assert((bits >= 1) && (bits < 32));758assert((val >= 0) && (val < (1 << bits)));759return (val << (32 - bits)) >> (32 - bits);760}761762static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed)763{764int bitmask = ((1 << num_bits) - 1);765int v = (base + delta) & bitmask;766return is_signed ? bc6h_sign_extend(v, num_bits) : v;767}768769static int bc6h_dequantize(int val, int bits, int is_signed)770{771int result;772if (is_signed)773{774if (bits >= 16)775result = val;776else777{778int s_flag = 0;779if (val < 0)780{781s_flag = 1;782val = -val;783}784785if (val == 0)786result = 0;787else if (val >= ((1 << (bits - 1)) - 1))788result = 0x7FFF;789else790result = ((val << 15) + 0x4000) >> (bits - 1);791792if (s_flag)793result = -result;794}795}796else797{798if (bits >= 15)799result = val;800else if (!val)801result = 0;802else if (val == ((1 << bits) - 1))803result = 0xFFFF;804else805result = ((val << 16) + 0x8000) >> bits;806}807return result;808}809810static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index)811{812return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6;813}814815static inline basist::half_float bc6h_convert_to_half(int val, int is_signed)816{817if (!is_signed)818{819// scale by 31/64820return (basist::half_float)((val * 31) >> 6);821}822823// scale by 31/32824val = (val < 0) ? -(((-val) * 31) >> 5) : (val * 31) >> 5;825826int s = 0;827if (val < 0)828{829s = 0x8000;830val = -val;831}832833return (basist::half_float)(s | val);834}835836static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits)837{838assert((num_bits) && (num_bits <= 63));839840uint32_t v = (uint32_t)(l & ((1U << num_bits) - 1U));841842l >>= num_bits;843l |= (h << (64U - num_bits));844h >>= num_bits;845846total_bits += num_bits;847assert(total_bits <= 128);848849return v;850}851852static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits)853{854uint32_t res = 0;855for (uint32_t i = 0; i < num_bits; i++)856{857uint32_t bit = (v & (1u << i)) != 0u;858res |= (bit << (num_bits - 1u - i));859}860return res;861}862863static inline uint64_t bc6h_read_le_qword(const void* p)864{865const uint8_t* pSrc = static_cast<const uint8_t*>(p);866return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U);867}868869bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs)870{871assert(dest_pitch_in_halfs >= 4 * 3);872873const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3;874875const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block);876basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block);877878uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t));879880// Unpack mode881const int mode = basist::g_bc6h_mode_lookup[blo & 31];882if (mode < 0)883{884for (int y = 0; y < 4; y++)885{886memset(pDst, 0, sizeof(basist::half_float) * 4);887pDst += dest_pitch_in_halfs;888}889return false;890}891892// Skip mode bits893uint32_t total_bits_read = 0;894bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read);895896assert(mode < (int)basist::NUM_BC6H_MODES);897898const uint32_t num_subsets = (mode >= 10) ? 1 : 2;899const bool is_mode_9_or_10 = (mode == 9) || (mode == 10);900901// Unpack endpoint components902int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } }; // [subset][comp][l/h]903int part_index = 0;904905uint32_t layout_index = 0;906while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX)907{908const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index];909910if (layout.m_comp < 0)911break;912913const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1;914assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS)));915916const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit;917assert(last_bit >= 0);918919int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index];920921if (first_bit < 0)922{923res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit);924}925else926{927const int total_bits = iabs(last_bit - first_bit) + 1;928const int bit_shift = basisu::minimum(first_bit, last_bit);929930int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read);931932if (last_bit < first_bit)933b = bc6h_reverse_bits(b, total_bits);934935res |= (b << bit_shift);936}937938layout_index++;939}940assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX);941942// Sign extend/dequantize endpoints943const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0];944if (is_signed)945{946for (uint32_t comp = 0; comp < 3; comp++)947comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits);948}949950if (is_signed || !is_mode_9_or_10)951{952for (uint32_t subset = 0; subset < num_subsets; subset++)953for (uint32_t comp = 0; comp < 3; comp++)954for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)955comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]);956}957958if (!is_mode_9_or_10)959{960for (uint32_t subset = 0; subset < num_subsets; subset++)961for (uint32_t comp = 0; comp < 3; comp++)962for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)963comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed);964}965966for (uint32_t subset = 0; subset < num_subsets; subset++)967for (uint32_t comp = 0; comp < 3; comp++)968for (uint32_t lh = 0; lh < 2; lh++)969comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed);970971// Now unpack weights and output texels972const int weight_bits = (mode >= 10) ? 4 : 3;973const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3;974975dest_pitch_in_halfs -= 4 * 3;976977for (uint32_t y = 0; y < 4; y++)978{979for (uint32_t x = 0; x < 4; x++)980{981int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x];982const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0);983984subset &= 1;985986const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read);987988pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed);989pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed);990pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed);991992pDst += 3;993}994995pDst += dest_pitch_in_halfs;996}997998assert(total_bits_read == 128);999return true;1000}1001//------------------------------------------------------------------------------------------------1002// FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1)10031004struct fxt1_block1005{1006union1007{1008struct1009{1010uint64_t m_t00 : 2;1011uint64_t m_t01 : 2;1012uint64_t m_t02 : 2;1013uint64_t m_t03 : 2;1014uint64_t m_t04 : 2;1015uint64_t m_t05 : 2;1016uint64_t m_t06 : 2;1017uint64_t m_t07 : 2;1018uint64_t m_t08 : 2;1019uint64_t m_t09 : 2;1020uint64_t m_t10 : 2;1021uint64_t m_t11 : 2;1022uint64_t m_t12 : 2;1023uint64_t m_t13 : 2;1024uint64_t m_t14 : 2;1025uint64_t m_t15 : 2;1026uint64_t m_t16 : 2;1027uint64_t m_t17 : 2;1028uint64_t m_t18 : 2;1029uint64_t m_t19 : 2;1030uint64_t m_t20 : 2;1031uint64_t m_t21 : 2;1032uint64_t m_t22 : 2;1033uint64_t m_t23 : 2;1034uint64_t m_t24 : 2;1035uint64_t m_t25 : 2;1036uint64_t m_t26 : 2;1037uint64_t m_t27 : 2;1038uint64_t m_t28 : 2;1039uint64_t m_t29 : 2;1040uint64_t m_t30 : 2;1041uint64_t m_t31 : 2;1042} m_lo;1043uint64_t m_lo_bits;1044uint8_t m_sels[8];1045};10461047union1048{1049struct1050{1051#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING1052// This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted.1053// Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!1054uint64_t m_b1 : 5;1055uint64_t m_g1 : 5;1056uint64_t m_r1 : 5;1057uint64_t m_b0 : 5;1058uint64_t m_g0 : 5;1059uint64_t m_r0 : 5;1060uint64_t m_b3 : 5;1061uint64_t m_g3 : 5;1062uint64_t m_r3 : 5;1063uint64_t m_b2 : 5;1064uint64_t m_g2 : 5;1065uint64_t m_r2 : 5;1066#else1067// Intel's encoding, and the encoding in the OpenGL FXT1 spec.1068uint64_t m_b0 : 5;1069uint64_t m_g0 : 5;1070uint64_t m_r0 : 5;1071uint64_t m_b1 : 5;1072uint64_t m_g1 : 5;1073uint64_t m_r1 : 5;1074uint64_t m_b2 : 5;1075uint64_t m_g2 : 5;1076uint64_t m_r2 : 5;1077uint64_t m_b3 : 5;1078uint64_t m_g3 : 5;1079uint64_t m_r3 : 5;1080#endif1081uint64_t m_alpha : 1;1082uint64_t m_glsb : 2;1083uint64_t m_mode : 1;1084} m_hi;10851086uint64_t m_hi_bits;1087};1088};10891090static color_rgba expand_565(const color_rgba& c)1091{1092return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255);1093}10941095// We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.1096bool unpack_fxt1(const void *p, color_rgba *pPixels)1097{1098const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);10991100if (pBlock->m_hi.m_mode == 0)1101return false;1102if (pBlock->m_hi.m_alpha == 1)1103return false;11041105color_rgba colors[4];11061107colors[0].r = pBlock->m_hi.m_r0;1108colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1)));1109colors[0].b = pBlock->m_hi.m_b0;1110colors[0].a = 255;11111112colors[1].r = pBlock->m_hi.m_r1;1113colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1));1114colors[1].b = pBlock->m_hi.m_b1;1115colors[1].a = 255;11161117colors[2].r = pBlock->m_hi.m_r2;1118colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1)));1119colors[2].b = pBlock->m_hi.m_b2;1120colors[2].a = 255;11211122colors[3].r = pBlock->m_hi.m_r3;1123colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1));1124colors[3].b = pBlock->m_hi.m_b3;1125colors[3].a = 255;11261127for (uint32_t i = 0; i < 4; i++)1128colors[i] = expand_565(colors[i]);11291130color_rgba block0_colors[4];1131block0_colors[0] = colors[0];1132block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255);1133block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255);1134block0_colors[3] = colors[1];11351136for (uint32_t i = 0; i < 16; i++)1137{1138const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3;11391140const uint32_t x = i & 3;1141const uint32_t y = i >> 2;1142pPixels[x + y * 8] = block0_colors[sel];1143}11441145color_rgba block1_colors[4];1146block1_colors[0] = colors[2];1147block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255);1148block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255);1149block1_colors[3] = colors[3];11501151for (uint32_t i = 0; i < 16; i++)1152{1153const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3;11541155const uint32_t x = i & 3;1156const uint32_t y = i >> 2;1157pPixels[4 + x + y * 8] = block1_colors[sel];1158}11591160return true;1161}11621163//------------------------------------------------------------------------------------------------1164// PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!)11651166struct pvrtc2_block1167{1168uint8_t m_modulation[4];11691170union1171{1172union1173{1174// Opaque mode: RGB colora=554 and colorb=5551175struct1176{1177uint32_t m_mod_flag : 1;1178uint32_t m_blue_a : 4;1179uint32_t m_green_a : 5;1180uint32_t m_red_a : 5;1181uint32_t m_hard_flag : 1;1182uint32_t m_blue_b : 5;1183uint32_t m_green_b : 5;1184uint32_t m_red_b : 5;1185uint32_t m_opaque_flag : 1;11861187} m_opaque_color_data;11881189// Transparent mode: RGBA colora=4433 and colorb=44431190struct1191{1192uint32_t m_mod_flag : 1;1193uint32_t m_blue_a : 3;1194uint32_t m_green_a : 4;1195uint32_t m_red_a : 4;1196uint32_t m_alpha_a : 3;1197uint32_t m_hard_flag : 1;1198uint32_t m_blue_b : 4;1199uint32_t m_green_b : 4;1200uint32_t m_red_b : 4;1201uint32_t m_alpha_b : 3;1202uint32_t m_opaque_flag : 1;12031204} m_trans_color_data;1205};12061207uint32_t m_color_data_bits;1208};1209};12101211static color_rgba convert_rgb_555_to_888(const color_rgba& col)1212{1213return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255);1214}12151216static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)1217{1218return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);1219}12201221// PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.1222bool unpack_pvrtc2(const void *p, color_rgba *pPixels)1223{1224const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);12251226if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag))1227{1228// This mode isn't supported by the transcoder, so we aren't bothering with it here.1229return false;1230}12311232color_rgba colors[4];12331234if (pBlock->m_opaque_color_data.m_opaque_flag)1235{1236// colora=5541237color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255);12381239// colora=5551240color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255);12411242colors[0] = convert_rgb_555_to_888(color_a);1243colors[3] = convert_rgb_555_to_888(color_b);12441245colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255);1246colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255);1247}1248else1249{1250// colora=44331251color_rgba color_a(1252(pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3),1253(pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3),1254(pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1),1255pBlock->m_trans_color_data.m_alpha_a << 1);12561257//colorb=44431258color_rgba color_b(1259(pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3),1260(pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3),1261(pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3),1262(pBlock->m_trans_color_data.m_alpha_b << 1) | 1);12631264colors[0] = convert_rgba_5554_to_8888(color_a);1265colors[3] = convert_rgba_5554_to_8888(color_b);1266}12671268colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8);1269colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8);12701271for (uint32_t i = 0; i < 16; i++)1272{1273const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3;1274pPixels[i] = colors[sel];1275}12761277return true;1278}12791280//------------------------------------------------------------------------------------------------1281// ETC2 EAC R11 or RG1112821283struct etc2_eac_r111284{1285uint64_t m_base : 8;1286uint64_t m_table : 4;1287uint64_t m_mul : 4;1288uint64_t m_sels_0 : 8;1289uint64_t m_sels_1 : 8;1290uint64_t m_sels_2 : 8;1291uint64_t m_sels_3 : 8;1292uint64_t m_sels_4 : 8;1293uint64_t m_sels_5 : 8;12941295uint64_t get_sels() const1296{1297return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5;1298}12991300void set_sels(uint64_t v)1301{1302m_sels_0 = (v >> 40U) & 0xFF;1303m_sels_1 = (v >> 32U) & 0xFF;1304m_sels_2 = (v >> 24U) & 0xFF;1305m_sels_3 = (v >> 16U) & 0xFF;1306m_sels_4 = (v >> 8U) & 0xFF;1307m_sels_5 = v & 0xFF;1308}1309};13101311struct etc2_eac_rg111312{1313etc2_eac_r11 m_c[2];1314};13151316void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c)1317{1318const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);1319const uint64_t sels = pBlock->get_sels();13201321const int base = (int)pBlock->m_base * 8 + 4;1322const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1;1323const int table = (int)pBlock->m_table;13241325for (uint32_t y = 0; y < 4; y++)1326{1327for (uint32_t x = 0; x < 4; x++)1328{1329const uint32_t shift = 45 - ((y + x * 4) * 3);13301331const uint32_t sel = (uint32_t)((sels >> shift) & 7);13321333int val = base + g_etc2_eac_tables[table][sel] * mul;1334val = clamp<int>(val, 0, 2047);13351336// Convert to 8-bits with rounding1337//pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);1338pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047);13391340} // x1341} // y1342}13431344void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)1345{1346for (uint32_t c = 0; c < 2; c++)1347{1348const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];13491350unpack_etc2_eac_r(pBlock, pPixels, c);1351}1352}13531354//------------------------------------------------------------------------------------------------1355// UASTC13561357void unpack_uastc(const void* p, color_rgba* pPixels)1358{1359basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);1360}13611362// Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.1363bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)1364{1365switch (fmt)1366{1367case texture_format::cBC1:1368{1369unpack_bc1(pBlock, pPixels, true);1370break;1371}1372case texture_format::cBC1_NV:1373{1374unpack_bc1_nv(pBlock, pPixels, true);1375break;1376}1377case texture_format::cBC1_AMD:1378{1379unpack_bc1_amd(pBlock, pPixels, true);1380break;1381}1382case texture_format::cBC3:1383{1384return unpack_bc3(pBlock, pPixels);1385}1386case texture_format::cBC4:1387{1388// Unpack to R1389unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));1390break;1391}1392case texture_format::cBC5:1393{1394unpack_bc5(pBlock, pPixels);1395break;1396}1397case texture_format::cBC7:1398{1399return unpack_bc7(pBlock, pPixels);1400}1401// Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)1402case texture_format::cETC2_RGB:1403case texture_format::cETC1:1404case texture_format::cETC1S:1405{1406return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);1407}1408case texture_format::cETC2_RGBA:1409{1410if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))1411return false;1412unpack_etc2_eac(pBlock, pPixels);1413break;1414}1415case texture_format::cETC2_ALPHA:1416{1417// Unpack to A1418unpack_etc2_eac(pBlock, pPixels);1419break;1420}1421case texture_format::cBC6HSigned:1422case texture_format::cBC6HUnsigned:1423case texture_format::cASTC_HDR_4x4:1424case texture_format::cUASTC_HDR_4x4:1425case texture_format::cASTC_HDR_6x6:1426{1427// Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.1428assert(0);1429return false;1430}1431case texture_format::cASTC_LDR_4x4:1432{1433const bool astc_srgb = false;1434bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);1435assert(status);14361437if (!status)1438return false;14391440break;1441}1442case texture_format::cATC_RGB:1443{1444unpack_atc(pBlock, pPixels);1445break;1446}1447case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:1448{1449unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);1450unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));1451break;1452}1453case texture_format::cFXT1_RGB:1454{1455unpack_fxt1(pBlock, pPixels);1456break;1457}1458case texture_format::cPVRTC2_4_RGBA:1459{1460unpack_pvrtc2(pBlock, pPixels);1461break;1462}1463case texture_format::cETC2_R11_EAC:1464{1465unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0);1466break;1467}1468case texture_format::cETC2_RG11_EAC:1469{1470unpack_etc2_eac_rg(pBlock, pPixels);1471break;1472}1473case texture_format::cUASTC4x4:1474{1475unpack_uastc(pBlock, pPixels);1476break;1477}1478default:1479{1480assert(0);1481// TODO1482return false;1483}1484}1485return true;1486}14871488bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels)1489{1490switch (fmt)1491{1492case texture_format::cASTC_HDR_6x6:1493{1494#if BASISU_USE_GOOGLE_ASTC_DECODER1495bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 6, 6);1496assert(status);1497if (!status)1498return false;1499#else1500// Use our decoder1501basist::half_float half_block[6 * 6][4];15021503astc_helpers::log_astc_block log_blk;1504if (!astc_helpers::unpack_block(pBlock, log_blk, 6, 6))1505return false;1506if (!astc_helpers::decode_block(log_blk, half_block, 6, 6, astc_helpers::cDecodeModeHDR16))1507return false;15081509for (uint32_t p = 0; p < (6 * 6); p++)1510{1511pPixels[p][0] = basist::half_to_float(half_block[p][0]);1512pPixels[p][1] = basist::half_to_float(half_block[p][1]);1513pPixels[p][2] = basist::half_to_float(half_block[p][2]);1514pPixels[p][3] = basist::half_to_float(half_block[p][3]);1515}1516#endif1517return true;1518}1519case texture_format::cASTC_HDR_4x4:1520case texture_format::cUASTC_HDR_4x4:1521{1522#if BASISU_USE_GOOGLE_ASTC_DECODER1523// Use Google's decoder1524bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);1525assert(status);1526if (!status)1527return false;1528#else1529// Use our decoder1530basist::half_float half_block[16][4];15311532astc_helpers::log_astc_block log_blk;1533if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4))1534return false;1535if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16))1536return false;15371538for (uint32_t p = 0; p < 16; p++)1539{1540pPixels[p][0] = basist::half_to_float(half_block[p][0]);1541pPixels[p][1] = basist::half_to_float(half_block[p][1]);1542pPixels[p][2] = basist::half_to_float(half_block[p][2]);1543pPixels[p][3] = basist::half_to_float(half_block[p][3]);1544}15451546//memset(pPixels, 0, sizeof(vec4F) * 16);1547#endif1548return true;1549}1550case texture_format::cBC6HSigned:1551case texture_format::cBC6HUnsigned:1552{1553basist::half_float half_block[16][3];15541555unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned);15561557for (uint32_t p = 0; p < 16; p++)1558{1559pPixels[p][0] = basist::half_to_float(half_block[p][0]);1560pPixels[p][1] = basist::half_to_float(half_block[p][1]);1561pPixels[p][2] = basist::half_to_float(half_block[p][2]);1562pPixels[p][3] = 1.0f;1563}15641565return true;1566}1567default:1568{1569break;1570}1571}15721573assert(0);1574return false;1575}15761577bool gpu_image::unpack(image& img) const1578{1579img.resize(get_pixel_width(), get_pixel_height());1580img.set_all(g_black_color);15811582if (!img.get_width() || !img.get_height())1583return true;15841585if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA))1586{1587pvrtc4_image pi(m_width, m_height);15881589if (get_total_blocks() != pi.get_total_blocks())1590return false;15911592memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes());15931594pi.deswizzle();15951596pi.unpack_all_pixels(img);15971598return true;1599}16001601assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));1602color_rgba pixels[cMaxBlockSize * cMaxBlockSize];1603for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)1604pixels[i] = g_black_color;16051606bool success = true;16071608for (uint32_t by = 0; by < m_blocks_y; by++)1609{1610for (uint32_t bx = 0; bx < m_blocks_x; bx++)1611{1612const void* pBlock = get_block_ptr(bx, by);16131614if (!unpack_block(m_fmt, pBlock, pixels))1615success = false;16161617img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);1618} // bx1619} // by16201621return success;1622}16231624bool gpu_image::unpack_hdr(imagef& img) const1625{1626if ((m_fmt != texture_format::cASTC_HDR_4x4) && (m_fmt != texture_format::cUASTC_HDR_4x4) && (m_fmt != texture_format::cASTC_HDR_6x6) &&1627(m_fmt != texture_format::cBC6HUnsigned) && (m_fmt != texture_format::cBC6HSigned))1628{1629// Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)1630assert(0);1631return false;1632}16331634img.resize(get_pixel_width(), get_pixel_height());1635img.set_all(vec4F(0.0f));16361637if (!img.get_width() || !img.get_height())1638return true;16391640assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));1641vec4F pixels[cMaxBlockSize * cMaxBlockSize];1642clear_obj(pixels);16431644bool success = true;16451646for (uint32_t by = 0; by < m_blocks_y; by++)1647{1648for (uint32_t bx = 0; bx < m_blocks_x; bx++)1649{1650const void* pBlock = get_block_ptr(bx, by);16511652if (!unpack_block_hdr(m_fmt, pBlock, pixels))1653success = false;16541655img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);1656} // bx1657} // by16581659return success;1660}16611662// KTX1 texture file writing1663static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };16641665// KTX/GL enums1666enum1667{1668KTX_ENDIAN = 0x04030201,1669KTX_OPPOSITE_ENDIAN = 0x01020304,1670KTX_ETC1_RGB8_OES = 0x8D64,1671KTX_RED = 0x1903,1672KTX_RG = 0x8227,1673KTX_RGB = 0x1907,1674KTX_RGBA = 0x1908,16751676KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,1677KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,1678KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,1679KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,1680KTX_COMPRESSED_RGB8_ETC2 = 0x9274,1681KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,1682KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,1683KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,1684KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E,1685KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,1686KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,1687KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,16881689KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,1690KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1,1691KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2,1692KTX_COMPRESSED_RGBA_ASTC_6x5_KHR = 0x93B3,1693KTX_COMPRESSED_RGBA_ASTC_6x6_KHR = 0x93B4,1694KTX_COMPRESSED_RGBA_ASTC_8x5_KHR = 0x93B5,1695KTX_COMPRESSED_RGBA_ASTC_8x6_KHR = 0x93B6,1696KTX_COMPRESSED_RGBA_ASTC_8x8_KHR = 0x93B7,1697KTX_COMPRESSED_RGBA_ASTC_10x5_KHR = 0x93B8,1698KTX_COMPRESSED_RGBA_ASTC_10x6_KHR = 0x93B9,1699KTX_COMPRESSED_RGBA_ASTC_10x8_KHR = 0x93BA,1700KTX_COMPRESSED_RGBA_ASTC_10x10_KHR = 0x93BB,1701KTX_COMPRESSED_RGBA_ASTC_12x10_KHR = 0x93BC,1702KTX_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD,17031704KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,1705KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR = 0x93D1,1706KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR = 0x93D2,1707KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR = 0x93D3,1708KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR = 0x93D4,1709KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR = 0x93D5,1710KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR = 0x93D6,1711KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR = 0x93D7,1712KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR = 0x93D8,1713KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR = 0x93D9,1714KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR = 0x93DA,1715KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR = 0x93DB,1716KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR = 0x93DC,1717KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD,17181719KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!17201721KTX_ATC_RGB_AMD = 0x8C92,1722KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,17231724KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,1725KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,1726KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,1727KTX_COMPRESSED_R11_EAC = 0x9270,1728KTX_COMPRESSED_RG11_EAC = 0x92721729};17301731struct ktx_header1732{1733uint8_t m_identifier[12];1734packed_uint<4> m_endianness;1735packed_uint<4> m_glType;1736packed_uint<4> m_glTypeSize;1737packed_uint<4> m_glFormat;1738packed_uint<4> m_glInternalFormat;1739packed_uint<4> m_glBaseInternalFormat;1740packed_uint<4> m_pixelWidth;1741packed_uint<4> m_pixelHeight;1742packed_uint<4> m_pixelDepth;1743packed_uint<4> m_numberOfArrayElements;1744packed_uint<4> m_numberOfFaces;1745packed_uint<4> m_numberOfMipmapLevels;1746packed_uint<4> m_bytesOfKeyValueData;17471748void clear() { clear_obj(*this); }1749};17501751// Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]1752bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)1753{1754if (!gpu_images.size())1755{1756assert(0);1757return false;1758}17591760uint32_t width = 0, height = 0, total_levels = 0;1761basisu::texture_format fmt = texture_format::cInvalidTextureFormat;17621763// Sanity check the input1764if (cubemap_flag)1765{1766if ((gpu_images.size() % 6) != 0)1767{1768assert(0);1769return false;1770}1771}17721773for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)1774{1775const gpu_image_vec &levels = gpu_images[array_index];17761777if (!levels.size())1778{1779// Empty mip chain1780assert(0);1781return false;1782}17831784if (!array_index)1785{1786width = levels[0].get_pixel_width();1787height = levels[0].get_pixel_height();1788total_levels = (uint32_t)levels.size();1789fmt = levels[0].get_format();1790}1791else1792{1793if ((width != levels[0].get_pixel_width()) ||1794(height != levels[0].get_pixel_height()) ||1795(total_levels != levels.size()))1796{1797// All cubemap/texture array faces must be the same dimension1798assert(0);1799return false;1800}1801}18021803for (uint32_t level_index = 0; level_index < levels.size(); level_index++)1804{1805if (level_index)1806{1807if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||1808(levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )1809{1810// Malformed mipmap chain1811assert(0);1812return false;1813}1814}18151816if (fmt != levels[level_index].get_format())1817{1818// All input textures must use the same GPU format1819assert(0);1820return false;1821}1822}1823}18241825uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;18261827switch (fmt)1828{1829case texture_format::cBC1:1830case texture_format::cBC1_NV:1831case texture_format::cBC1_AMD:1832{1833internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;1834break;1835}1836case texture_format::cBC3:1837{1838internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;1839base_internal_fmt = KTX_RGBA;1840break;1841}1842case texture_format::cBC4:1843{1844internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;1845base_internal_fmt = KTX_RED;1846break;1847}1848case texture_format::cBC5:1849{1850internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;1851base_internal_fmt = KTX_RG;1852break;1853}1854case texture_format::cETC1:1855case texture_format::cETC1S:1856{1857internal_fmt = KTX_ETC1_RGB8_OES;1858break;1859}1860case texture_format::cETC2_RGB:1861{1862internal_fmt = KTX_COMPRESSED_RGB8_ETC2;1863break;1864}1865case texture_format::cETC2_RGBA:1866{1867internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;1868base_internal_fmt = KTX_RGBA;1869break;1870}1871case texture_format::cBC6HSigned:1872{1873internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;1874base_internal_fmt = KTX_RGBA;1875break;1876}1877case texture_format::cBC6HUnsigned:1878{1879internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;1880base_internal_fmt = KTX_RGBA;1881break;1882}1883case texture_format::cBC7:1884{1885internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;1886base_internal_fmt = KTX_RGBA;1887break;1888}1889case texture_format::cPVRTC1_4_RGB:1890{1891internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;1892break;1893}1894case texture_format::cPVRTC1_4_RGBA:1895{1896internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;1897base_internal_fmt = KTX_RGBA;1898break;1899}1900case texture_format::cASTC_HDR_6x6:1901{1902internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR;1903// TODO: should we write RGB? We don't support generating HDR 6x6 with alpha.1904base_internal_fmt = KTX_RGBA;1905break;1906}1907// We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.1908case texture_format::cASTC_LDR_4x4:1909case texture_format::cASTC_HDR_4x4:1910case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC1911{1912internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;1913base_internal_fmt = KTX_RGBA;1914break;1915}1916case texture_format::cATC_RGB:1917{1918internal_fmt = KTX_ATC_RGB_AMD;1919break;1920}1921case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:1922{1923internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;1924base_internal_fmt = KTX_RGBA;1925break;1926}1927case texture_format::cETC2_R11_EAC:1928{1929internal_fmt = KTX_COMPRESSED_R11_EAC;1930base_internal_fmt = KTX_RED;1931break;1932}1933case texture_format::cETC2_RG11_EAC:1934{1935internal_fmt = KTX_COMPRESSED_RG11_EAC;1936base_internal_fmt = KTX_RG;1937break;1938}1939case texture_format::cUASTC4x4:1940{1941internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;1942base_internal_fmt = KTX_RGBA;1943break;1944}1945case texture_format::cFXT1_RGB:1946{1947internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;1948break;1949}1950case texture_format::cPVRTC2_4_RGBA:1951{1952internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;1953base_internal_fmt = KTX_RGBA;1954break;1955}1956default:1957{1958// TODO1959assert(0);1960return false;1961}1962}19631964ktx_header header;1965header.clear();1966memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));1967header.m_endianness = KTX_ENDIAN;19681969header.m_pixelWidth = width;1970header.m_pixelHeight = height;19711972header.m_glTypeSize = 1;19731974header.m_glInternalFormat = internal_fmt;1975header.m_glBaseInternalFormat = base_internal_fmt;19761977header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());1978if (header.m_numberOfArrayElements == 1)1979header.m_numberOfArrayElements = 0;19801981header.m_numberOfMipmapLevels = total_levels;1982header.m_numberOfFaces = cubemap_flag ? 6 : 1;19831984append_vector(ktx_data, (uint8_t*)&header, sizeof(header));19851986for (uint32_t level_index = 0; level_index < total_levels; level_index++)1987{1988uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();19891990if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))1991{1992img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);1993}19941995assert(img_size && ((img_size & 3) == 0));19961997packed_uint<4> packed_img_size(img_size);1998append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size));19992000uint32_t bytes_written = 0;2001(void)bytes_written;20022003for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)2004{2005for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)2006{2007const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];20082009append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes());20102011bytes_written += img.get_size_in_bytes();2012}20132014} // array_index20152016} // level_index20172018return true;2019}20202021bool does_dds_support_format(texture_format fmt)2022{2023switch (fmt)2024{2025case texture_format::cBC1_NV:2026case texture_format::cBC1_AMD:2027case texture_format::cBC1:2028case texture_format::cBC3:2029case texture_format::cBC4:2030case texture_format::cBC5:2031case texture_format::cBC6HSigned:2032case texture_format::cBC6HUnsigned:2033case texture_format::cBC7:2034return true;2035default:2036break;2037}2038return false;2039}20402041// Only supports the basic DirectX BC texture formats.2042// gpu_images array is: [face/layer][mipmap level]2043// For cubemap arrays, # of face/layers must be a multiple of 6.2044// Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped2045// and cubemap, cubemap mipmapped, and cubemap array mipmapped.2046bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)2047{2048return false;2049}20502051bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)2052{2053uint8_vec dds_data;20542055if (!write_dds_file(dds_data, gpu_images, cubemap_flag, use_srgb_format))2056return false;20572058if (!write_vec_to_file(pFilename, dds_data))2059{2060fprintf(stderr, "write_dds_file: Failed writing DDS file data\n");2061return false;2062}20632064return true;2065}20662067bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips, basisu::vector<imagef>& hdr_mips)2068{2069return false;2070}20712072bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format)2073{2074std::string extension(string_tolower(string_get_extension(pFilename)));20752076uint8_vec filedata;2077if (extension == "ktx")2078{2079if (!create_ktx_texture_file(filedata, g, cubemap_flag))2080return false;2081}2082else if (extension == "pvr")2083{2084// TODO2085return false;2086}2087else if (extension == "dds")2088{2089if (!write_dds_file(filedata, g, cubemap_flag, use_srgb_format))2090return false;2091}2092else2093{2094// unsupported texture format2095assert(0);2096return false;2097}20982099return basisu::write_vec_to_file(pFilename, filedata);2100}21012102bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format)2103{2104basisu::vector<gpu_image_vec> a;2105a.push_back(g);2106return write_compressed_texture_file(pFilename, a, false, use_srgb_format);2107}21082109bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format)2110{2111basisu::vector<gpu_image_vec> v;2112enlarge_vector(v, 1)->push_back(g);2113return write_compressed_texture_file(pFilename, v, false, use_srgb_format);2114}21152116//const uint32_t OUT_FILE_MAGIC = 'TEXC';2117struct out_file_header2118{2119packed_uint<4> m_magic;2120packed_uint<4> m_pad;2121packed_uint<4> m_width;2122packed_uint<4> m_height;2123};21242125// As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output.2126bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)2127{2128out_file_header hdr;2129//hdr.m_magic = OUT_FILE_MAGIC;2130hdr.m_magic.m_bytes[0] = 67;2131hdr.m_magic.m_bytes[1] = 88;2132hdr.m_magic.m_bytes[2] = 69;2133hdr.m_magic.m_bytes[3] = 84;2134hdr.m_pad = 0;2135hdr.m_width = gi.get_blocks_x() * 8;2136hdr.m_height = gi.get_blocks_y() * 4;21372138FILE* pFile = nullptr;2139#ifdef _WIN322140fopen_s(&pFile, pFilename, "wb");2141#else2142pFile = fopen(pFilename, "wb");2143#endif2144if (!pFile)2145return false;21462147fwrite(&hdr, sizeof(hdr), 1, pFile);2148fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile);21492150return fclose(pFile) != EOF;2151}21522153// The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting2154// its usefulness/relevance.2155// https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md2156bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y)2157{2158assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0));21592160uint8_vec file_data;2161file_data.push_back(0x13);2162file_data.push_back(0xAB);2163file_data.push_back(0xA1);2164file_data.push_back(0x5C);21652166file_data.push_back((uint8_t)block_width);2167file_data.push_back((uint8_t)block_height);2168file_data.push_back(1);21692170file_data.push_back((uint8_t)dim_x);2171file_data.push_back((uint8_t)(dim_x >> 8));2172file_data.push_back((uint8_t)(dim_x >> 16));21732174file_data.push_back((uint8_t)dim_y);2175file_data.push_back((uint8_t)(dim_y >> 8));2176file_data.push_back((uint8_t)(dim_y >> 16));21772178file_data.push_back((uint8_t)1);2179file_data.push_back((uint8_t)0);2180file_data.push_back((uint8_t)0);21812182const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width;2183const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height;21842185const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16;21862187const size_t cur_size = file_data.size();21882189file_data.resize(cur_size + total_bytes);21902191memcpy(&file_data[cur_size], pBlocks, total_bytes);21922193return write_vec_to_file(pFilename, file_data);2194}21952196} // basisu2197219821992200