// Path: blob/master/thirdparty/astcenc/astcenc_block_sizes.cpp
// 9896 views
// SPDX-License-Identifier: Apache-2.01// ----------------------------------------------------------------------------2// Copyright 2011-2025 Arm Limited3//4// Licensed under the Apache License, Version 2.0 (the "License"); you may not5// use this file except in compliance with the License. You may obtain a copy6// of the License at:7//8// http://www.apache.org/licenses/LICENSE-2.09//10// Unless required by applicable law or agreed to in writing, software11// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT12// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the13// License for the specific language governing permissions and limitations14// under the License.15// ----------------------------------------------------------------------------1617/**18* @brief Functions to generate block size descriptor and decimation tables.19*/2021#include "astcenc_internal.h"2223/**24* @brief Decode the properties of an encoded 2D block mode.25*26* @param block_mode The encoded block mode.27* @param[out] x_weights The number of weights in the X dimension.28* @param[out] y_weights The number of weights in the Y dimension.29* @param[out] is_dual_plane True if this block mode has two weight planes.30* @param[out] quant_mode The quantization level for the weights.31* @param[out] weight_bits The storage bit count for the weights.32*33* @return Returns true if a valid mode, false otherwise.34*/35static bool decode_block_mode_2d(36unsigned int block_mode,37unsigned int& x_weights,38unsigned int& y_weights,39bool& is_dual_plane,40unsigned int& quant_mode,41unsigned int& weight_bits42) {43unsigned int base_quant_mode = (block_mode >> 4) & 1;44unsigned int H = (block_mode >> 9) & 1;45unsigned int D = (block_mode >> 10) & 1;46unsigned int A = (block_mode >> 5) & 0x3;4748x_weights = 0;49y_weights = 0;5051if ((block_mode & 3) != 0)52{53base_quant_mode |= (block_mode & 3) << 1;54unsigned int B = (block_mode >> 7) & 3;55switch ((block_mode >> 2) & 
3)56{57case 0:58x_weights = B + 4;59y_weights = A + 2;60break;61case 1:62x_weights = B + 8;63y_weights = A + 2;64break;65case 2:66x_weights = A + 2;67y_weights = B + 8;68break;69case 3:70B &= 1;71if (block_mode & 0x100)72{73x_weights = B + 2;74y_weights = A + 2;75}76else77{78x_weights = A + 2;79y_weights = B + 6;80}81break;82}83}84else85{86base_quant_mode |= ((block_mode >> 2) & 3) << 1;87if (((block_mode >> 2) & 3) == 0)88{89return false;90}9192unsigned int B = (block_mode >> 9) & 3;93switch ((block_mode >> 7) & 3)94{95case 0:96x_weights = 12;97y_weights = A + 2;98break;99case 1:100x_weights = A + 2;101y_weights = 12;102break;103case 2:104x_weights = A + 6;105y_weights = B + 6;106D = 0;107H = 0;108break;109case 3:110switch ((block_mode >> 5) & 3)111{112case 0:113x_weights = 6;114y_weights = 10;115break;116case 1:117x_weights = 10;118y_weights = 6;119break;120case 2:121case 3:122return false;123}124break;125}126}127128unsigned int weight_count = x_weights * y_weights * (D + 1);129quant_mode = (base_quant_mode - 2) + 6 * H;130is_dual_plane = D != 0;131132weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));133return (weight_count <= BLOCK_MAX_WEIGHTS &&134weight_bits >= BLOCK_MIN_WEIGHT_BITS &&135weight_bits <= BLOCK_MAX_WEIGHT_BITS);136}137138/**139* @brief Decode the properties of an encoded 3D block mode.140*141* @param block_mode The encoded block mode.142* @param[out] x_weights The number of weights in the X dimension.143* @param[out] y_weights The number of weights in the Y dimension.144* @param[out] z_weights The number of weights in the Z dimension.145* @param[out] is_dual_plane True if this block mode has two weight planes.146* @param[out] quant_mode The quantization level for the weights.147* @param[out] weight_bits The storage bit count for the weights.148*149* @return Returns true if a valid mode, false otherwise.150*/151static bool decode_block_mode_3d(152unsigned int block_mode,153unsigned int& 
x_weights,154unsigned int& y_weights,155unsigned int& z_weights,156bool& is_dual_plane,157unsigned int& quant_mode,158unsigned int& weight_bits159) {160unsigned int base_quant_mode = (block_mode >> 4) & 1;161unsigned int H = (block_mode >> 9) & 1;162unsigned int D = (block_mode >> 10) & 1;163unsigned int A = (block_mode >> 5) & 0x3;164165x_weights = 0;166y_weights = 0;167z_weights = 0;168169if ((block_mode & 3) != 0)170{171base_quant_mode |= (block_mode & 3) << 1;172unsigned int B = (block_mode >> 7) & 3;173unsigned int C = (block_mode >> 2) & 0x3;174x_weights = A + 2;175y_weights = B + 2;176z_weights = C + 2;177}178else179{180base_quant_mode |= ((block_mode >> 2) & 3) << 1;181if (((block_mode >> 2) & 3) == 0)182{183return false;184}185186int B = (block_mode >> 9) & 3;187if (((block_mode >> 7) & 3) != 3)188{189D = 0;190H = 0;191}192switch ((block_mode >> 7) & 3)193{194case 0:195x_weights = 6;196y_weights = B + 2;197z_weights = A + 2;198break;199case 1:200x_weights = A + 2;201y_weights = 6;202z_weights = B + 2;203break;204case 2:205x_weights = A + 2;206y_weights = B + 2;207z_weights = 6;208break;209case 3:210x_weights = 2;211y_weights = 2;212z_weights = 2;213switch ((block_mode >> 5) & 3)214{215case 0:216x_weights = 6;217break;218case 1:219y_weights = 6;220break;221case 2:222z_weights = 6;223break;224case 3:225return false;226}227break;228}229}230231unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);232quant_mode = (base_quant_mode - 2) + 6 * H;233is_dual_plane = D != 0;234235weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));236return (weight_count <= BLOCK_MAX_WEIGHTS &&237weight_bits >= BLOCK_MIN_WEIGHT_BITS &&238weight_bits <= BLOCK_MAX_WEIGHT_BITS);239}240241/**242* @brief Create a 2D decimation entry for a block-size and weight-decimation pair.243*244* @param x_texels The number of texels in the X dimension.245* @param y_texels The number of texels in the Y dimension.246* @param x_weights The 
number of weights in the X dimension.247* @param y_weights The number of weights in the Y dimension.248* @param[out] di The decimation info structure to populate.249* @param[out] wb The decimation table init scratch working buffers.250*/251static void init_decimation_info_2d(252unsigned int x_texels,253unsigned int y_texels,254unsigned int x_weights,255unsigned int y_weights,256decimation_info& di,257dt_init_working_buffers& wb258) {259unsigned int texels_per_block = x_texels * y_texels;260unsigned int weights_per_block = x_weights * y_weights;261262uint8_t max_texel_count_of_weight = 0;263264promise(weights_per_block > 0);265promise(texels_per_block > 0);266promise(x_texels > 0);267promise(y_texels > 0);268269for (unsigned int i = 0; i < weights_per_block; i++)270{271wb.texel_count_of_weight[i] = 0;272}273274for (unsigned int i = 0; i < texels_per_block; i++)275{276wb.weight_count_of_texel[i] = 0;277}278279for (unsigned int y = 0; y < y_texels; y++)280{281for (unsigned int x = 0; x < x_texels; x++)282{283unsigned int texel = y * x_texels + x;284285unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;286unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;287288unsigned int x_weight_frac = x_weight & 0xF;289unsigned int y_weight_frac = y_weight & 0xF;290unsigned int x_weight_int = x_weight >> 4;291unsigned int y_weight_int = y_weight >> 4;292293unsigned int qweight[4];294qweight[0] = x_weight_int + y_weight_int * x_weights;295qweight[1] = qweight[0] + 1;296qweight[2] = qweight[0] + x_weights;297qweight[3] = qweight[2] + 1;298299// Truncated-precision bilinear interpolation300unsigned int prod = x_weight_frac * y_weight_frac;301302unsigned int weight[4];303weight[3] = (prod + 8) >> 4;304weight[1] = x_weight_frac - weight[3];305weight[2] = y_weight_frac - weight[3];306weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];307308for (unsigned int i = 0; i < 4; i++)309{310if 
(weight[i] != 0)311{312wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);313wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);314wb.weight_count_of_texel[texel]++;315wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);316wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);317wb.texel_count_of_weight[qweight[i]]++;318max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);319}320}321}322}323324uint8_t max_texel_weight_count = 0;325for (unsigned int i = 0; i < texels_per_block; i++)326{327di.texel_weight_count[i] = wb.weight_count_of_texel[i];328max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);329330for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)331{332di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];333di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);334di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];335}336337// Init all 4 entries so we can rely on zeros for vectorization338for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)339{340di.texel_weight_contribs_int_tr[j][i] = 0;341di.texel_weight_contribs_float_tr[j][i] = 0.0f;342di.texel_weights_tr[j][i] = 0;343}344}345346di.max_texel_weight_count = max_texel_weight_count;347348for (unsigned int i = 0; i < weights_per_block; i++)349{350unsigned int texel_count_wt = wb.texel_count_of_weight[i];351di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);352353for (unsigned int j = 0; j < texel_count_wt; j++)354{355uint8_t texel = wb.texels_of_weight[i][j];356357// Create transposed versions of these for better vectorization358di.weight_texels_tr[j][i] = texel;359di.weights_texel_contribs_tr[j][i] = 
static_cast<float>(wb.texel_weights_of_weight[i][j]);360361// Store the per-texel contribution of this weight for each texel it contributes to362di.texel_contrib_for_weight[j][i] = 0.0f;363for (unsigned int k = 0; k < 4; k++)364{365uint8_t dttw = di.texel_weights_tr[k][texel];366float dttwf = di.texel_weight_contribs_float_tr[k][texel];367if (dttw == i && dttwf != 0.0f)368{369di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];370break;371}372}373}374375// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails376// Match last texel in active lane in SIMD group, for better gathers377uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];378for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)379{380di.weight_texels_tr[j][i] = last_texel;381di.weights_texel_contribs_tr[j][i] = 0.0f;382}383}384385// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails386size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);387for (size_t i = texels_per_block; i < texels_per_block_simd; i++)388{389di.texel_weight_count[i] = 0;390391for (size_t j = 0; j < 4; j++)392{393di.texel_weight_contribs_float_tr[j][i] = 0;394di.texel_weights_tr[j][i] = 0;395di.texel_weight_contribs_int_tr[j][i] = 0;396}397}398399// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails400// Match last texel in active lane in SIMD group, for better gathers401unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];402uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];403404size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);405for (size_t i = weights_per_block; i < weights_per_block_simd; i++)406{407di.weight_texel_count[i] = 0;408409for (size_t j = 0; j < max_texel_count_of_weight; j++)410{411di.weight_texels_tr[j][i] = last_texel;412di.weights_texel_contribs_tr[j][i] 
= 0.0f;413}414}415416di.texel_count = static_cast<uint8_t>(texels_per_block);417di.weight_count = static_cast<uint8_t>(weights_per_block);418di.weight_x = static_cast<uint8_t>(x_weights);419di.weight_y = static_cast<uint8_t>(y_weights);420di.weight_z = 1;421}422423/**424* @brief Create a 3D decimation entry for a block-size and weight-decimation pair.425*426* @param x_texels The number of texels in the X dimension.427* @param y_texels The number of texels in the Y dimension.428* @param z_texels The number of texels in the Z dimension.429* @param x_weights The number of weights in the X dimension.430* @param y_weights The number of weights in the Y dimension.431* @param z_weights The number of weights in the Z dimension.432* @param[out] di The decimation info structure to populate.433@param[out] wb The decimation table init scratch working buffers.434*/435static void init_decimation_info_3d(436unsigned int x_texels,437unsigned int y_texels,438unsigned int z_texels,439unsigned int x_weights,440unsigned int y_weights,441unsigned int z_weights,442decimation_info& di,443dt_init_working_buffers& wb444) {445unsigned int texels_per_block = x_texels * y_texels * z_texels;446unsigned int weights_per_block = x_weights * y_weights * z_weights;447448uint8_t max_texel_count_of_weight = 0;449450promise(weights_per_block > 0);451promise(texels_per_block > 0);452453for (unsigned int i = 0; i < weights_per_block; i++)454{455wb.texel_count_of_weight[i] = 0;456}457458for (unsigned int i = 0; i < texels_per_block; i++)459{460wb.weight_count_of_texel[i] = 0;461}462463for (unsigned int z = 0; z < z_texels; z++)464{465for (unsigned int y = 0; y < y_texels; y++)466{467for (unsigned int x = 0; x < x_texels; x++)468{469int texel = (z * y_texels + y) * x_texels + x;470471int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;472int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;473int z_weight = (((1024 + z_texels / 2) 
/ (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;474475int x_weight_frac = x_weight & 0xF;476int y_weight_frac = y_weight & 0xF;477int z_weight_frac = z_weight & 0xF;478int x_weight_int = x_weight >> 4;479int y_weight_int = y_weight >> 4;480int z_weight_int = z_weight >> 4;481int qweight[4];482int weight[4];483qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;484qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);485486// simplex interpolation487int fs = x_weight_frac;488int ft = y_weight_frac;489int fp = z_weight_frac;490491int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));492int N = x_weights;493int NM = x_weights * y_weights;494495int s1, s2, w0, w1, w2, w3;496switch (cas)497{498case 7:499s1 = 1;500s2 = N;501w0 = 16 - fs;502w1 = fs - ft;503w2 = ft - fp;504w3 = fp;505break;506case 3:507s1 = N;508s2 = 1;509w0 = 16 - ft;510w1 = ft - fs;511w2 = fs - fp;512w3 = fp;513break;514case 5:515s1 = 1;516s2 = NM;517w0 = 16 - fs;518w1 = fs - fp;519w2 = fp - ft;520w3 = ft;521break;522case 4:523s1 = NM;524s2 = 1;525w0 = 16 - fp;526w1 = fp - fs;527w2 = fs - ft;528w3 = ft;529break;530case 2:531s1 = N;532s2 = NM;533w0 = 16 - ft;534w1 = ft - fp;535w2 = fp - fs;536w3 = fs;537break;538case 0:539s1 = NM;540s2 = N;541w0 = 16 - fp;542w1 = fp - ft;543w2 = ft - fs;544w3 = fs;545break;546default:547s1 = NM;548s2 = N;549w0 = 16 - fp;550w1 = fp - ft;551w2 = ft - fs;552w3 = fs;553break;554}555556qweight[1] = qweight[0] + s1;557qweight[2] = qweight[1] + s2;558weight[0] = w0;559weight[1] = w1;560weight[2] = w2;561weight[3] = w3;562563for (unsigned int i = 0; i < 4; i++)564{565if (weight[i] != 0)566{567wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);568wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);569wb.weight_count_of_texel[texel]++;570wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = 
static_cast<uint8_t>(texel);571wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);572wb.texel_count_of_weight[qweight[i]]++;573max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);574}575}576}577}578}579580uint8_t max_texel_weight_count = 0;581for (unsigned int i = 0; i < texels_per_block; i++)582{583di.texel_weight_count[i] = wb.weight_count_of_texel[i];584max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);585586// Init all 4 entries so we can rely on zeros for vectorization587for (unsigned int j = 0; j < 4; j++)588{589di.texel_weight_contribs_int_tr[j][i] = 0;590di.texel_weight_contribs_float_tr[j][i] = 0.0f;591di.texel_weights_tr[j][i] = 0;592}593594for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)595{596di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];597di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);598di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];599}600}601602di.max_texel_weight_count = max_texel_weight_count;603604for (unsigned int i = 0; i < weights_per_block; i++)605{606unsigned int texel_count_wt = wb.texel_count_of_weight[i];607di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);608609for (unsigned int j = 0; j < texel_count_wt; j++)610{611unsigned int texel = wb.texels_of_weight[i][j];612613// Create transposed versions of these for better vectorization614di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);615di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);616617// Store the per-texel contribution of this weight for each texel it contributes to618di.texel_contrib_for_weight[j][i] = 0.0f;619for (unsigned int k = 0; k < 4; k++)620{621uint8_t dttw = di.texel_weights_tr[k][texel];622float dttwf = di.texel_weight_contribs_float_tr[k][texel];623if 
(dttw == i && dttwf != 0.0f)624{625di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];626break;627}628}629}630631// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails632// Match last texel in active lane in SIMD group, for better gathers633uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];634for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)635{636di.weight_texels_tr[j][i] = last_texel;637di.weights_texel_contribs_tr[j][i] = 0.0f;638}639}640641// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails642size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);643for (size_t i = texels_per_block; i < texels_per_block_simd; i++)644{645di.texel_weight_count[i] = 0;646647for (size_t j = 0; j < 4; j++)648{649di.texel_weight_contribs_float_tr[j][i] = 0;650di.texel_weights_tr[j][i] = 0;651di.texel_weight_contribs_int_tr[j][i] = 0;652}653}654655// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails656// Match last texel in active lane in SIMD group, for better gathers657int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];658uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];659660size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);661for (size_t i = weights_per_block; i < weights_per_block_simd; i++)662{663di.weight_texel_count[i] = 0;664665for (size_t j = 0; j < max_texel_count_of_weight; j++)666{667di.weight_texels_tr[j][i] = last_texel;668di.weights_texel_contribs_tr[j][i] = 0.0f;669}670}671672di.texel_count = static_cast<uint8_t>(texels_per_block);673di.weight_count = static_cast<uint8_t>(weights_per_block);674di.weight_x = static_cast<uint8_t>(x_weights);675di.weight_y = static_cast<uint8_t>(y_weights);676di.weight_z = static_cast<uint8_t>(z_weights);677}678679/**680* @brief Assign the texels to use for kmeans 
clustering.681*682* The max limit is @c BLOCK_MAX_KMEANS_TEXELS; above this a random selection is used.683* The @c bsd.texel_count is an input and must be populated beforehand.684*685* @param[in,out] bsd The block size descriptor to populate.686*/687static void assign_kmeans_texels(688block_size_descriptor& bsd689) {690// Use all texels for kmeans on a small block691if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)692{693for (uint8_t i = 0; i < bsd.texel_count; i++)694{695bsd.kmeans_texels[i] = i;696}697698return;699}700701// Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block702uint64_t rng_state[2];703astc::rand_init(rng_state);704705// Initialize array used for tracking used indices706bool seen[BLOCK_MAX_TEXELS];707for (uint8_t i = 0; i < bsd.texel_count; i++)708{709seen[i] = false;710}711712// Assign 64 random indices, retrying if we see repeats713unsigned int arr_elements_set = 0;714while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)715{716uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));717texel = texel % bsd.texel_count;718if (!seen[texel])719{720bsd.kmeans_texels[arr_elements_set++] = texel;721seen[texel] = true;722}723}724}725726/**727* @brief Allocate a single 2D decimation table entry.728*729* @param x_texels The number of texels in the X dimension.730* @param y_texels The number of texels in the Y dimension.731* @param x_weights The number of weights in the X dimension.732* @param y_weights The number of weights in the Y dimension.733* @param bsd The block size descriptor we are populating.734* @param wb The decimation table init scratch working buffers.735* @param index The packed array index to populate.736*/737static void construct_dt_entry_2d(738unsigned int x_texels,739unsigned int y_texels,740unsigned int x_weights,741unsigned int y_weights,742block_size_descriptor& bsd,743dt_init_working_buffers& wb,744unsigned int index745) {746unsigned int weight_count = x_weights * y_weights;747assert(weight_count <= 
BLOCK_MAX_WEIGHTS);748749bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;750751decimation_info& di = bsd.decimation_tables[index];752init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);753754int maxprec_1plane = -1;755int maxprec_2planes = -1;756for (int i = 0; i < 12; i++)757{758unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));759if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)760{761maxprec_1plane = i;762}763764if (try_2planes)765{766unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));767if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)768{769maxprec_2planes = i;770}771}772}773774// At least one of the two should be valid ...775assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);776bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);777bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);778bsd.decimation_modes[index].refprec_1plane = 0;779bsd.decimation_modes[index].refprec_2planes = 0;780}781782/**783* @brief Allocate block modes and decimation tables for a single 2D block size.784*785* @param x_texels The number of texels in the X dimension.786* @param y_texels The number of texels in the Y dimension.787* @param can_omit_modes Can we discard modes that astcenc won't use, even if legal?788* @param mode_cutoff Percentile cutoff in range [0,1]. 
Low values more likely to be used.789* @param[out] bsd The block size descriptor to populate.790*/791static void construct_block_size_descriptor_2d(792unsigned int x_texels,793unsigned int y_texels,794bool can_omit_modes,795float mode_cutoff,796block_size_descriptor& bsd797) {798// Store a remap table for storing packed decimation modes.799// Indexing uses [Y * 16 + X] and max size for each axis is 12.800static const unsigned int MAX_DMI = 12 * 16 + 12;801int decimation_mode_index[MAX_DMI];802803dt_init_working_buffers* wb = new dt_init_working_buffers;804805bsd.xdim = static_cast<uint8_t>(x_texels);806bsd.ydim = static_cast<uint8_t>(y_texels);807bsd.zdim = 1;808bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);809810for (unsigned int i = 0; i < MAX_DMI; i++)811{812decimation_mode_index[i] = -1;813}814815// Gather all the decimation grids that can be used with the current block816#if !defined(ASTCENC_DECOMPRESS_ONLY)817const float *percentiles = get_2d_percentile_table(x_texels, y_texels);818float always_cutoff = 0.0f;819#else820// Unused in decompress-only builds821(void)can_omit_modes;822(void)mode_cutoff;823#endif824825// Construct the list of block formats referencing the decimation tables826unsigned int packed_bm_idx = 0;827unsigned int packed_dm_idx = 0;828829// Trackers830unsigned int bm_counts[4] { 0 };831unsigned int dm_counts[4] { 0 };832833// Clear the list to a known-bad value834for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)835{836bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;837}838839// Iterate four times to build a usefully ordered list:840// - Pass 0 - keep selected single plane "always" block modes841// - Pass 1 - keep selected single plane "non-always" block modes842// - Pass 2 - keep select dual plane block modes843// - Pass 3 - keep everything else that's legal844unsigned int limit = can_omit_modes ? 
3 : 4;845for (unsigned int j = 0; j < limit; j ++)846{847for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)848{849// Skip modes we've already included in a previous pass850if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)851{852continue;853}854855// Decode parameters856unsigned int x_weights;857unsigned int y_weights;858bool is_dual_plane;859unsigned int quant_mode;860unsigned int weight_bits;861bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);862863// Always skip invalid encodings for the current block size864if (!valid || (x_weights > x_texels) || (y_weights > y_texels))865{866continue;867}868869// Selectively skip dual plane encodings870if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))871{872continue;873}874875// Always skip encodings we can't physically encode based on876// generic encoding bit availability877if (is_dual_plane)878{879// This is the only check we need as only support 1 partition880if ((109 - weight_bits) <= 0)881{882continue;883}884}885else886{887// This is conservative - fewer bits may be available for > 1 partition888if ((111 - weight_bits) <= 0)889{890continue;891}892}893894// Selectively skip encodings based on percentile895bool percentile_hit = false;896#if !defined(ASTCENC_DECOMPRESS_ONLY)897if (j == 0)898{899percentile_hit = percentiles[i] <= always_cutoff;900}901else902{903percentile_hit = percentiles[i] <= mode_cutoff;904}905#endif906907if (j != 3 && !percentile_hit)908{909continue;910}911912// Allocate and initialize the decimation table entry if we've not used it yet913int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];914if (decimation_mode < 0)915{916construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);917decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;918decimation_mode = packed_dm_idx;919920dm_counts[j]++;921packed_dm_idx++;922}923924auto& bm = 
bsd.block_modes[packed_bm_idx];925926bm.decimation_mode = static_cast<uint8_t>(decimation_mode);927bm.quant_mode = static_cast<uint8_t>(quant_mode);928bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);929bm.weight_bits = static_cast<uint8_t>(weight_bits);930bm.mode_index = static_cast<uint16_t>(i);931932auto& dm = bsd.decimation_modes[decimation_mode];933934if (is_dual_plane)935{936dm.set_ref_2plane(bm.get_weight_quant_mode());937}938else939{940dm.set_ref_1plane(bm.get_weight_quant_mode());941}942943bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);944945packed_bm_idx++;946bm_counts[j]++;947}948}949950bsd.block_mode_count_1plane_always = bm_counts[0];951bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];952bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];953bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];954955bsd.decimation_mode_count_always = dm_counts[0];956bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];957bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];958959#if !defined(ASTCENC_DECOMPRESS_ONLY)960assert(bsd.block_mode_count_1plane_always > 0);961assert(bsd.decimation_mode_count_always > 0);962963delete[] percentiles;964#endif965966// Ensure the end of the array contains valid data (should never get read)967for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)968{969bsd.decimation_modes[i].maxprec_1plane = -1;970bsd.decimation_modes[i].maxprec_2planes = -1;971bsd.decimation_modes[i].refprec_1plane = 0;972bsd.decimation_modes[i].refprec_2planes = 0;973}974975// Determine the texels to use for kmeans clustering.976assign_kmeans_texels(bsd);977978delete wb;979}980981/**982* @brief Allocate block modes and decimation tables for a single 3D block size.983*984* TODO: This function doesn't include all of the heuristics that we use for 2D 
block sizes such as985* the percentile mode cutoffs. If 3D becomes more widely used we should look at this.986*987* @param x_texels The number of texels in the X dimension.988* @param y_texels The number of texels in the Y dimension.989* @param z_texels The number of texels in the Z dimension.990* @param[out] bsd The block size descriptor to populate.991*/992static void construct_block_size_descriptor_3d(993unsigned int x_texels,994unsigned int y_texels,995unsigned int z_texels,996block_size_descriptor& bsd997) {998// Store a remap table for storing packed decimation modes.999// Indexing uses [Z * 64 + Y * 8 + X] and max size for each axis is 6.1000static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;1001int decimation_mode_index[MAX_DMI];1002unsigned int decimation_mode_count = 0;10031004dt_init_working_buffers* wb = new dt_init_working_buffers;10051006bsd.xdim = static_cast<uint8_t>(x_texels);1007bsd.ydim = static_cast<uint8_t>(y_texels);1008bsd.zdim = static_cast<uint8_t>(z_texels);1009bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);10101011for (unsigned int i = 0; i < MAX_DMI; i++)1012{1013decimation_mode_index[i] = -1;1014}10151016// gather all the infill-modes that can be used with the current block size1017for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)1018{1019for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)1020{1021for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)1022{1023unsigned int weight_count = x_weights * y_weights * z_weights;1024if (weight_count > BLOCK_MAX_WEIGHTS)1025{1026continue;1027}10281029decimation_info& di = bsd.decimation_tables[decimation_mode_count];1030decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;1031init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);10321033int maxprec_1plane = -1;1034int maxprec_2planes = -1;1035for (unsigned int i = 0; i < 12; 
i++)1036{1037unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));1038if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)1039{1040maxprec_1plane = i;1041}10421043unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));1044if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)1045{1046maxprec_2planes = i;1047}1048}10491050if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)1051{1052maxprec_2planes = -1;1053}10541055bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);1056bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);1057bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;1058bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;1059decimation_mode_count++;1060}1061}1062}10631064// Ensure the end of the array contains valid data (should never get read)1065for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)1066{1067bsd.decimation_modes[i].maxprec_1plane = -1;1068bsd.decimation_modes[i].maxprec_2planes = -1;1069bsd.decimation_modes[i].refprec_1plane = 0;1070bsd.decimation_modes[i].refprec_2planes = 0;1071}10721073bsd.decimation_mode_count_always = 0; // Skipped for 3D modes1074bsd.decimation_mode_count_selected = decimation_mode_count;1075bsd.decimation_mode_count_all = decimation_mode_count;10761077// Construct the list of block formats referencing the decimation tables10781079// Clear the list to a known-bad value1080for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)1081{1082bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;1083}10841085unsigned int packed_idx = 0;1086unsigned int bm_counts[2] { 0 };10871088// Iterate two times to build a usefully ordered list:1089// - Pass 0 - keep valid single plane block 
modes1090// - Pass 1 - keep valid dual plane block modes1091for (unsigned int j = 0; j < 2; j++)1092{1093for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)1094{1095// Skip modes we've already included in a previous pass1096if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)1097{1098continue;1099}11001101unsigned int x_weights;1102unsigned int y_weights;1103unsigned int z_weights;1104bool is_dual_plane;1105unsigned int quant_mode;1106unsigned int weight_bits;11071108bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);1109// Skip invalid encodings1110if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)1111{1112continue;1113}11141115// Skip encodings in the wrong iteration1116if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))1117{1118continue;1119}11201121// Always skip encodings we can't physically encode based on bit availability1122if (is_dual_plane)1123{1124// This is the only check we need as only support 1 partition1125if ((109 - weight_bits) <= 0)1126{1127continue;1128}1129}1130else1131{1132// This is conservative - fewer bits may be available for > 1 partition1133if ((111 - weight_bits) <= 0)1134{1135continue;1136}1137}11381139int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];1140bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);1141bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);1142bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);1143bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);1144bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);11451146bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);1147bm_counts[j]++;1148packed_idx++;1149}1150}11511152bsd.block_mode_count_1plane_always = 0; // Skipped for 3D modes1153bsd.block_mode_count_1plane_selected = 
bm_counts[0];1154bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];1155bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];11561157// Determine the texels to use for kmeans clustering.1158assign_kmeans_texels(bsd);11591160delete wb;1161}11621163/* See header for documentation. */1164void init_block_size_descriptor(1165unsigned int x_texels,1166unsigned int y_texels,1167unsigned int z_texels,1168bool can_omit_modes,1169unsigned int partition_count_cutoff,1170float mode_cutoff,1171block_size_descriptor& bsd1172) {1173if (z_texels > 1)1174{1175construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);1176}1177else1178{1179construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);1180}11811182init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);1183}118411851186