Path: blob/master/thirdparty/astcenc/astcenc_partition_tables.cpp
9896 views
// SPDX-License-Identifier: Apache-2.01// ----------------------------------------------------------------------------2// Copyright 2011-2023 Arm Limited3//4// Licensed under the Apache License, Version 2.0 (the "License"); you may not5// use this file except in compliance with the License. You may obtain a copy6// of the License at:7//8// http://www.apache.org/licenses/LICENSE-2.09//10// Unless required by applicable law or agreed to in writing, software11// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT12// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the13// License for the specific language governing permissions and limitations14// under the License.15// ----------------------------------------------------------------------------1617/**18* @brief Functions for generating partition tables on demand.19*/2021#include "astcenc_internal.h"2223/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */24#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)2526/**27* @brief Generate a canonical representation of a partition pattern.28*29* The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store30* the remapped texel index. Remapping ensures that we only match on the partition pattern,31* independent of the partition order generated by the hash.32*33* @param texel_count The number of texels in the block.34* @param partition_of_texel The partition assignments, in hash order.35* @param[out] bit_pattern The output bit pattern representation.36*/37static void generate_canonical_partitioning(38unsigned int texel_count,39const uint8_t* partition_of_texel,40uint64_t bit_pattern[BIT_PATTERN_WORDS]41) {42// Clear the pattern43for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)44{45bit_pattern[i] = 0;46}4748// Store a mapping to reorder the raw partitions so that the partitions are ordered such49// that the lowest texel index in partition N is smaller than the lowest texel index in50// partition N + 1.51int mapped_index[BLOCK_MAX_PARTITIONS];52int map_weight_count = 0;5354for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)55{56mapped_index[i] = -1;57}5859for (unsigned int i = 0; i < texel_count; i++)60{61int index = partition_of_texel[i];62if (mapped_index[index] < 0)63{64mapped_index[index] = map_weight_count++;65}6667uint64_t xlat_index = mapped_index[index];68bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));69}70}7172/**73* @brief Compare two canonical patterns to see if they are the same.74*75* @param part1 The first canonical bit pattern to check.76* @param part2 The second canonical bit pattern to check.77*78* @return @c true if the patterns are the same, @c false otherwise.79*/80static bool compare_canonical_partitionings(81const uint64_t part1[BIT_PATTERN_WORDS],82const uint64_t part2[BIT_PATTERN_WORDS]83) {84return (part1[0] == part2[0])85#if BIT_PATTERN_WORDS > 186&& (part1[1] == part2[1])87#endif88#if BIT_PATTERN_WORDS > 289&& (part1[2] == part2[2])90#endif91#if BIT_PATTERN_WORDS > 392&& (part1[3] == part2[3])93#endif94#if BIT_PATTERN_WORDS > 495&& (part1[4] == part2[4])96#endif97#if BIT_PATTERN_WORDS > 598&& (part1[5] == part2[5])99#endif100#if BIT_PATTERN_WORDS > 6101&& (part1[6] == part2[6])102#endif103;104}105106/**107* @brief Hash function used for procedural partition assignment.108*109* @param inp The hash seed.110*111* @return The hashed value.112*/113static uint32_t hash52(114uint32_t inp115) {116inp ^= inp >> 15;117118// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)119inp *= 0xEEDE0891;120inp ^= inp >> 5;121inp += inp << 16;122inp ^= inp >> 7;123inp ^= inp >> 3;124inp ^= inp << 6;125inp ^= inp >> 17;126return inp;127}128129/**130* @brief Select texel assignment for a single coordinate.131*132* @param seed The seed - the partition index from the block.133* @param x The texel X coordinate in the block.134* @param y The texel Y coordinate in the block.135* @param z The texel Z coordinate in the block.136* @param partition_count The total partition count of this encoding.137* @param small_block @c true if the block has fewer than 32 texels.138*139* @return The assigned partition index for this texel.140*/141static uint8_t select_partition(142int seed,143int x,144int y,145int z,146int partition_count,147bool small_block148) {149// For small blocks bias the coordinates to get better distribution150if (small_block)151{152x <<= 1;153y <<= 1;154z <<= 1;155}156157seed += (partition_count - 1) * 1024;158159uint32_t rnum = hash52(seed);160161uint8_t seed1 = rnum & 0xF;162uint8_t seed2 = (rnum >> 4) & 0xF;163uint8_t seed3 = (rnum >> 8) & 0xF;164uint8_t seed4 = (rnum >> 12) & 0xF;165uint8_t seed5 = (rnum >> 16) & 0xF;166uint8_t seed6 = (rnum >> 20) & 0xF;167uint8_t seed7 = (rnum >> 24) & 0xF;168uint8_t seed8 = (rnum >> 28) & 0xF;169uint8_t seed9 = (rnum >> 18) & 0xF;170uint8_t seed10 = (rnum >> 22) & 0xF;171uint8_t seed11 = (rnum >> 26) & 0xF;172uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;173174// Squaring all the seeds in order to bias their distribution towards lower values.175seed1 *= seed1;176seed2 *= seed2;177seed3 *= seed3;178seed4 *= seed4;179seed5 *= seed5;180seed6 *= seed6;181seed7 *= seed7;182seed8 *= seed8;183seed9 *= seed9;184seed10 *= seed10;185seed11 *= seed11;186seed12 *= seed12;187188int sh1, sh2;189if (seed & 1)190{191sh1 = (seed & 2 ? 4 : 5);192sh2 = (partition_count == 3 ? 6 : 5);193}194else195{196sh1 = (partition_count == 3 ? 6 : 5);197sh2 = (seed & 2 ? 4 : 5);198}199200int sh3 = (seed & 0x10) ? sh1 : sh2;201202seed1 >>= sh1;203seed2 >>= sh2;204seed3 >>= sh1;205seed4 >>= sh2;206seed5 >>= sh1;207seed6 >>= sh2;208seed7 >>= sh1;209seed8 >>= sh2;210211seed9 >>= sh3;212seed10 >>= sh3;213seed11 >>= sh3;214seed12 >>= sh3;215216int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);217int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);218int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);219int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);220221// Apply the saw222a &= 0x3F;223b &= 0x3F;224c &= 0x3F;225d &= 0x3F;226227// Remove some of the components if we are to output < 4 partitions.228if (partition_count <= 3)229{230d = 0;231}232233if (partition_count <= 2)234{235c = 0;236}237238if (partition_count <= 1)239{240b = 0;241}242243uint8_t partition;244if (a >= b && a >= c && a >= d)245{246partition = 0;247}248else if (b >= c && b >= d)249{250partition = 1;251}252else if (c >= d)253{254partition = 2;255}256else257{258partition = 3;259}260261return partition;262}263264/**265* @brief Generate a single partition info structure.266*267* @param[out] bsd The block size information.268* @param partition_count The partition count of this partitioning.269* @param partition_index The partition index / seed of this partitioning.270* @param partition_remap_index The remapped partition index of this partitioning.271* @param[out] pi The partition info structure to populate.272*273* @return True if this is a useful partition index, False if we can skip it.274*/275static bool generate_one_partition_info_entry(276block_size_descriptor& bsd,277unsigned int partition_count,278unsigned int partition_index,279unsigned int partition_remap_index,280partition_info& pi281) {282int texels_per_block = bsd.texel_count;283bool small_block = texels_per_block < 32;284285uint8_t *partition_of_texel = pi.partition_of_texel;286287// Assign texels to partitions288int texel_idx = 0;289int counts[BLOCK_MAX_PARTITIONS] { 0 };290for (unsigned int z = 0; z < bsd.zdim; z++)291{292for (unsigned int y = 0; y < bsd.ydim; y++)293{294for (unsigned int x = 0; x < bsd.xdim; x++)295{296uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);297pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);298*partition_of_texel++ = part;299}300}301}302303// Fill loop tail so we can overfetch later304for (unsigned int i = 0; i < partition_count; i++)305{306size_t ptex_count = counts[i];307size_t ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);308for (size_t j = ptex_count; j < ptex_count_simd; j++)309{310pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];311}312}313314// Populate the actual procedural partition count315if (counts[0] == 0)316{317pi.partition_count = 0;318}319else if (counts[1] == 0)320{321pi.partition_count = 1;322}323else if (counts[2] == 0)324{325pi.partition_count = 2;326}327else if (counts[3] == 0)328{329pi.partition_count = 3;330}331else332{333pi.partition_count = 4;334}335336// Populate the partition index337pi.partition_index = static_cast<uint16_t>(partition_index);338339// Populate the coverage bitmaps for 2/3/4 partitions340uint64_t* bitmaps { nullptr };341if (partition_count == 2)342{343bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];344}345else if (partition_count == 3)346{347bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];348}349else if (partition_count == 4)350{351bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];352}353354for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)355{356pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);357}358359// Valid partitionings have texels in all of the requested partitions360bool valid = pi.partition_count == partition_count;361362if (bitmaps)363{364// Populate the partition coverage bitmap365for (unsigned int i = 0; i < partition_count; i++)366{367bitmaps[i] = 0ULL;368}369370unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);371for (unsigned int i = 0; i < texels_to_process; i++)372{373unsigned int idx = bsd.kmeans_texels[i];374bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;375}376}377378return valid;379}380381static void build_partition_table_for_one_partition_count(382block_size_descriptor& bsd,383bool can_omit_partitionings,384unsigned int partition_count_cutoff,385unsigned int partition_count,386partition_info* ptab,387uint64_t* canonical_patterns388) {389unsigned int next_index = 0;390bsd.partitioning_count_selected[partition_count - 1] = 0;391bsd.partitioning_count_all[partition_count - 1] = 0;392393// Skip tables larger than config max partition count if we can omit modes394if (can_omit_partitionings && (partition_count > partition_count_cutoff))395{396return;397}398399// Iterate through twice400// - Pass 0: Keep selected partitionings401// - Pass 1: Keep non-selected partitionings (skip if in omit mode)402unsigned int max_iter = can_omit_partitionings ? 1 : 2;403404// Tracker for things we built in the first iteration405uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };406for (unsigned int x = 0; x < max_iter; x++)407{408for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)409{410// Don't include things we built in the first pass411if ((x == 1) && build[i])412{413continue;414}415416bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);417if ((x == 0) && !keep_useful)418{419continue;420}421422generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);423bool keep_canonical = true;424for (unsigned int j = 0; j < next_index; j++)425{426bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS);427if (match)428{429keep_canonical = false;430break;431}432}433434if (keep_useful && keep_canonical)435{436if (x == 0)437{438bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);439bsd.partitioning_count_selected[partition_count - 1]++;440bsd.partitioning_count_all[partition_count - 1]++;441build[i] = 1;442next_index++;443}444}445else446{447if (x == 1)448{449bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);450bsd.partitioning_count_all[partition_count - 1]++;451next_index++;452}453}454}455}456}457458/* See header for documentation. */459void init_partition_tables(460block_size_descriptor& bsd,461bool can_omit_partitionings,462unsigned int partition_count_cutoff463) {464partition_info* par_tab2 = bsd.partitionings;465partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;466partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;467partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;468469generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);470bsd.partitioning_count_selected[0] = 1;471bsd.partitioning_count_all[0] = 1;472473uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];474475build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);476build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);477build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);478479delete[] canonical_patterns;480}481482483