Path: blob/master/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
9896 views
// SPDX-License-Identifier: Apache-2.01// ----------------------------------------------------------------------------2// Copyright 2011-2024 Arm Limited3//4// Licensed under the Apache License, Version 2.0 (the "License"); you may not5// use this file except in compliance with the License. You may obtain a copy6// of the License at:7//8// http://www.apache.org/licenses/LICENSE-2.09//10// Unless required by applicable law or agreed to in writing, software11// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT12// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the13// License for the specific language governing permissions and limitations14// under the License.15// ----------------------------------------------------------------------------1617/**18* @brief Functions to decompress a symbolic block.19*/2021#include "astcenc_internal.h"2223#include <stdio.h>24#include <assert.h>2526/**27* @brief Compute the integer linear interpolation of two color endpoints.28*29* @param u8_mask The mask for lanes using decode_unorm8 rather than decode_f16.30* @param color0 The endpoint0 color.31* @param color1 The endpoint1 color.32* @param weights The interpolation weight (between 0 and 64).33*34* @return The interpolated color.35*/36static vint4 lerp_color_int(37vmask4 u8_mask,38vint4 color0,39vint4 color1,40vint4 weights41) {42vint4 weight1 = weights;43vint4 weight0 = vint4(64) - weight1;4445vint4 color = (color0 * weight0) + (color1 * weight1) + vint4(32);46color = asr<6>(color);4748// For decode_unorm8 values force the codec to bit replicate. This allows the49// rest of the codec to assume the full 0xFFFF range for everything and ignore50// the decode_mode setting51vint4 color_u8 = asr<8>(color) * vint4(257);52color = select(color, color_u8, u8_mask);5354return color;55}5657/**58* @brief Convert integer color value into a float value for the decoder.59*60* @param data The integer color value post-interpolation.61* @param lns_mask If set treat lane as HDR (LNS) else LDR (unorm16).62*63* @return The float color value.64*/65static inline vfloat4 decode_texel(66vint4 data,67vmask4 lns_mask68) {69vint4 color_lns = vint4::zero();70vint4 color_unorm = vint4::zero();7172if (any(lns_mask))73{74color_lns = lns_to_sf16(data);75}7677if (!all(lns_mask))78{79color_unorm = unorm16_to_sf16(data);80}8182// Pick components and then convert to FP1683vint4 datai = select(color_unorm, color_lns, lns_mask);84return float16_to_float(datai);85}8687/* See header for documentation. */88void unpack_weights(89const block_size_descriptor& bsd,90const symbolic_compressed_block& scb,91const decimation_info& di,92bool is_dual_plane,93int weights_plane1[BLOCK_MAX_TEXELS],94int weights_plane2[BLOCK_MAX_TEXELS]95) {96// Safe to overshoot as all arrays are allocated to full size97if (!is_dual_plane)98{99// Build full 64-entry weight lookup table100vtable_64x8 table;101vtable_prepare(table, scb.weights);102103for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)104{105vint summed_value(8);106vint weight_count(di.texel_weight_count + i);107int max_weight_count = hmax_s(weight_count);108109promise(max_weight_count > 0);110for (int j = 0; j < max_weight_count; j++)111{112vint texel_weights(di.texel_weights_tr[j] + i);113vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);114115summed_value += vtable_lookup_32bit(table, texel_weights) * texel_weights_int;116}117118store(lsr<4>(summed_value), weights_plane1 + i);119}120}121else122{123// Build a 32-entry weight lookup table per plane124// Plane 1125vtable_32x8 tab_plane1;126vtable_prepare(tab_plane1, scb.weights);127128// Plane 2129vtable_32x8 tab_plane2;130vtable_prepare(tab_plane2, scb.weights + 32);131132for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)133{134vint sum_plane1(8);135vint sum_plane2(8);136137vint weight_count(di.texel_weight_count + i);138int max_weight_count = hmax_s(weight_count);139140promise(max_weight_count > 0);141for (int j = 0; j < max_weight_count; j++)142{143vint texel_weights(di.texel_weights_tr[j] + i);144vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);145146sum_plane1 += vtable_lookup_32bit(tab_plane1, texel_weights) * texel_weights_int;147sum_plane2 += vtable_lookup_32bit(tab_plane2, texel_weights) * texel_weights_int;148}149150store(lsr<4>(sum_plane1), weights_plane1 + i);151store(lsr<4>(sum_plane2), weights_plane2 + i);152}153}154}155156/**157* @brief Return an FP32 NaN value for use in error colors.158*159* This NaN encoding will turn into 0xFFFF when converted to an FP16 NaN.160*161* @return The float color value.162*/163static float error_color_nan()164{165if32 v;166v.u = 0xFFFFE000U;167return v.f;168}169170/* See header for documentation. */171void decompress_symbolic_block(172astcenc_profile decode_mode,173const block_size_descriptor& bsd,174int xpos,175int ypos,176int zpos,177const symbolic_compressed_block& scb,178image_block& blk179) {180blk.xpos = xpos;181blk.ypos = ypos;182blk.zpos = zpos;183184blk.data_min = vfloat4::zero();185blk.data_mean = vfloat4::zero();186blk.data_max = vfloat4::zero();187blk.grayscale = false;188189// If we detected an error-block, blow up immediately.190if (scb.block_type == SYM_BTYPE_ERROR)191{192for (unsigned int i = 0; i < bsd.texel_count; i++)193{194blk.data_r[i] = error_color_nan();195blk.data_g[i] = error_color_nan();196blk.data_b[i] = error_color_nan();197blk.data_a[i] = error_color_nan();198blk.rgb_lns[i] = 0;199blk.alpha_lns[i] = 0;200}201202return;203}204205if ((scb.block_type == SYM_BTYPE_CONST_F16) ||206(scb.block_type == SYM_BTYPE_CONST_U16))207{208vfloat4 color;209uint8_t use_lns = 0;210211// UNORM16 constant color block212if (scb.block_type == SYM_BTYPE_CONST_U16)213{214vint4 colori(scb.constant_color);215216// Determine the UNORM8 rounding on the decode217vmask4 u8_mask = get_u8_component_mask(decode_mode, blk);218219// The real decoder would just use the top 8 bits, but we rescale220// in to a 16-bit value that rounds correctly.221vint4 colori_u8 = asr<8>(colori) * 257;222colori = select(colori, colori_u8, u8_mask);223224vint4 colorf16 = unorm16_to_sf16(colori);225color = float16_to_float(colorf16);226}227// FLOAT16 constant color block228else229{230switch (decode_mode)231{232case ASTCENC_PRF_LDR_SRGB:233case ASTCENC_PRF_LDR:234color = vfloat4(error_color_nan());235break;236case ASTCENC_PRF_HDR_RGB_LDR_A:237case ASTCENC_PRF_HDR:238// Constant-color block; unpack from FP16 to FP32.239color = float16_to_float(vint4(scb.constant_color));240use_lns = 1;241break;242}243}244245for (unsigned int i = 0; i < bsd.texel_count; i++)246{247blk.data_r[i] = color.lane<0>();248blk.data_g[i] = color.lane<1>();249blk.data_b[i] = color.lane<2>();250blk.data_a[i] = color.lane<3>();251blk.rgb_lns[i] = use_lns;252blk.alpha_lns[i] = use_lns;253}254255return;256}257258// Get the appropriate partition-table entry259int partition_count = scb.partition_count;260const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);261262// Get the appropriate block descriptors263const auto& bm = bsd.get_block_mode(scb.block_mode);264const auto& di = bsd.get_decimation_info(bm.decimation_mode);265266bool is_dual_plane = static_cast<bool>(bm.is_dual_plane);267268// Unquantize and undecimate the weights269int plane1_weights[BLOCK_MAX_TEXELS];270int plane2_weights[BLOCK_MAX_TEXELS];271unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);272273// Now that we have endpoint colors and weights, we can unpack texel colors274int plane2_component = scb.plane2_component;275vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);276277vmask4 u8_mask = get_u8_component_mask(decode_mode, blk);278279for (int i = 0; i < partition_count; i++)280{281// Decode the color endpoints for this partition282vint4 ep0;283vint4 ep1;284bool rgb_lns;285bool a_lns;286287unpack_color_endpoints(decode_mode,288scb.color_formats[i],289scb.color_values[i],290rgb_lns, a_lns,291ep0, ep1);292293vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns);294295int texel_count = pi.partition_texel_count[i];296for (int j = 0; j < texel_count; j++)297{298int tix = pi.texels_of_partition[i][j];299vint4 weight = select(vint4(plane1_weights[tix]), vint4(plane2_weights[tix]), plane2_mask);300vint4 color = lerp_color_int(u8_mask, ep0, ep1, weight);301vfloat4 colorf = decode_texel(color, lns_mask);302303blk.data_r[tix] = colorf.lane<0>();304blk.data_g[tix] = colorf.lane<1>();305blk.data_b[tix] = colorf.lane<2>();306blk.data_a[tix] = colorf.lane<3>();307}308}309}310311#if !defined(ASTCENC_DECOMPRESS_ONLY)312313/* See header for documentation. */314float compute_symbolic_block_difference_2plane(315const astcenc_config& config,316const block_size_descriptor& bsd,317const symbolic_compressed_block& scb,318const image_block& blk319) {320// If we detected an error-block, blow up immediately.321if (scb.block_type == SYM_BTYPE_ERROR)322{323return ERROR_CALC_DEFAULT;324}325326assert(scb.block_mode >= 0);327assert(scb.partition_count == 1);328assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 1);329330// Get the appropriate block descriptor331const block_mode& bm = bsd.get_block_mode(scb.block_mode);332const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);333334// Unquantize and undecimate the weights335int plane1_weights[BLOCK_MAX_TEXELS];336int plane2_weights[BLOCK_MAX_TEXELS];337unpack_weights(bsd, scb, di, true, plane1_weights, plane2_weights);338339vmask4 plane2_mask = vint4::lane_id() == vint4(scb.plane2_component);340341vfloat4 summa = vfloat4::zero();342343// Decode the color endpoints for this partition344vint4 ep0;345vint4 ep1;346bool rgb_lns;347bool a_lns;348349unpack_color_endpoints(config.profile,350scb.color_formats[0],351scb.color_values[0],352rgb_lns, a_lns,353ep0, ep1);354355vmask4 u8_mask = get_u8_component_mask(config.profile, blk);356357// Unpack and compute error for each texel in the partition358unsigned int texel_count = bsd.texel_count;359for (unsigned int i = 0; i < texel_count; i++)360{361vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask);362vint4 colori = lerp_color_int(u8_mask, ep0, ep1, weight);363364vfloat4 color = int_to_float(colori);365vfloat4 oldColor = blk.texel(i);366367// Compare error using a perceptual decode metric for RGBM textures368if (config.flags & ASTCENC_FLG_MAP_RGBM)369{370// Fail encodings that result in zero weight M pixels. Note that this can cause371// "interesting" artifacts if we reject all useful encodings - we typically get max372// brightness encodings instead which look just as bad. We recommend users apply a373// bias to their stored M value, limiting the lower value to 16 or 32 to avoid374// getting small M values post-quantization, but we can't prove it would never375// happen, especially at low bit rates ...376if (color.lane<3>() == 0.0f)377{378return -ERROR_CALC_DEFAULT;379}380381// Compute error based on decoded RGBM color382color = vfloat4(383color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,384color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,385color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,3861.0f387);388389oldColor = vfloat4(390oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,391oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,392oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,3931.0f394);395}396397vfloat4 error = oldColor - color;398error = min(abs(error), 1e15f);399error = error * error;400401summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);402}403404return summa.lane<0>();405}406407/* See header for documentation. */408float compute_symbolic_block_difference_1plane(409const astcenc_config& config,410const block_size_descriptor& bsd,411const symbolic_compressed_block& scb,412const image_block& blk413) {414assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 0);415416// If we detected an error-block, blow up immediately.417if (scb.block_type == SYM_BTYPE_ERROR)418{419return ERROR_CALC_DEFAULT;420}421422assert(scb.block_mode >= 0);423424// Get the appropriate partition-table entry425unsigned int partition_count = scb.partition_count;426const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);427428// Get the appropriate block descriptor429const block_mode& bm = bsd.get_block_mode(scb.block_mode);430const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);431432// Unquantize and undecimate the weights433int plane1_weights[BLOCK_MAX_TEXELS];434unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);435436vmask4 u8_mask = get_u8_component_mask(config.profile, blk);437438vfloat4 summa = vfloat4::zero();439for (unsigned int i = 0; i < partition_count; i++)440{441// Decode the color endpoints for this partition442vint4 ep0;443vint4 ep1;444bool rgb_lns;445bool a_lns;446447unpack_color_endpoints(config.profile,448scb.color_formats[i],449scb.color_values[i],450rgb_lns, a_lns,451ep0, ep1);452453// Unpack and compute error for each texel in the partition454unsigned int texel_count = pi.partition_texel_count[i];455for (unsigned int j = 0; j < texel_count; j++)456{457unsigned int tix = pi.texels_of_partition[i][j];458vint4 colori = lerp_color_int(u8_mask, ep0, ep1,459vint4(plane1_weights[tix]));460461vfloat4 color = int_to_float(colori);462vfloat4 oldColor = blk.texel(tix);463464// Compare error using a perceptual decode metric for RGBM textures465if (config.flags & ASTCENC_FLG_MAP_RGBM)466{467// Fail encodings that result in zero weight M pixels. Note that this can cause468// "interesting" artifacts if we reject all useful encodings - we typically get max469// brightness encodings instead which look just as bad. We recommend users apply a470// bias to their stored M value, limiting the lower value to 16 or 32 to avoid471// getting small M values post-quantization, but we can't prove it would never472// happen, especially at low bit rates ...473if (color.lane<3>() == 0.0f)474{475return -ERROR_CALC_DEFAULT;476}477478// Compute error based on decoded RGBM color479color = vfloat4(480color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,481color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,482color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,4831.0f484);485486oldColor = vfloat4(487oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,488oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,489oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,4901.0f491);492}493494vfloat4 error = oldColor - color;495error = min(abs(error), 1e15f);496error = error * error;497498summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);499}500}501502return summa.lane<0>();503}504505/* See header for documentation. */506float compute_symbolic_block_difference_1plane_1partition(507const astcenc_config& config,508const block_size_descriptor& bsd,509const symbolic_compressed_block& scb,510const image_block& blk511) {512// If we detected an error-block, blow up immediately.513if (scb.block_type == SYM_BTYPE_ERROR)514{515return ERROR_CALC_DEFAULT;516}517518assert(scb.block_mode >= 0);519assert(bsd.get_partition_info(scb.partition_count, scb.partition_index).partition_count == 1);520521// Get the appropriate block descriptor522const block_mode& bm = bsd.get_block_mode(scb.block_mode);523const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);524525// Unquantize and undecimate the weights526ASTCENC_ALIGNAS int plane1_weights[BLOCK_MAX_TEXELS];527unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);528529// Decode the color endpoints for this partition530vint4 ep0;531vint4 ep1;532bool rgb_lns;533bool a_lns;534535unpack_color_endpoints(config.profile,536scb.color_formats[0],537scb.color_values[0],538rgb_lns, a_lns,539ep0, ep1);540541vmask4 u8_mask = get_u8_component_mask(config.profile, blk);542543// Unpack and compute error for each texel in the partition544vfloatacc summav = vfloatacc::zero();545546vint lane_id = vint::lane_id();547548unsigned int texel_count = bsd.texel_count;549for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)550{551// Compute EP1 contribution552vint weight1 = vint::loada(plane1_weights + i);553vint ep1_r = vint(ep1.lane<0>()) * weight1;554vint ep1_g = vint(ep1.lane<1>()) * weight1;555vint ep1_b = vint(ep1.lane<2>()) * weight1;556vint ep1_a = vint(ep1.lane<3>()) * weight1;557558// Compute EP0 contribution559vint weight0 = vint(64) - weight1;560vint ep0_r = vint(ep0.lane<0>()) * weight0;561vint ep0_g = vint(ep0.lane<1>()) * weight0;562vint ep0_b = vint(ep0.lane<2>()) * weight0;563vint ep0_a = vint(ep0.lane<3>()) * weight0;564565// Combine contributions566vint colori_r = asr<6>(ep0_r + ep1_r + vint(32));567vint colori_g = asr<6>(ep0_g + ep1_g + vint(32));568vint colori_b = asr<6>(ep0_b + ep1_b + vint(32));569vint colori_a = asr<6>(ep0_a + ep1_a + vint(32));570571// If using a U8 decode mode bit replicate top 8 bits572// so rest of codec can assume 0xFFFF max range everywhere573vint colori_r8 = asr<8>(colori_r) * vint(257);574colori_r = select(colori_r, colori_r8, vmask(u8_mask.lane<0>()));575576vint colori_g8 = asr<8>(colori_g) * vint(257);577colori_g = select(colori_g, colori_g8, vmask(u8_mask.lane<1>()));578579vint colori_b8 = asr<8>(colori_b) * vint(257);580colori_b = select(colori_b, colori_b8, vmask(u8_mask.lane<2>()));581582vint colori_a8 = asr<8>(colori_a) * vint(257);583colori_a = select(colori_a, colori_a8, vmask(u8_mask.lane<3>()));584585// Compute color diff586vfloat color_r = int_to_float(colori_r);587vfloat color_g = int_to_float(colori_g);588vfloat color_b = int_to_float(colori_b);589vfloat color_a = int_to_float(colori_a);590591vfloat color_orig_r = loada(blk.data_r + i);592vfloat color_orig_g = loada(blk.data_g + i);593vfloat color_orig_b = loada(blk.data_b + i);594vfloat color_orig_a = loada(blk.data_a + i);595596vfloat color_error_r = min(abs(color_orig_r - color_r), vfloat(1e15f));597vfloat color_error_g = min(abs(color_orig_g - color_g), vfloat(1e15f));598vfloat color_error_b = min(abs(color_orig_b - color_b), vfloat(1e15f));599vfloat color_error_a = min(abs(color_orig_a - color_a), vfloat(1e15f));600601// Compute squared error metric602color_error_r = color_error_r * color_error_r;603color_error_g = color_error_g * color_error_g;604color_error_b = color_error_b * color_error_b;605color_error_a = color_error_a * color_error_a;606607vfloat metric = color_error_r * blk.channel_weight.lane<0>()608+ color_error_g * blk.channel_weight.lane<1>()609+ color_error_b * blk.channel_weight.lane<2>()610+ color_error_a * blk.channel_weight.lane<3>();611612// Mask off bad lanes613vmask mask = lane_id < vint(texel_count);614lane_id += vint(ASTCENC_SIMD_WIDTH);615haccumulate(summav, metric, mask);616}617618return hadd_s(summav);619}620621#endif622623624