Path: blob/master/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.h
9903 views
// File: basisu_astc_hdr_common.h1#pragma once2#include "basisu_enc.h"3#include "basisu_gpu_texture.h"4#include "../transcoder/basisu_astc_helpers.h"5#include "../transcoder/basisu_astc_hdr_core.h"67namespace basisu8{9const uint32_t MAX_ASTC_HDR_BLOCK_W = 6, MAX_ASTC_HDR_BLOCK_H = 6;10const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = 6 * 6;1112const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec)13const uint32_t MODE7_TOTAL_SUBMODES = 6;1415// [ise_range][0] = # levels16// [ise_range][1...] = lerp value [0,64]17// in ASTC order18// Supported ISE weight ranges: 0 to 11, 12 total19const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels20const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=16 levels21const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2;22const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32;2324extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33];2526const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias27const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now2829const float LDR_TO_HDR_NITS = 100.0f;3031struct astc_hdr_codec_base_options32{33float m_r_err_scale, m_g_err_scale;34float m_q_log_bias;3536bool m_ultra_quant;3738// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.39bool m_allow_uber_mode;4041bool m_mode7_full_s_optimization;4243bool m_take_first_non_clamping_mode11_submode;44bool m_take_first_non_clamping_mode7_submode;4546bool m_disable_weight_plane_optimization;4748astc_hdr_codec_base_options() { init(); }4950void init();51};5253inline int get_bit(54int src_val, int src_bit)55{56assert(src_bit >= 0 && src_bit <= 31);57int bit = (src_val >> src_bit) & 1;58return bit;59}6061inline void pack_bit(62int& dst, int dst_bit,63int src_val, int src_bit = 0)64{65assert(dst_bit >= 0 && dst_bit <= 31);66int bit = get_bit(src_val, src_bit);67dst |= (bit << dst_bit);68}6970inline uint32_t get_max_qlog(uint32_t bits)71{72switch (bits)73{74case 7: return basist::MAX_QLOG7;75case 8: return basist::MAX_QLOG8;76case 9: return basist::MAX_QLOG9;77case 10: return basist::MAX_QLOG10;78case 11: return basist::MAX_QLOG11;79case 12: return basist::MAX_QLOG12;80case 16: return basist::MAX_QLOG16;81default: assert(0); break;82}83return 0;84}8586#if 087inline float get_max_qlog_val(uint32_t bits)88{89switch (bits)90{91case 7: return MAX_QLOG7_VAL;92case 8: return MAX_QLOG8_VAL;93case 9: return MAX_QLOG9_VAL;94case 10: return MAX_QLOG10_VAL;95case 11: return MAX_QLOG11_VAL;96case 12: return MAX_QLOG12_VAL;97case 16: return MAX_QLOG16_VAL;98default: assert(0); break;99}100return 0;101}102#endif103104#if 0105// Input is the low 11 bits of the qlog106// Returns the 10-bit mantissa of the half float value107int qlog11_to_half_float_mantissa(int M)108{109assert(M <= 0x7FF);110int Mt;111if (M < 512)112Mt = 3 * M;113else if (M >= 1536)114Mt = 5 * M - 2048;115else116Mt = 4 * M - 512;117return (Mt >> 3);118}119#endif120121// Input is the 10-bit mantissa of the half float value122// Output is the 11-bit qlog value123// Inverse of qlog11_to_half_float_mantissa()124inline int half_float_mantissa_to_qlog11(int hf)125{126int q0 = (hf * 8 + 2) / 3;127int q1 = (hf * 8 + 2048 + 4) / 5;128129if (q0 < 512)130return q0;131else if (q1 >= 1536)132return q1;133134int q2 = (hf * 8 + 512 + 2) / 4;135return q2;136}137138inline int half_to_qlog16(int hf)139{140assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf));141142// extract 5 bits exponent, which is carried through to qlog16 unchanged143const int exp = (hf >> 10) & 0x1F;144145// extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless)146const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);147assert(mantissa <= 0x7FF);148149// Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights.150uint32_t qlog16 = (exp << 11) | mantissa;151152// should be a lossless operation153assert(astc_helpers::qlog16_to_half(qlog16) == hf);154155return qlog16;156}157158void interpolate_qlog12_colors(159const int e[2][3],160basist::half_float* pDecoded_half,161vec3F* pDecoded_float,162uint32_t n, uint32_t ise_weight_range);163164bool get_astc_hdr_mode_11_block_colors(165const uint8_t* pEndpoints,166basist::half_float* pDecoded_half,167vec3F* pDecoded_float,168uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);169170bool get_astc_hdr_mode_7_block_colors(171const uint8_t* pEndpoints,172basist::half_float* pDecoded_half,173vec3F* pDecoded_float,174uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);175176// Fast high precision piecewise linear approximation of log2(bias+x).177// Half may be zero, positive or denormal. No NaN/Inf/negative.178BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias)179{180union { float f; int32_t i; uint32_t u; } fi;181182fi.f = fast_half_to_float_pos_not_inf_or_nan(x);183184assert(fi.f >= 0.0f);185186fi.f += log_bias;187188return (double)fi.u; // approx log2f(fi.f), need to return double for the precision189}190191BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias)192{193union { float f; int32_t i; uint32_t u; } fi;194195fi.f = fast_half_to_float_pos_not_inf_or_nan(x);196197assert(fi.f >= 0.0f);198199fi.f += log_bias;200201return fi.u;202}203204double eval_selectors(205uint32_t num_pixels,206uint8_t* pWeights,207uint32_t ise_weight_range,208const basist::half_float* pBlock_pixels_half,209uint32_t num_weight_levels,210const basist::half_float* pDecoded_half,211const astc_hdr_codec_base_options& coptions,212uint32_t usable_selector_bitmask = UINT32_MAX);213214double eval_selectors_dual_plane(215uint32_t channel_index,216uint32_t num_pixels,217uint8_t* pWeights0, uint8_t* pWeights1,218const basist::half_float* pBlock_pixels_half,219uint32_t num_weight_levels,220const basist::half_float* pDecoded_half,221const astc_hdr_codec_base_options& coptions,222uint32_t usable_selector_bitmask = UINT32_MAX);223224double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions);225226const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0;227const uint32_t MAX_MODE7_SUBMODE_INDEX = 5;228229bool pack_mode7(230const vec3F& high_color_q16, const float s_q16,231uint32_t ise_endpoint_range, uint8_t* pEndpoints,232uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing233const astc_hdr_codec_base_options& coptions,234int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);235236bool try_mode7(237uint32_t num_pixels,238uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,239const vec3F& high_color_q16, const float s_q16,240const basist::half_float block_pixels_half[][3],241uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,242uint32_t ise_endpoint_range,243int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX);244245bool pack_mode11(246const vec3F& low_color_q16, const vec3F& high_color_q16,247uint32_t ise_endpoint_range, uint8_t* pEndpoints,248const astc_hdr_codec_base_options& coptions,249bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);250251bool try_mode11(uint32_t num_pixels,252uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,253const vec3F& low_color_q16, const vec3F& high_color_q16,254const basist::half_float block_pixels_half[][3],255uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,256bool constrain_ise_weight_selectors,257int32_t first_submode, int32_t last_submode, bool ignore_clamping);258259bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,260uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,261const vec3F& low_color_q16, const vec3F& high_color_q16,262const basist::half_float block_pixels_half[][3],263uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,264bool constrain_ise_weight_selectors,265int32_t first_submode, int32_t last_submode, bool ignore_clamping);266267const int FIRST_MODE11_SUBMODE_INDEX = -1;268const int MAX_MODE11_SUBMODE_INDEX = 7;269270enum opt_mode_t271{272cNoOpt,273cOrdinaryLeastSquares,274cWeightedLeastSquares,275cWeightedLeastSquaresHeavy,276cWeightedAverage277};278279struct encode_astc_block_stats280{281uint32_t m_num_pixels;282vec3F m_mean_q16;283vec3F m_axis_q16;284285void init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]);286};287288double encode_astc_hdr_block_mode_11(289uint32_t num_pixels,290const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],291uint32_t ise_weight_range,292uint32_t& best_submode,293double cur_block_error,294uint8_t* blk_endpoints, uint8_t* blk_weights,295const astc_hdr_codec_base_options& coptions,296bool direct_only,297uint32_t ise_endpoint_range,298bool uber_mode,299bool constrain_ise_weight_selectors,300int32_t first_submode, int32_t last_submode, bool ignore_clamping,301opt_mode_t opt_mode,302const encode_astc_block_stats *pBlock_stats = nullptr);303304double encode_astc_hdr_block_downsampled_mode_11(305uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,306uint32_t ise_weight_range, uint32_t ise_endpoint_range,307uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],308double cur_block_error,309int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,310uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,311const astc_hdr_codec_base_options& coptions,312const encode_astc_block_stats* pBlock_stats = nullptr);313314double encode_astc_hdr_block_mode_11_dual_plane(315uint32_t num_pixels,316const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],317uint32_t channel_index, // 0-2318uint32_t ise_weight_range,319uint32_t& best_submode,320double cur_block_error,321uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,322const astc_hdr_codec_base_options& coptions,323bool direct_only,324uint32_t ise_endpoint_range,325bool uber_mode,326bool constrain_ise_weight_selectors,327int32_t first_submode, int32_t last_submode,328bool ignore_clamping);329330double encode_astc_hdr_block_mode_7(331uint32_t num_pixels,332const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],333uint32_t ise_weight_range,334uint32_t& best_submode,335double cur_block_error,336uint8_t* blk_endpoints, //[4]337uint8_t* blk_weights, // [num_pixels]338const astc_hdr_codec_base_options& coptions,339uint32_t ise_endpoint_range,340int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX,341const encode_astc_block_stats *pBlock_stats = nullptr);342343//--------------------------------------------------------------------------------------------------------------------------344345struct mode11_log_desc346{347int32_t m_submode;348int32_t m_maj_comp;349350// Or R0, G0, B0 if maj_comp==3 (direct)351int32_t m_a; // positive352int32_t m_c; // positive353int32_t m_b0; // positive354355// Or R1, G1, B1 if maj_comp==3 (direct)356int32_t m_b1; // positive357int32_t m_d0; // if not direct, is signed358int32_t m_d1; // if not direct, is signed359360// limits if not direct361int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits;362int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val;363364void clear() { clear_obj(*this); }365366bool is_direct() const { return m_maj_comp == 3; }367};368369//--------------------------------------------------------------------------------------------------------------------------370bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh);371372bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);373bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);374void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16);375376bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints);377void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc);378379void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);380void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);381382void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights);383384const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height);385386void downsample_weight_grid(387const float* pMatrix_weights,388uint32_t bx, uint32_t by, // source/from dimension (block size)389uint32_t wx, uint32_t wy, // dest/to dimension (grid size)390const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]391uint8_t* pDst_weights); // [wy][wx]392393void downsample_ise_weights(394uint32_t weight_ise_range, uint32_t quant_weight_ise_range,395uint32_t block_w, uint32_t block_h,396uint32_t grid_w, uint32_t grid_h,397const uint8_t* pSrc_weights, uint8_t* pDst_weights);398399void downsample_ise_weights_dual_plane(400uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,401uint32_t block_w, uint32_t block_h,402uint32_t grid_w, uint32_t grid_h,403const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,404uint8_t* pDst_weights);405406bool refine_endpoints(407uint32_t cem,408uint32_t endpoint_ise_range,409uint8_t* pEndpoint_vals, // the endpoints to optimize410uint32_t block_w, uint32_t block_h, // block dimensions411uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid412uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],413const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets414astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode);415416extern bool g_astc_hdr_enc_initialized;417418// This MUST be called before encoding any blocks.419void astc_hdr_enc_init();420421} // namespace basisu422423424425