Path: blob/master/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.cpp
9903 views
// File: basisu_astc_hdr_common.cpp1#include "basisu_enc.h"2#include "basisu_gpu_texture.h"3#include "../transcoder/basisu_astc_helpers.h"4#include "../transcoder/basisu_astc_hdr_core.h"5#include "basisu_astc_hdr_common.h"67using namespace basist;89#ifndef __EMSCRIPTEN__10#define BASISU_MULTITHREADED_INIT (0)11#endif1213namespace basisu14{1516const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33] =17{18{ 2, 0, 64 }, // 0, note ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block)19{ 3, 0, 32, 64 }, // 120{ 4, 0, 21, 43, 64 }, // 221{ 5, 0, 16, 32, 48, 64 }, // 322{ 6, 0, 64, 12, 52, 25, 39 }, // 423{ 8, 0, 9, 18, 27, 37, 46, 55, 64 }, // 524{ 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 }, // 625{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 726{ 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }, // 827{ 20, 0,64,16,48,3,61,19,45,6,58,23,41,9,55,26,38,13,51,29,35}, // 928{ 24, 0,64,8,56,16,48,24,40,2,62,11,53,19,45,27,37,5,59,13,51,22,42,30,34}, // 1029{ 32, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64}, // 1130};3132//--------------------------------------------------------------------------------------------------------------------------3334const float DEF_R_ERROR_SCALE = 2.0f;35const float DEF_G_ERROR_SCALE = 3.0f;3637void astc_hdr_codec_base_options::init()38{39m_r_err_scale = DEF_R_ERROR_SCALE;40m_g_err_scale = DEF_G_ERROR_SCALE;41m_q_log_bias = Q_LOG_BIAS_4x4;4243m_ultra_quant = false;4445// Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. 
TODO: Could include the uber result in the output.46m_allow_uber_mode = false;4748m_mode7_full_s_optimization = true;4950m_take_first_non_clamping_mode11_submode = false;51m_take_first_non_clamping_mode7_submode = false;5253m_disable_weight_plane_optimization = true;54}5556//--------------------------------------------------------------------------------------------------------------------------57// max usable qlog8 value is 247, 248=inf, >=249 is nan58// max usable qlog7 value is 123, 124=inf, >=125 is nan5960//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.06162// nearest values given a positive half float value (only)63static uint16_t g_half_to_qlog7[32768], g_half_to_qlog8[32768];6465const uint32_t HALF_TO_QLOG_TABS_MIN_BITS = 7;66const uint32_t HALF_TO_QLOG_TABS_MAX_BITS = 8;67static uint16_t* g_pHalf_to_qlog_tabs[2] =68{69g_half_to_qlog7,70g_half_to_qlog8,71};7273#if 074static inline uint32_t half_to_qlog7_8(half_float h, uint32_t bits)75{76assert((bits >= HALF_TO_QLOG_TABS_MIN_BITS) && (bits <= HALF_TO_QLOG_TABS_MAX_BITS));77assert(h < 32768);7879return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][h];80}81#endif8283// TODO: Tune this84static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits)85{86assert((desired_bits >= 7) && (desired_bits <= 12));87assert(q16 <= 65535);8889const uint32_t shift = 16 - desired_bits;90uint32_t e = (q16 + (1U << (shift - 1U)) - 1U) >> shift;9192uint32_t max_val = (1U << desired_bits) - 1U;93e = minimum<uint32_t>(e, max_val);9495return e;96}9798static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float>& qlog16_to_float)99{100assert(bits >= 5 && bits <= 12);101const uint32_t max_val = (1 << bits) - 1;102103const uint32_t FIRST_INVALID_QLOG16_INDEX = 63488; // first inf, rest are inf/nan's104assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX]));105assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX + 
1]));106assert(!std::isnan(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));107assert(!std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));108109// For all positive half-floats110for (uint32_t h = 0; h < 32768; h++)111{112// Skip invalid values113if (is_half_inf_or_nan((half_float)h))114continue;115const float desired_val = half_to_float((half_float)h);116117float best_err = BIG_FLOAT_VAL;118uint32_t best_qlog = 0;119120double prev_err = BIG_FLOAT_VAL;121122// For all possible qlog's123for (uint32_t i = 0; i <= max_val; i++)124{125// Skip invalid values126uint32_t idx = i << (16 - bits);127if (idx >= FIRST_INVALID_QLOG16_INDEX)128break;129130float v = qlog16_to_float[idx];131//assert(!std::isinf(v) && !std::isnan(v)); // too clostly in debug132133// Compute error134float err = fabsf(v - desired_val);135136if (err > prev_err)137{138// Every remaining entry will have guaranteed higher error139break;140}141142prev_err = err;143144// Find best145if (err < best_err)146{147best_err = err;148best_qlog = i;149150if (best_err == 0.0f)151break;152}153}154155pTable[h] = (uint16_t)best_qlog;156}157}158159static void init_qlog_tables()160{161basisu::vector<float> qlog16_to_float(65536);162163// for all possible qlog16, compute the corresponding half float164for (uint32_t i = 0; i <= 65535; i++)165{166half_float h = astc_helpers::qlog16_to_half(i);167168qlog16_to_float[i] = half_to_float(h);169}170171#if BASISU_MULTITHREADED_INIT172job_pool jp(3);173174for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)175{176jp.add_job( [bits, &qlog16_to_float]() { compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); });177}178179jp.wait_for_all();180#else181// for all possible half floats, find the nearest qlog5-12 float182for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)183{184compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - 
HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float);185186#if 0187std::vector<uint16_t> check_tab(32768);188compute_half_to_qlog_table_orig(bits, check_tab.data(), qlog16_to_float);189for (uint32_t i = 0; i < (1 << bits); i++)190{191assert(check_tab[i] == g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][i]);192}193#endif194}195#endif // BASISU_MULTITHREADED_INIT196}197198//--------------------------------------------------------------------------------------------------------------------------199200static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels)201{202vec3F mean(0.0f);203204for (uint32_t i = 0; i < num_pixels; i++)205{206const vec4F& p = pPixels[i];207208mean[0] += p[0];209mean[1] += p[1];210mean[2] += p[2];211}212213return mean / static_cast<float>(num_pixels);214}215216static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color)217{218float cov[6] = { 0, 0, 0, 0, 0, 0 };219220for (uint32_t i = 0; i < num_pixels; i++)221{222const vec4F& v = pPixels[i];223224float r = v[0] - mean_color[0];225float g = v[1] - mean_color[1];226float b = v[2] - mean_color[2];227228cov[0] += r * r;229cov[1] += r * g;230cov[2] += r * b;231cov[3] += g * g;232cov[4] += g * b;233cov[5] += b * b;234}235236float xr = .9f, xg = 1.0f, xb = .7f;237for (uint32_t iter = 0; iter < 3; iter++)238{239float r = xr * cov[0] + xg * cov[1] + xb * cov[2];240float g = xr * cov[1] + xg * cov[3] + xb * cov[4];241float b = xr * cov[2] + xg * cov[4] + xb * cov[5];242243float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b));244245if (m > 1e-10f)246{247m = 1.0f / m;248249r *= m;250g *= m;251b *= m;252}253254xr = r;255xg = g;256xb = b;257}258259float len = xr * xr + xg * xg + xb * xb;260261vec3F axis(0.5773502691f);262263if (len >= 1e-10f)264{265len = 1.0f / sqrtf(len);266267xr *= len;268xg *= len;269xb *= len;270271axis.set(xr, xg, xb);272}273274return axis;275}276277void encode_astc_block_stats::init(uint32_t num_pixels, const vec4F 
pBlock_pixels_q16[])278{279m_num_pixels = num_pixels;280m_mean_q16 = calc_mean(num_pixels, pBlock_pixels_q16);281m_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, m_mean_q16);282}283284static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr)285{286#if 0287assert(mean[0] >= input_box[0][0]);288assert(mean[1] >= input_box[0][1]);289assert(mean[2] >= input_box[0][2]);290assert(mean[0] <= input_box[1][0]);291assert(mean[1] <= input_box[1][1]);292assert(mean[2] <= input_box[1][2]);293#endif294295if (pInside)296*pInside = false;297298vec3F k(mean + dir * df);299if (colorspace_box.contains(k))300{301if (pInside)302*pInside = true;303304return k;305}306307// starts inside308vec3F s(mean);309310// ends outside311vec3F e(mean + dir * df);312313// a ray guaranteed to go from the outside to inside314ray3F r(e, (s - e).normalize_in_place());315vec3F c;316float t = 0.0f;317318intersection::result res = intersection::ray_aabb(c, t, r, input_box);319if (res != intersection::cSuccess)320c = k;321322return c;323}324325// all in Q16 space, 0-65535326static bool compute_least_squares_endpoints_rgb(327uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,328vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)329{330// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf331// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf332// I did this in matrix form first, expanded out all the ops, then optimized it a bit.333float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;334float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;335float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;336float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;337338for (uint32_t i = 0; i < N; i++)339{340const uint32_t sel = pSelectors[i];341342z00 += pSelector_weights[sel][0];343z10 += 
pSelector_weights[sel][1];344z11 += pSelector_weights[sel][2];345346float w = pSelector_weights[sel][3];347348q00_r += w * pColors[i][0];349t_r += pColors[i][0];350351q00_g += w * pColors[i][1];352t_g += pColors[i][1];353354q00_b += w * pColors[i][2];355t_b += pColors[i][2];356}357358q10_r = t_r - q00_r;359q10_g = t_g - q00_g;360q10_b = t_b - q00_b;361362z01 = z10;363364float det = z00 * z11 - z01 * z10;365if (det == 0.0f)366return false;367368det = 1.0f / det;369370float iz00, iz01, iz10, iz11;371iz00 = z11 * det;372iz01 = -z01 * det;373iz10 = -z10 * det;374iz11 = z00 * det;375376(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);377(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);378379(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);380(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);381382(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);383(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);384385for (uint32_t c = 0; c < 3; c++)386{387float l = (*pXl)[c], h = (*pXh)[c];388389if (input_box.get_dim(c) < .0000125f)390{391l = input_box[0][c];392h = input_box[1][c];393}394395(*pXl)[c] = l;396(*pXh)[c] = h;397}398399vec3F mean((*pXl + *pXh) * .5f);400vec3F dir(*pXh - *pXl);401402float ln = dir.length();403if (ln)404{405dir /= ln;406407float ld = (*pXl - mean).dot(dir);408float hd = (*pXh - mean).dot(dir);409410aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));411412bool was_inside1 = false;413414vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);415if (!was_inside1)416*pXl = l;417418bool was_inside2 = false;419vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);420if (!was_inside2)421*pXh = h;422}423424pXl->clamp(0.0f, MAX_QLOG16_VAL);425pXh->clamp(0.0f, MAX_QLOG16_VAL);426427return true;428}429430static bool compute_least_squares_endpoints_rgb_raw_weights(431uint32_t N, const uint8_t* pRaw_weights,432vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)433{434// Least squares using normal 
equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf435// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf436// I did this in matrix form first, expanded out all the ops, then optimized it a bit.437float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;438float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;439float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;440float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;441442for (uint32_t i = 0; i < N; i++)443{444const float wt = (float)pRaw_weights[i] * (1.0f / 64.0f);445assert(wt <= 1.0f);446447const float w0 = wt * wt;448const float w1 = (1.0f - wt) * wt;449const float w2 = (1.0f - wt) * (1.0f - wt);450const float w3 = wt;451452z00 += w0;453z10 += w1;454z11 += w2;455456float w = w3;457q00_r += w * pColors[i][0];458t_r += pColors[i][0];459460q00_g += w * pColors[i][1];461t_g += pColors[i][1];462463q00_b += w * pColors[i][2];464t_b += pColors[i][2];465}466467q10_r = t_r - q00_r;468q10_g = t_g - q00_g;469q10_b = t_b - q00_b;470471z01 = z10;472473float det = z00 * z11 - z01 * z10;474if (det == 0.0f)475return false;476477det = 1.0f / det;478479float iz00, iz01, iz10, iz11;480iz00 = z11 * det;481iz01 = -z01 * det;482iz10 = -z10 * det;483iz11 = z00 * det;484485(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);486(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);487488(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);489(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);490491(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);492(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);493494for (uint32_t c = 0; c < 3; c++)495{496float l = (*pXl)[c], h = (*pXh)[c];497498if (input_box.get_dim(c) < .0000125f)499{500l = input_box[0][c];501h = input_box[1][c];502}503504(*pXl)[c] = l;505(*pXh)[c] = h;506}507508vec3F mean((*pXl + *pXh) * .5f);509vec3F dir(*pXh - *pXl);510511float ln = dir.length();512if (ln)513{514dir /= ln;515516float ld = (*pXl - mean).dot(dir);517float hd = (*pXh - 
mean).dot(dir);518519aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));520521bool was_inside1 = false;522523vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);524if (!was_inside1)525*pXl = l;526527bool was_inside2 = false;528vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);529if (!was_inside2)530*pXh = h;531}532533pXl->clamp(0.0f, MAX_QLOG16_VAL);534pXh->clamp(0.0f, MAX_QLOG16_VAL);535536return true;537}538539static bool compute_least_squares_endpoints_2D(540uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,541vec2F* pXl, vec2F* pXh, const vec2F* pColors, const aabb2F& input_box)542{543// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf544// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf545// I did this in matrix form first, expanded out all the ops, then optimized it a bit.546float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;547float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;548float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;549550for (uint32_t i = 0; i < N; i++)551{552const uint32_t sel = pSelectors[i];553z00 += pSelector_weights[sel][0];554z10 += pSelector_weights[sel][1];555z11 += pSelector_weights[sel][2];556557float w = pSelector_weights[sel][3];558q00_r += w * pColors[i][0];559t_r += pColors[i][0];560561q00_g += w * pColors[i][1];562t_g += pColors[i][1];563}564565q10_r = t_r - q00_r;566q10_g = t_g - q00_g;567568z01 = z10;569570float det = z00 * z11 - z01 * z10;571if (det == 0.0f)572return false;573574det = 1.0f / det;575576float iz00, iz01, iz10, iz11;577iz00 = z11 * det;578iz01 = -z01 * det;579iz10 = -z10 * det;580iz11 = z00 * det;581582(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);583(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);584585(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);586(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);587588for 
(uint32_t c = 0; c < 2; c++)589{590float l = (*pXl)[c], h = (*pXh)[c];591592if (input_box.get_dim(c) < .0000125f)593{594l = input_box[0][c];595h = input_box[1][c];596}597598(*pXl)[c] = l;599(*pXh)[c] = h;600}601602pXl->clamp(0.0f, MAX_QLOG16_VAL);603pXh->clamp(0.0f, MAX_QLOG16_VAL);604605return true;606}607608static bool compute_least_squares_endpoints_1D(609uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,610vec1F* pXl, vec1F* pXh, const vec1F* pColors, const aabb1F& input_box)611{612// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf613// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf614// I did this in matrix form first, expanded out all the ops, then optimized it a bit.615float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;616float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;617618for (uint32_t i = 0; i < N; i++)619{620const uint32_t sel = pSelectors[i];621z00 += pSelector_weights[sel][0];622z10 += pSelector_weights[sel][1];623z11 += pSelector_weights[sel][2];624625float w = pSelector_weights[sel][3];626q00_r += w * pColors[i][0];627t_r += pColors[i][0];628}629630q10_r = t_r - q00_r;631632z01 = z10;633634float det = z00 * z11 - z01 * z10;635if (det == 0.0f)636return false;637638det = 1.0f / det;639640float iz00, iz01, iz10, iz11;641iz00 = z11 * det;642iz01 = -z01 * det;643iz10 = -z10 * det;644iz11 = z00 * det;645646(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);647(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);648649for (uint32_t c = 0; c < 1; c++)650{651float l = (*pXl)[c], h = (*pXh)[c];652653if (input_box.get_dim(c) < .0000125f)654{655l = input_box[0][c];656h = input_box[1][c];657}658659(*pXl)[c] = l;660(*pXh)[c] = h;661}662663pXl->clamp(0.0f, MAX_QLOG16_VAL);664pXh->clamp(0.0f, MAX_QLOG16_VAL);665666return true;667}668669static bool compute_weighted_least_squares_endpoints_rgb(670uint32_t N,671const uint8_t* pSelectors, 
const vec4F* pSelector_weights, const float* pRaw_weights, /* ti */672const float* pEmphasis_weights /* wi */,673vec3F* pXl, vec3F* pXh,674const vec4F* pColors, /* pi */675const aabb3F& input_box)676{677(void)input_box;678679assert(N);680assert((pSelectors && pSelector_weights) || pRaw_weights);681assert(pEmphasis_weights);682683// Pi = pixel colors684// Ti = project weights, [0,1]685// Wi = emphasis weights686687float total_wi = 0.0f;688for (uint32_t i = 0; i < N; i++)689total_wi += pEmphasis_weights[i];690691if (total_wi == 0.0f)692return false;693694float weighted_mean_tw = 0.0f;695float weighted_mean_pw[3] = { 0.0f };696697for (uint32_t i = 0; i < N; i++)698{699const float wi = pEmphasis_weights[i];700const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];701const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];702703weighted_mean_tw += wi * ti;704705weighted_mean_pw[0] += wi * pi_r;706weighted_mean_pw[1] += wi * pi_g;707weighted_mean_pw[2] += wi * pi_b;708}709710weighted_mean_tw /= total_wi;711712weighted_mean_pw[0] /= total_wi;713weighted_mean_pw[1] /= total_wi;714weighted_mean_pw[2] /= total_wi;715716float spt[3] = { 0.0f };717float stt = 0.0f;718719for (uint32_t i = 0; i < N; i++)720{721const float wi = pEmphasis_weights[i];722const float ti = pSelectors ? 
pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];723const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];724725spt[0] += wi * (pi_r - weighted_mean_pw[0]) * (ti - weighted_mean_tw);726spt[1] += wi * (pi_g - weighted_mean_pw[1]) * (ti - weighted_mean_tw);727spt[2] += wi * (pi_b - weighted_mean_pw[2]) * (ti - weighted_mean_tw);728729stt += wi * square(ti - weighted_mean_tw);730}731732if (stt == 0.0f)733return false;734735for (uint32_t i = 0; i < 3; i++)736{737float h = weighted_mean_pw[i] + (spt[i] / stt) * (1.0f - weighted_mean_tw);738float l = weighted_mean_pw[i] - (spt[i] / stt) * weighted_mean_tw;739740(*pXh)[i] = h;741(*pXl)[i] = l;742}743744pXl->clamp(0.0f, MAX_QLOG16_VAL);745pXh->clamp(0.0f, MAX_QLOG16_VAL);746747return true;748}749750static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS];751752static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index753static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index754755static void encode_astc_hdr_init()756{757// Precomputed weight constants used during least fit determination. 
For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w758for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++)759{760const uint32_t num_levels = g_ise_weight_lerps[range][0];761assert(num_levels == astc_helpers::get_ise_levels(range));762assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));763764for (uint32_t i = 0; i < num_levels; i++)765{766float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f);767768g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);769}770}771772for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++)773{774const uint32_t num_levels = g_ise_weight_lerps[ise_range][0];775assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));776777uint32_t s[MAX_SUPPORTED_WEIGHT_LEVELS];778for (uint32_t i = 0; i < num_levels; i++)779s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i;780781std::sort(s, s + num_levels);782783for (uint32_t i = 0; i < num_levels; i++)784g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF);785786for (uint32_t i = 0; i < num_levels; i++)787g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i;788}789790//init_quantize_tables();791}792793bool g_astc_hdr_enc_initialized;794795void astc_hdr_enc_init()796{797if (g_astc_hdr_enc_initialized)798return;799800astc_hdr_core_init();801802astc_helpers::init_tables(true);803804init_qlog_tables();805806encode_astc_hdr_init();807808g_astc_hdr_enc_initialized = true;809}810811void interpolate_qlog12_colors(812const int e[2][3],813half_float* pDecoded_half,814vec3F* pDecoded_float,815uint32_t n, uint32_t ise_weight_range)816{817assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));818819for (uint32_t i = 0; i < 2; i++)820{821for (uint32_t 
j = 0; j < 3; j++)822{823assert(in_range(e[i][j], 0, 0xFFF));824}825}826827for (uint32_t i = 0; i < n; i++)828{829const int c = g_ise_weight_lerps[ise_weight_range][1 + i];830assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range));831832half_float rf, gf, bf;833834{835uint32_t r0 = e[0][0] << 4;836uint32_t r1 = e[1][0] << 4;837int ri = (r0 * (64 - c) + r1 * c + 32) / 64;838rf = astc_helpers::qlog16_to_half(ri);839}840841{842uint32_t g0 = e[0][1] << 4;843uint32_t g1 = e[1][1] << 4;844int gi = (g0 * (64 - c) + g1 * c + 32) / 64;845gf = astc_helpers::qlog16_to_half(gi);846}847848{849uint32_t b0 = e[0][2] << 4;850uint32_t b1 = e[1][2] << 4;851int bi = (b0 * (64 - c) + b1 * c + 32) / 64;852bf = astc_helpers::qlog16_to_half(bi);853}854855if (pDecoded_half)856{857pDecoded_half[i * 3 + 0] = rf;858pDecoded_half[i * 3 + 1] = gf;859pDecoded_half[i * 3 + 2] = bf;860}861862if (pDecoded_float)863{864pDecoded_float[i][0] = half_to_float(rf);865pDecoded_float[i][1] = half_to_float(gf);866pDecoded_float[i][2] = half_to_float(bf);867}868}869}870871// decoded in ASTC order, not linear order872// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded873bool get_astc_hdr_mode_11_block_colors(874const uint8_t* pEndpoints,875half_float* pDecoded_half,876vec3F* pDecoded_float,877uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)878{879assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));880881int e[2][3];882if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range))883return false;884885interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);886887return true;888}889890// decoded in ASTC order, not linear order891// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded892bool get_astc_hdr_mode_7_block_colors(893const uint8_t* pEndpoints,894half_float* pDecoded_half,895vec3F* 
pDecoded_float,896uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)897{898assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));899900int e[2][3];901if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range))902return false;903904interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);905906return true;907}908909double eval_selectors_f(910uint32_t num_pixels,911uint8_t* pWeights,912const half_float* pBlock_pixels_half,913uint32_t num_weight_levels,914const half_float* pDecoded_half,915const astc_hdr_codec_base_options& coptions,916uint32_t usable_selector_bitmask)917{918assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));919assert(usable_selector_bitmask);920921const float R_WEIGHT = coptions.m_r_err_scale;922const float G_WEIGHT = coptions.m_g_err_scale;923924double total_error = 0;925926#ifdef _DEBUG927for (uint32_t i = 0; i < num_weight_levels; i++)928{929assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));930assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));931assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));932}933#endif934935double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];936937for (uint32_t i = 0; i < num_weight_levels; i++)938{939const half_float* p = &pDecoded_half[i * 3];940941decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);942decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);943decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);944}945946for (uint32_t p = 0; p < num_pixels; p++)947{948const half_float* pDesired_half = &pBlock_pixels_half[p * 3];949950const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias);951const double desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias);952const double desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);953954double lowest_e = BIG_FLOAT_VAL;955956//double dists[MAX_SUPPORTED_WEIGHT_LEVELS];957958// this is an 
approximation of MSLE959for (uint32_t i = 0; i < num_weight_levels; i++)960{961if (((1 << i) & usable_selector_bitmask) == 0)962continue;963964// compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE965double rd = decoded_half_q[i][0] - desired_half_r_q;966double gd = decoded_half_q[i][1] - desired_half_g_q;967double bd = decoded_half_q[i][2] - desired_half_b_q;968969double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;970971//dists[i] = e;972973if (e < lowest_e)974{975lowest_e = e;976pWeights[p] = (uint8_t)i;977}978}979980total_error += lowest_e;981982} // p983984return total_error;985}986987double eval_selectors(988uint32_t num_pixels,989uint8_t* pWeights,990uint32_t ise_weight_range,991const half_float* pBlock_pixels_half,992uint32_t num_weight_levels,993const half_float* pDecoded_half,994const astc_hdr_codec_base_options& coptions,995uint32_t usable_selector_bitmask)996{997if ((coptions.m_r_err_scale != 2.0f) || (coptions.m_g_err_scale != 3.0f))998{999return eval_selectors_f(1000num_pixels,1001pWeights,1002pBlock_pixels_half,1003num_weight_levels,1004pDecoded_half,1005coptions,1006usable_selector_bitmask);1007}10081009assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));1010assert(usable_selector_bitmask);10111012uint64_t total_error = 0;10131014#ifdef _DEBUG1015for (uint32_t i = 0; i < num_weight_levels; i++)1016{1017assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));1018assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));1019assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));1020}1021#endif10221023int64_t decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];10241025for (uint32_t i = 0; i < num_weight_levels; i++)1026{1027const half_float* p = &pDecoded_half[i * 3];10281029decoded_half_q[i][0] = q2(p[0], coptions.m_q_log_bias);1030decoded_half_q[i][1] = q2(p[1], coptions.m_q_log_bias);1031decoded_half_q[i][2] = q2(p[2], coptions.m_q_log_bias);1032}10331034if (usable_selector_bitmask 
!= UINT32_MAX)1035{1036for (uint32_t p = 0; p < num_pixels; p++)1037{1038const half_float* pDesired_half = &pBlock_pixels_half[p * 3];10391040const int64_t desired_half_r_q = q2(pDesired_half[0], coptions.m_q_log_bias);1041const int64_t desired_half_g_q = q2(pDesired_half[1], coptions.m_q_log_bias);1042const int64_t desired_half_b_q = q2(pDesired_half[2], coptions.m_q_log_bias);10431044int64_t lowest_e = INT64_MAX;10451046for (uint32_t i = 0; i < num_weight_levels; i++)1047{1048if (((1 << i) & usable_selector_bitmask) == 0)1049continue;10501051int64_t rd = decoded_half_q[i][0] - desired_half_r_q;1052int64_t gd = decoded_half_q[i][1] - desired_half_g_q;1053int64_t bd = decoded_half_q[i][2] - desired_half_b_q;10541055int64_t e = 2 * (rd * rd) + 3 * (gd * gd) + bd * bd;10561057if (e < lowest_e)1058{1059lowest_e = e;1060pWeights[p] = (uint8_t)i;1061}1062}10631064total_error += lowest_e;10651066} // p1067}1068else1069{1070if ((num_weight_levels <= 4) || (coptions.m_disable_weight_plane_optimization))1071{1072for (uint32_t p = 0; p < num_pixels; p++)1073{1074const half_float* pDesired_half = &pBlock_pixels_half[p * 3];10751076const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];10771078const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);1079const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);1080const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);10811082int64_t lowest_e = INT64_MAX;10831084uint32_t i;1085for (i = 0; (i + 1) < num_weight_levels; i += 2)1086{1087int64_t e0, e1;10881089{1090int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs1091int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;1092int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;1093e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)1094}10951096{1097int64_t rd1 = decoded_half_q[i + 1][0] - 
desired_half_r_q;1098int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;1099int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;1100e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);1101}11021103lowest_e = minimum(lowest_e, e0, e1);1104}11051106if (i != num_weight_levels)1107{1108int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;1109int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;1110int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;1111int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;11121113lowest_e = minimum(lowest_e, e0);1114}11151116pWeights[p] = (uint8_t)(lowest_e & 31);11171118total_error += (lowest_e >> 5);11191120} // p1121}1122else1123{1124const auto& weight_val_to_ise_tab = astc_helpers::g_dequant_tables.get_weight_tab(ise_weight_range).m_val_to_ise;1125const int lo_index = weight_val_to_ise_tab[0], hi_index = weight_val_to_ise_tab[64], mid_index = weight_val_to_ise_tab[32];11261127const vec3F low_color((float)pDecoded_half[lo_index * 3 + 0], (float)pDecoded_half[lo_index * 3 + 1], (float)pDecoded_half[lo_index * 3 + 2]);1128const vec3F high_color((float)pDecoded_half[hi_index * 3 + 0], (float)pDecoded_half[hi_index * 3 + 1], (float)pDecoded_half[hi_index * 3 + 2]);1129const vec3F mid_color((float)pDecoded_half[mid_index * 3 + 0], (float)pDecoded_half[mid_index * 3 + 1], (float)pDecoded_half[mid_index * 3 + 2]);11301131const vec3F block_dir(high_color - low_color);11321133for (uint32_t p = 0; p < num_pixels; p++)1134{1135const half_float* pDesired_half = &pBlock_pixels_half[p * 3];11361137const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];11381139const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);1140const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);1141const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);11421143// Determine which side of the middle plane the point is for a 
modest gain1144vec3F c((float)desired_r - mid_color[0], (float)desired_g - mid_color[1], (float)desired_b - mid_color[2]);1145float d = c.dot(block_dir);11461147int i = 0, high_index = (num_weight_levels / 2) + 1;1148if (d >= 0.0f)1149{1150i = num_weight_levels / 2;1151high_index = num_weight_levels;1152}11531154int64_t lowest_e = INT64_MAX;11551156for (; (i + 1) < high_index; i += 2)1157{1158int64_t e0, e1;11591160{1161int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs1162int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;1163int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;1164e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)1165}11661167{1168int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q;1169int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;1170int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;1171e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);1172}11731174lowest_e = minimum(lowest_e, e0, e1);1175}11761177if (i != high_index)1178{1179int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;1180int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;1181int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;1182int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;11831184lowest_e = minimum(lowest_e, e0);1185}11861187pWeights[p] = (uint8_t)(lowest_e & 31);11881189total_error += (lowest_e >> 5);11901191} // p1192}1193}11941195return (double)total_error;1196}11971198//--------------------------------------------------------------------------------------------------------------------------11991200double eval_selectors_dual_plane(1201uint32_t channel_index,1202uint32_t num_pixels,1203uint8_t* pWeights0, uint8_t* pWeights1,1204const half_float* pBlock_pixels_half,1205uint32_t num_weight_levels,1206const half_float* pDecoded_half,1207const astc_hdr_codec_base_options& coptions,1208uint32_t 
usable_selector_bitmask)1209{1210assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));1211assert(usable_selector_bitmask);12121213const float R_WEIGHT = coptions.m_r_err_scale;1214const float G_WEIGHT = coptions.m_g_err_scale;12151216double total_error = 0;12171218#ifdef _DEBUG1219for (uint32_t i = 0; i < num_weight_levels; i++)1220{1221assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));1222assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));1223assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));1224}1225#endif12261227double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];12281229for (uint32_t i = 0; i < num_weight_levels; i++)1230{1231const half_float* p = &pDecoded_half[i * 3];12321233decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);1234decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);1235decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);1236}12371238const double channel_weights[3] = { R_WEIGHT, G_WEIGHT, 1.0f };12391240const uint32_t first_channel = (channel_index + 1) % 3;1241const uint32_t second_channel = (channel_index + 2) % 3;12421243// First plane1244const double first_channel_weight = channel_weights[first_channel];1245const double second_channel_weight = channel_weights[second_channel];12461247for (uint32_t p = 0; p < num_pixels; p++)1248{1249const half_float* pDesired_half = &pBlock_pixels_half[p * 3];12501251const double desired_half_x_q = q(pDesired_half[first_channel], coptions.m_q_log_bias);1252const double desired_half_y_q = q(pDesired_half[second_channel], coptions.m_q_log_bias);12531254double lowest_e = BIG_FLOAT_VAL;12551256// this is an approximation of MSLE1257for (uint32_t i = 0; i < num_weight_levels; i++)1258{1259if (((1 << i) & usable_selector_bitmask) == 0)1260continue;12611262double xd = decoded_half_q[i][first_channel] - desired_half_x_q;1263double yd = decoded_half_q[i][second_channel] - desired_half_y_q;12641265double e = first_channel_weight * (xd * xd) + second_channel_weight * (yd * 
yd);12661267if (e < lowest_e)1268{1269lowest_e = e;1270pWeights0[p] = (uint8_t)i;1271}1272}12731274total_error += lowest_e;12751276} // p12771278// Second plane1279const double alt_channel_weight = channel_weights[channel_index];12801281for (uint32_t p = 0; p < num_pixels; p++)1282{1283const half_float* pDesired_half = &pBlock_pixels_half[p * 3];12841285const double desired_half_a_q = q(pDesired_half[channel_index], coptions.m_q_log_bias);12861287double lowest_e = BIG_FLOAT_VAL;12881289// this is an approximation of MSLE1290for (uint32_t i = 0; i < num_weight_levels; i++)1291{1292if (((1 << i) & usable_selector_bitmask) == 0)1293continue;12941295double ad = decoded_half_q[i][channel_index] - desired_half_a_q;12961297double e = alt_channel_weight * (ad * ad);12981299if (e < lowest_e)1300{1301lowest_e = e;1302pWeights1[p] = (uint8_t)i;1303}1304}13051306total_error += lowest_e;13071308} // p13091310return total_error;1311}13121313//--------------------------------------------------------------------------------------------------------------------------13141315double compute_block_error(uint32_t num_pixels, const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_base_options& coptions)1316{1317const float R_WEIGHT = coptions.m_r_err_scale;1318const float G_WEIGHT = coptions.m_g_err_scale;13191320double total_error = 0;13211322for (uint32_t p = 0; p < num_pixels; p++)1323{1324double rd = q(pOrig_block[p * 3 + 0], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 0], coptions.m_q_log_bias);1325double gd = q(pOrig_block[p * 3 + 1], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 1], coptions.m_q_log_bias);1326double bd = q(pOrig_block[p * 3 + 2], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 2], coptions.m_q_log_bias);13271328double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;13291330total_error += e;1331}13321333return 
total_error;1334}13351336//--------------------------------------------------------------------------------------------------------------------------13371338double compute_block_error_from_raw_weights(1339uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3],1340const uint8_t* pRaw_weights,1341int endpoints_qlog12[2][3],1342const astc_hdr_codec_base_options& coptions)1343{1344// qlog12->qlog161345int trial_e[2][3];1346for (uint32_t i = 0; i < 3; i++)1347{1348assert(endpoints_qlog12[0][i] <= (int)basist::MAX_QLOG12);1349assert(endpoints_qlog12[1][i] <= (int)basist::MAX_QLOG12);13501351trial_e[0][i] = endpoints_qlog12[0][i] << 4;1352trial_e[1][i] = endpoints_qlog12[1][i] << 4;1353}13541355const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;13561357double trial_error = 0;1358for (uint32_t p = 0; p < num_pixels; p++)1359{1360const half_float* pDesired_half = &pBlock_pixels_half[p][0];13611362const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);13631364const uint32_t c = pRaw_weights[p];1365assert(c <= 64);13661367{1368half_float rf, gf, bf;1369{1370uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];1371int ri = (r0 * (64 - c) + r1 * c + 32) / 64;1372rf = astc_helpers::qlog16_to_half(ri);1373}1374{1375uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];1376int gi = (g0 * (64 - c) + g1 * c + 32) / 64;1377gf = astc_helpers::qlog16_to_half(gi);1378}1379{1380uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];1381int bi = (b0 * (64 - c) + b1 * c + 32) / 64;1382bf = astc_helpers::qlog16_to_half(bi);1383}13841385const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);1386const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - 
desired_half_b_q;1387trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;1388}1389}13901391return trial_error;1392}13931394//--------------------------------------------------------------------------------------------------------------------------13951396static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag)1397{1398assert(l < h);13991400if (v < l)1401{1402max_clamp_mag = basisu::maximum<int>(max_clamp_mag, l - v);14031404v = l;1405did_clamp = true;1406}1407else if (v > h)1408{1409max_clamp_mag = basisu::maximum<int>(max_clamp_mag, v - h);14101411v = h;1412did_clamp = true;1413}14141415return v;1416}14171418//--------------------------------------------------------------------------------------------------------------------------14191420const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 };1421const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 };1422const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };14231424// val_q[] must be already packed to qlog9-qlog12.1425bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)1426{1427assert(submode <= 7);14281429const uint32_t a_bits = 9 + (submode >> 1);1430const uint32_t b_bits = s_b_bits[submode];1431const uint32_t c_bits = s_c_bits[submode];1432const uint32_t d_bits = s_d_bits[submode];14331434const int max_a_val = (1 << a_bits) - 1;1435const int max_b_val = (1 << b_bits) - 1;1436const int max_c_val = (1 << c_bits) - 1;14371438// The maximum usable value before it turns to NaN/Inf1439const int max_a_qlog = get_max_qlog(a_bits);1440BASISU_NOTE_UNUSED(max_a_qlog);14411442const int min_d_val = -(1 << (d_bits - 1));1443const int max_d_val = -min_d_val - 1;1444assert((max_d_val - min_d_val + 1) == (1 << d_bits));14451446int highest_q = -1, highest_val = 0, highest_comp = 0;14471448for (uint32_t c = 0; c < 3; c++)1449{1450assert(val_q[0][c] <= 
max_a_qlog);1451assert(val_q[1][c] <= max_a_qlog);1452}14531454for (uint32_t v = 0; v < 2; v++)1455{1456for (uint32_t c = 0; c < 3; c++)1457{1458assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val);14591460if (val_q[v][c] > highest_q)1461{1462highest_q = val_q[v][c];1463highest_val = v;1464highest_comp = c;1465}1466}1467}14681469const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q);14701471if (highest_val != 1)1472{1473for (uint32_t c = 0; c < 3; c++)1474{1475std::swap(val_q[0][c], val_q[1][c]);1476}1477}14781479if (highest_comp)1480{1481std::swap(val_q[0][0], val_q[0][highest_comp]);1482std::swap(val_q[1][0], val_q[1][highest_comp]);1483}14841485int orig_q[2][3];1486memcpy(orig_q, val_q, sizeof(int) * 6);14871488// val[1][0] is now guaranteed to be highest1489int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0;1490int best_max_clamp_mag = 0;1491bool best_did_clamp = false;1492int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } };1493BASISU_NOTE_UNUSED(best_q);1494uint32_t best_dist = UINT_MAX;14951496for (uint32_t pass = 0; pass < 2; pass++)1497{1498int trial_va = val_q[1][0];14991500assert(trial_va <= max_a_val);1501assert(trial_va >= val_q[1][1]);1502assert(trial_va >= val_q[1][2]);15031504assert(trial_va >= val_q[0][0]);1505assert(trial_va >= val_q[0][1]);1506assert(trial_va >= val_q[0][2]);15071508bool did_clamp = false;1509int trial_max_clamp_mag = 0;15101511int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag);1512int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag);1513int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag);1514int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);1515int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, 
max_d_val, did_clamp, trial_max_clamp_mag);15161517if ((early_out_if_clamped) && (did_clamp) && (trial_max_clamp_mag > max_clamp_mag_accept_thresh))1518{1519if ((!had_tie) || (pass == 1))1520{1521max_clamp_mag = trial_max_clamp_mag;1522return true;1523}1524}15251526if (!did_clamp)1527{1528// Make sure decoder gets the expected values1529assert(trial_va == val_q[1][0]);1530assert(trial_va - trial_vb0 == val_q[1][1]);1531assert(trial_va - trial_vb1 == val_q[1][2]);15321533assert((trial_va - trial_vc) == val_q[0][0]);1534assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]);1535assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]);1536}15371538const int r_e0 = clamp<int>(trial_va, 0, max_a_val);1539const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val);1540const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val);15411542const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val);1543const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val);1544const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val);15451546assert(r_e0 <= max_a_qlog);1547assert(r_e1 <= max_a_qlog);1548assert(r_e2 <= max_a_qlog);15491550assert(r_f0 <= max_a_qlog);1551assert(r_f1 <= max_a_qlog);1552assert(r_f2 <= max_a_qlog);15531554if ((!did_clamp) || (!had_tie))1555{1556best_va = trial_va;1557best_vb0 = trial_vb0;1558best_vb1 = trial_vb1;1559best_vc = trial_vc;1560best_vd0 = trial_vd0;1561best_vd1 = trial_vd1;1562best_max_clamp_mag = trial_max_clamp_mag;1563best_did_clamp = did_clamp;15641565best_q[1][0] = r_e0;1566best_q[1][1] = r_e1;1567best_q[1][2] = r_e2;1568best_q[0][0] = r_f0;1569best_q[0][1] = r_f1;1570best_q[0][2] = r_f2;1571break;1572}15731574// we had a tie and it did clamp, try swapping L/H for a potential slight gain15751576const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]);1577const 
uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]);15781579const uint32_t total_dist = r_dist1 + r_dist0;15801581if (total_dist < best_dist)1582{1583best_dist = total_dist;15841585best_va = trial_va;1586best_vb0 = trial_vb0;1587best_vb1 = trial_vb1;1588best_vc = trial_vc;1589best_vd0 = trial_vd0;1590best_vd1 = trial_vd1;1591best_did_clamp = did_clamp;15921593best_q[1][0] = r_e0;1594best_q[1][1] = r_e1;1595best_q[1][2] = r_e2;1596best_q[0][0] = r_f0;1597best_q[0][1] = r_f1;1598best_q[0][2] = r_f2;1599}16001601for (uint32_t c = 0; c < 3; c++)1602std::swap(val_q[0][c], val_q[1][c]);1603}16041605// pack bits now1606int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;16071608int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;1609switch (submode)1610{1611case 0:1612x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1613break;1614case 1:1615x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1616break;1617case 2:1618x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1619break;1620case 3:1621x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1622break;1623case 4:1624x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);1625break;1626case 5:1627x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1628break;1629case 6:1630x0 = get_bit(best_vb0, 6); x1 
= get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);1631break;1632case 7:1633x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);1634break;1635default:1636break;1637}16381639// write mode1640pack_bit(v1, 7, submode, 0);1641pack_bit(v2, 7, submode, 1);1642pack_bit(v3, 7, submode, 2);16431644// highest component1645pack_bit(v4, 7, highest_comp, 0);1646pack_bit(v5, 7, highest_comp, 1);16471648// write bit 8 of va1649pack_bit(v1, 6, best_va, 8);16501651// extra bits1652pack_bit(v2, 6, x0);1653pack_bit(v3, 6, x1);1654pack_bit(v4, 6, x2);1655pack_bit(v5, 6, x3);1656pack_bit(v4, 5, x4);1657pack_bit(v5, 5, x5);16581659v0 = best_va & 0xFF;1660v1 |= (best_vc & 63);1661v2 |= (best_vb0 & 63);1662v3 |= (best_vb1 & 63);1663v4 |= (best_vd0 & 31);1664v5 |= (best_vd1 & 31);16651666assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));16671668pEndpoints[0] = (uint8_t)v0;1669pEndpoints[1] = (uint8_t)v1;1670pEndpoints[2] = (uint8_t)v2;1671pEndpoints[3] = (uint8_t)v3;1672pEndpoints[4] = (uint8_t)v4;1673pEndpoints[5] = (uint8_t)v5;16741675#ifdef _DEBUG1676// Test for valid pack by unpacking1677{1678if (highest_comp)1679{1680std::swap(best_q[0][0], best_q[0][highest_comp]);1681std::swap(best_q[1][0], best_q[1][highest_comp]);16821683std::swap(orig_q[0][0], orig_q[0][highest_comp]);1684std::swap(orig_q[1][0], orig_q[1][highest_comp]);1685}16861687int test_e[2][3];1688decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS);1689for (uint32_t i = 0; i < 2; i++)1690{1691for (uint32_t j = 0; j < 3; j++)1692{1693assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits));16941695if (!best_did_clamp)1696{1697assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) ||1698(orig_q[1 - i][j] == test_e[i][j] >> (12 
- a_bits)));1699}1700}1701}1702}1703#endif17041705max_clamp_mag = best_max_clamp_mag;17061707return best_did_clamp;1708}17091710bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)1711{1712assert(submode <= 7);17131714const uint32_t a_bits = 9 + (submode >> 1);1715const int max_a_val = (1 << a_bits) - 1;17161717// The maximum usable value before it turns to NaN/Inf1718const int max_a_qlog = get_max_qlog(a_bits);17191720int val_q[2][3];17211722for (uint32_t c = 0; c < 3; c++)1723{1724#if 01725// This is very slightly better, but ~8% slower likely due to the table lookups.1726const half_float l = astc_helpers::qlog16_to_half((uint32_t)std::round(low_q16[c]));1727val_q[0][c] = half_to_qlog7_12(l, a_bits);17281729const half_float h = astc_helpers::qlog16_to_half((uint32_t)std::round(high_q16[c]));1730val_q[1][c] = half_to_qlog7_12(h, a_bits);1731#else1732// TODO: Tune quant_qlog16() for higher precision.1733val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits);1734val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits);1735#endif17361737#if 11738if (val_q[0][c] == val_q[1][c])1739{1740#if 01741if (l <= h)1742#else1743if (low_q16[c] < high_q16[c])1744#endif1745{1746if (val_q[0][c])1747val_q[0][c]--;17481749if (val_q[1][c] != max_a_val)1750val_q[1][c]++;1751}1752else1753{1754if (val_q[0][c] != max_a_val)1755val_q[0][c]++;17561757if (val_q[1][c])1758val_q[1][c]--;1759}1760}1761#endif17621763val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog);1764val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog);1765}17661767return pack_astc_mode11_submode(submode, pEndpoints, val_q, max_clamp_mag, early_out_if_clamped, max_clamp_mag_accept_thresh);1768}17691770//--------------------------------------------------------------------------------------------------------------------------17711772void 
pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16)1773{1774float lg = l_q16.dot(vec3F(1.0f)), hg = h_q16.dot(vec3F(1.0f));1775if (lg > hg)1776{1777// Ensure low endpoint is generally less bright than high in direct mode.1778std::swap(l_q16, h_q16);1779}17801781for (uint32_t i = 0; i < 3; i++)1782{1783// TODO: This goes from QLOG16->HALF->QLOG8/71784half_float l_half = astc_helpers::qlog16_to_half(clamp((int)std::round(l_q16[i]), 0, 65535));1785half_float h_half = astc_helpers::qlog16_to_half(clamp((int)std::round(h_q16[i]), 0, 65535));17861787int l_q, h_q;17881789if (i == 2)1790{1791l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)];1792h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)];17931794l_q = minimum<uint32_t>(l_q, MAX_QLOG7);1795h_q = minimum<uint32_t>(h_q, MAX_QLOG7);1796}1797else1798{1799l_q = g_half_to_qlog8[bounds_check((uint32_t)l_half, 0U, 32768U)];1800h_q = g_half_to_qlog8[bounds_check((uint32_t)h_half, 0U, 32768U)];18011802// this quantizes R and G as 7 bits vs. 8, for grayscale.1803//l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)] << 1;1804//h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)] << 1;18051806l_q = minimum<uint32_t>(l_q, MAX_QLOG8);1807h_q = minimum<uint32_t>(h_q, MAX_QLOG8);1808}18091810#if 11811if (l_q == h_q)1812{1813const int m = (i == 2) ? 
MAX_QLOG7 : MAX_QLOG8;18141815if (l_q16[i] <= h_q16[i])1816{1817if (l_q)1818l_q--;18191820if (h_q != m)1821h_q++;1822}1823else1824{1825if (h_q)1826h_q--;18271828if (l_q != m)1829l_q++;1830}1831}1832#endif18331834if (i == 2)1835{1836assert(l_q <= (int)MAX_QLOG7 && h_q <= (int)MAX_QLOG7);1837l_q |= 128;1838h_q |= 128;1839}1840else1841{1842assert(l_q <= (int)MAX_QLOG8 && h_q <= (int)MAX_QLOG8);1843}18441845pEndpoints[2 * i + 0] = (uint8_t)l_q;1846pEndpoints[2 * i + 1] = (uint8_t)h_q;1847}1848}18491850//--------------------------------------------------------------------------------------------------------------------------18511852bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)1853{1854assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));18551856assert(submode <= 5);1857max_clamp_mag = 0;18581859static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 };1860static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 };1861static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 };18621863// The precision of the components1864const uint32_t prec_bits = s_r_bits[submode];18651866int qlog[4], pack_bits[4];18671868for (uint32_t i = 0; i < 4; i++)1869{1870const float f = (i == 3) ? s_q16 : rgb_q16[i];18711872// The # of bits the component is packed into1873if (i == 0)1874pack_bits[i] = s_r_bits[submode];1875else if (i == 3)1876pack_bits[i] = s_s_bits[submode];1877else1878pack_bits[i] = s_g_b_bits[submode];18791880#if 01881// this is slightly worse1882// TODO: going from qlog16 to half loses some precision. 
Then going from half to qlog 7-12 will have extra error.1883half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16);1884qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits);1885#else1886qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits);18871888// Only bias if there are enough texel weights, 4=6 weights1889if (ise_weight_range >= 4)1890{1891// Explictly bias the high color, and the scale up, to better exploit the weights.1892// The quantized range also then encompases the complete input range.1893const uint32_t max_val = (1 << prec_bits) - 1;1894const uint32_t K = 3;1895if (i == 3)1896{1897qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val);1898}1899else1900{1901qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val);1902}1903}1904#endif19051906if (i != 3)1907qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits));19081909// If S=0, we lose freedom for the texel weights to add any value.1910if ((i == 3) && (qlog[i] == 0))1911qlog[i] = 1;1912}19131914uint32_t maj_index = 0;19151916bool did_clamp = false;19171918if (submode != 5)1919{1920int largest_qlog = 0;1921for (uint32_t i = 0; i < 3; i++)1922{1923if (qlog[i] > largest_qlog)1924{1925largest_qlog = qlog[i];1926maj_index = i;1927}1928}19291930if (maj_index)1931{1932std::swap(qlog[0], qlog[maj_index]);1933}19341935assert(qlog[0] >= qlog[1]);1936assert(qlog[0] >= qlog[2]);19371938qlog[1] = qlog[0] - qlog[1];1939qlog[2] = qlog[0] - qlog[2];19401941for (uint32_t i = 1; i < 4; i++)1942{1943const int max_val = (1 << pack_bits[i]) - 1;19441945if (qlog[i] > max_val)1946{1947max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val);1948qlog[i] = max_val;1949did_clamp = true;19501951if ((early_out_if_clamped) && (max_clamp_mag > max_clamp_mag_accept_thresh))1952return true;1953}1954}1955}19561957for (uint32_t i = 0; i < 4; i++)1958{1959const int max_val = (1 << pack_bits[i]) - 1; (void)max_val;19601961assert(qlog[i] <= 
max_val);1962}19631964int mode = 0;19651966int r = qlog[0] & 63; // 6-bits1967int g = qlog[1] & 31; // 5-bits1968int b = qlog[2] & 31; // 5-bits1969int s = qlog[3] & 31; // 5-bits19701971int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0;19721973switch (submode)1974{1975case 0:1976{1977mode = (maj_index << 2) | 0;1978assert((mode & 0xC) != 0xC);19791980x0 = get_bit(qlog[0], 9); // R91981x1 = get_bit(qlog[0], 8); // R81982x2 = get_bit(qlog[0], 7); // R71983x3 = get_bit(qlog[0], 10); // R101984x4 = get_bit(qlog[0], 6); // R61985x5 = get_bit(qlog[3], 6); // S61986x6 = get_bit(qlog[3], 5); // S51987break;1988}1989case 1:1990{1991mode = (maj_index << 2) | 1;1992assert((mode & 0xC) != 0xC);19931994x0 = get_bit(qlog[0], 8); // R81995x1 = get_bit(qlog[1], 5); // G51996x2 = get_bit(qlog[0], 7); // R71997x3 = get_bit(qlog[2], 5); // B51998x4 = get_bit(qlog[0], 6); // R61999x5 = get_bit(qlog[0], 10); // R102000x6 = get_bit(qlog[0], 9); // R92001break;2002}2003case 2:2004{2005mode = (maj_index << 2) | 2;2006assert((mode & 0xC) != 0xC);20072008x0 = get_bit(qlog[0], 9); // R92009x1 = get_bit(qlog[0], 8); // R82010x2 = get_bit(qlog[0], 7); // R72011x3 = get_bit(qlog[0], 6); // R62012x4 = get_bit(qlog[3], 7); // S72013x5 = get_bit(qlog[3], 6); // S62014x6 = get_bit(qlog[3], 5); // S52015break;2016}2017case 3:2018{2019mode = (maj_index << 2) | 3;2020assert((mode & 0xC) != 0xC);20212022x0 = get_bit(qlog[0], 8); // R82023x1 = get_bit(qlog[1], 5); // G52024x2 = get_bit(qlog[0], 7); // R72025x3 = get_bit(qlog[2], 5); // B52026x4 = get_bit(qlog[0], 6); // R62027x5 = get_bit(qlog[3], 6); // S62028x6 = get_bit(qlog[3], 5); // S52029break;2030}2031case 4:2032{2033mode = maj_index | 0xC; // 0b11002034assert((mode & 0xC) == 0xC);2035assert(mode != 0xF);20362037x0 = get_bit(qlog[1], 6); // G62038x1 = get_bit(qlog[1], 5); // G52039x2 = get_bit(qlog[2], 6); // B62040x3 = get_bit(qlog[2], 5); // B52041x4 = get_bit(qlog[0], 6); // R62042x5 = get_bit(qlog[0], 7); // R72043x6 = 
get_bit(qlog[3], 5); // S52044break;2045}2046case 5:2047{2048mode = 0xF;20492050x0 = get_bit(qlog[1], 6); // G62051x1 = get_bit(qlog[1], 5); // G52052x2 = get_bit(qlog[2], 6); // B62053x3 = get_bit(qlog[2], 5); // B52054x4 = get_bit(qlog[0], 6); // R62055x5 = get_bit(qlog[3], 6); // S62056x6 = get_bit(qlog[3], 5); // S52057break;2058}2059default:2060{2061assert(0);2062break;2063}2064}20652066pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r);2067pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g);2068pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b);2069pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s);20702071#ifdef _DEBUG2072// Test for valid pack by unpacking2073{2074const int inv_shift = 12 - prec_bits;20752076int unpacked_e[2][3];2077if (submode != 5)2078{2079unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);2080unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF);2081unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF);20822083unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);2084unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF);2085unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF);2086}2087else2088{2089unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);2090unpacked_e[1][1] = left_shift32(qlog[1], inv_shift);2091unpacked_e[1][2] = left_shift32(qlog[2], inv_shift);20922093unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);2094unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF);2095unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF);2096}20972098if (maj_index)2099{2100std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]);2101std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]);2102}21032104int 
e[2][3];2105decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr);21062107for (uint32_t i = 0; i < 3; i++)2108{2109assert(unpacked_e[0][i] == e[0][i]);2110assert(unpacked_e[1][i] == e[1][i]);2111}2112}2113#endif21142115return did_clamp;2116}21172118//--------------------------------------------------------------------------------------------------------------------------21192120bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints)2121{2122memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);21232124if (desc.is_direct())2125{2126if ((desc.m_a < 0) || (desc.m_c < 0) || (desc.m_b0 < 0))2127return false;21282129if (!((desc.m_a <= 255) && (desc.m_c <= 255) && (desc.m_b0 <= 127)))2130return false;21312132pEndpoints[0] = (uint8_t)desc.m_a;2133pEndpoints[2] = (uint8_t)desc.m_c;2134pEndpoints[4] = (uint8_t)desc.m_b0 | 128;21352136if ((desc.m_b1 < 0) || (desc.m_d0 < 0) || (desc.m_d1 < 0))2137return false;21382139if (!((desc.m_b1 <= 255) && (desc.m_d0 <= 255) && (desc.m_d1 <= 127)))2140return false;21412142pEndpoints[1] = (uint8_t)desc.m_b1;2143pEndpoints[3] = (uint8_t)desc.m_d0;2144pEndpoints[5] = (uint8_t)desc.m_d1 | 128;21452146return true;2147}21482149if (!((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val)))2150return false;2151if (!(((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val))))2152return false;2153if (!((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val)))2154return false;2155if (!((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val)))2156return false;2157if (!((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val)))2158return false;2159if (!((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val)))2160return false;21612162const int va = desc.m_a, vb0 = desc.m_b0, vb1 = desc.m_b1, vc = desc.m_c, vd0 = desc.m_d0, vd1 = desc.m_d1;21632164int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;21652166int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;2167switch (desc.m_submode)2168{2169case 0:2170x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = 
get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2171break;2172case 1:2173x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2174break;2175case 2:2176x0 = get_bit(va, 9); x1 = get_bit(vc, 6); x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2177break;2178case 3:2179x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(va, 9); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2180break;2181case 4:2182x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(va, 9); x5 = get_bit(va, 10);2183break;2184case 5:2185x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(vc, 7); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2186break;2187case 6:2188x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(va, 9); x5 = get_bit(va, 10);2189break;2190case 7:2191x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);2192break;2193default:2194break;2195}21962197// write mode2198pack_bit(v1, 7, desc.m_submode, 0);2199pack_bit(v2, 7, desc.m_submode, 1);2200pack_bit(v3, 7, desc.m_submode, 2);22012202// highest component2203pack_bit(v4, 7, desc.m_maj_comp, 0);2204pack_bit(v5, 7, desc.m_maj_comp, 1);22052206// write bit 8 of va2207pack_bit(v1, 6, va, 8);22082209// extra bits2210pack_bit(v2, 6, x0);2211pack_bit(v3, 6, x1);2212pack_bit(v4, 6, x2);2213pack_bit(v5, 6, x3);2214pack_bit(v4, 5, x4);2215pack_bit(v5, 5, x5);22162217v0 = va & 0xFF;2218v1 |= (vc & 63);2219v2 |= (vb0 & 63);2220v3 |= (vb1 & 63);2221v4 |= (vd0 & 31);2222v5 |= (vd1 & 31);22232224assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));22252226pEndpoints[0] = 
// Sign-extends the low num_src_bits bits of src to a full int.
//
// src:          value whose low num_src_bits bits hold a two's complement number; any higher bits are ignored.
// num_src_bits: width of the packed field, 2..31 inclusive.
// Returns the field's value as a signed int (bit num_src_bits-1 is treated as the sign bit).
static inline int astc_hdr_sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// The masks are built with unsigned arithmetic: at num_src_bits == 31 the signed
	// expression (1 << 31) - 1 would overflow int.
	const uint32_t value_mask = (1U << num_src_bits) - 1U;
	const uint32_t sign_bit = 1U << (num_src_bits - 1);
	const uint32_t bits = static_cast<uint32_t>(src);

	if (bits & sign_bit)
	{
		// Negative: fill everything above the field with ones.
		return static_cast<int>(bits | ~value_mask);
	}

	// Non-negative: drop everything above the field.
	return static_cast<int>(bits & value_mask);
}
x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);2294break;2295case 2:2296pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_c, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);2297break;2298case 3:2299pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 9, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);2300break;2301case 4:2302pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);2303break;2304case 5:2305pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_c, 7, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);2306break;2307case 6:2308pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);2309break;2310case 7:2311default:2312pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);2313break;2314}23152316desc.m_a_bits = 9 + (desc.m_submode >> 1);2317desc.m_b_bits = s_b_bits[desc.m_submode];2318desc.m_c_bits = s_c_bits[desc.m_submode];2319desc.m_d_bits = s_d_bits[desc.m_submode];23202321desc.m_max_a_val = (1 << desc.m_a_bits) - 1;2322desc.m_max_b_val = (1 << desc.m_b_bits) - 1;2323desc.m_max_c_val = (1 << desc.m_c_bits) - 1;23242325desc.m_min_d_val = -(1 << (desc.m_d_bits - 1));2326desc.m_max_d_val = -desc.m_min_d_val - 1;23272328desc.m_d0 = astc_hdr_sign_extend(desc.m_d0, desc.m_d_bits);2329desc.m_d1 = astc_hdr_sign_extend(desc.m_d1, 
//--------------------------------------------------------------------------------------------------------------------------

// Extracts the mode 11 (CEM 11) configuration bits from six endpoint bytes.
// submode_index receives the 3-bit value formed from bit 7 of pEndpoints[1], [2] and [3] (least significant bit first).
// maj_index receives the 2-bit value formed from bit 7 of pEndpoints[4] and [5].
void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index)
{
	submode_index = 0;
	maj_index = 0;

	// Bytes 1..3 each contribute their top bit to the submode index.
	for (int bit = 0; bit < 3; bit++)
		submode_index |= ((pEndpoints[1 + bit] >> 7) & 1) << bit;

	// Bytes 4..5 each contribute their top bit to the major component index.
	for (int bit = 0; bit < 2; bit++)
		maj_index |= ((pEndpoints[4 + bit] >> 7) & 1) << bit;
}

//--------------------------------------------------------------------------------------------------------------------------

// Extracts the mode 7 (CEM 7) configuration from the first three endpoint bytes.
// A 4-bit mode value is assembled from bits 6-7 of pEndpoints[0], bit 7 of pEndpoints[1] and bit 7 of pEndpoints[2]:
//   - top two bits not both set: maj_index = top two bits, submode_index = low two bits (0-3)
//   - top two bits set, value != 15: maj_index = low two bits, submode_index = 4
//   - value == 15: maj_index = 0, submode_index = 5
void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int &maj_index)
{
	const int mode_bits =
		(pEndpoints[0] >> 6) |
		((pEndpoints[1] >> 7) << 2) |
		((pEndpoints[2] >> 7) << 3);

	const int top_pair = mode_bits >> 2;
	const int low_pair = mode_bits & 3;

	if (top_pair != 3)
	{
		maj_index = top_pair;
		submode_index = low_pair;
		return;
	}

	if (mode_bits == 0xF)
	{
		maj_index = 0;
		submode_index = 5;
		return;
	}

	maj_index = low_pair;
	submode_index = 4;
}
ise_endpoint_range, uint8_t* pEndpoints,2387const astc_hdr_codec_base_options& coptions,2388bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)2389{2390uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS];23912392if (direct_only)2393{2394first_submode = -1;2395last_submode = -1;2396}23972398assert(first_submode <= last_submode);2399assert((first_submode >= -1) && (first_submode <= 7));2400assert((last_submode >= -1) && (last_submode <= 7));24012402memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);24032404double best_trial_dist = BIG_FLOAT_VAL;2405int best_submode = 0;24062407for (int submode = last_submode; submode >= first_submode; submode--)2408{2409bool did_clamp = false;2410int max_clamp_mag = 0;2411if (submode == -1)2412{2413// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.2414pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);2415}2416else2417{2418const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;2419did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);24202421if (!ignore_clamping)2422{2423// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.2424if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))2425continue;2426}2427}24282429uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];24302431// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).2432// It could massively distort the endpoints, but still result in a valid encoding.2433basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);24342435int e[2][3];2436if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))2437continue;24382439vec3F e0(2440(float)(e[0][0] << 
4),2441(float)(e[0][1] << 4),2442(float)(e[0][2] << 4)2443);24442445vec3F e1(2446(float)(e[1][0] << 4),2447(float)(e[1][1] << 4),2448(float)(e[1][2] << 4)2449);24502451double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2452double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2453double dist = helpers::minimum(dist0, dist1);24542455if (dist < best_trial_dist)2456{2457best_trial_dist = dist;2458best_submode = submode;2459memcpy(pEndpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);2460}24612462if (coptions.m_take_first_non_clamping_mode11_submode)2463{2464if (!did_clamp)2465break;2466}24672468} // submode24692470if ((coptions.m_ultra_quant) &&2471(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&2472(best_trial_dist != BIG_FLOAT_VAL))2473{2474uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];2475memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE11_ENDPOINTS);24762477for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)2478{2479for (int dt = 0; dt <= 1; dt++)2480{2481const int d = dt ? 
1 : -1;24822483uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];2484memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);24852486int ise = varied_endpoints[c];24872488int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];2489rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);24902491ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];24922493varied_endpoints[c] = (uint8_t)ise;24942495int e[2][3];2496if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))2497continue;24982499vec3F e0(2500(float)(e[0][0] << 4),2501(float)(e[0][1] << 4),2502(float)(e[0][2] << 4)2503);25042505vec3F e1(2506(float)(e[1][0] << 4),2507(float)(e[1][1] << 4),2508(float)(e[1][2] << 4)2509);25102511double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2512double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2513double dist = helpers::minimum(dist0, dist1);25142515if (dist < best_trial_dist)2516{2517best_trial_dist = dist;2518memcpy(pEndpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);2519}2520} // d2521} // c2522} // if (coptions.m_ultra_quant)25232524submode_used = best_submode + 1;25252526return (best_trial_dist != BIG_FLOAT_VAL);2527}25282529bool try_mode11(uint32_t num_pixels,2530uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,2531const vec3F& low_color_q16, const vec3F& high_color_q16,2532const basist::half_float block_pixels_half[][3],2533uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,2534bool constrain_ise_weight_selectors,2535int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 72536{2537assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= 
MAX_SUPPORTED_ISE_WEIGHT_INDEX));2538assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));2539assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));2540assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));25412542half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];2543uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];25442545if (direct_only)2546{2547first_submode = -1;2548last_submode = -1;2549}25502551assert(first_submode <= last_submode);2552assert((first_submode >= -1) && (first_submode <= 7));2553assert((last_submode >= -1) && (last_submode <= 7));25542555uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];2556clear_obj(best_trial_endpoints);2557double best_trial_dist = BIG_FLOAT_VAL;2558int best_submode = 0;25592560for (int submode = last_submode; submode >= first_submode; submode--)2561{2562bool did_clamp = false;2563int max_clamp_mag = 0;2564if (submode == -1)2565{2566// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.2567pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);2568}2569else2570{2571const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;2572did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);25732574if (!ignore_clamping)2575{2576// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.2577if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))2578continue;2579}2580}25812582uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];25832584// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).2585// It could massively distort the endpoints, but still result in a valid 
encoding.2586basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);25872588int e[2][3];2589if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))2590continue;25912592vec3F e0(2593(float)(e[0][0] << 4),2594(float)(e[0][1] << 4),2595(float)(e[0][2] << 4)2596);25972598vec3F e1(2599(float)(e[1][0] << 4),2600(float)(e[1][1] << 4),2601(float)(e[1][2] << 4)2602);26032604double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2605double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2606double dist = helpers::minimum(dist0, dist1);26072608if (dist < best_trial_dist)2609{2610best_trial_dist = dist;2611best_submode = submode;2612memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));2613}26142615if (coptions.m_take_first_non_clamping_mode11_submode)2616{2617if (!did_clamp)2618break;2619}26202621} // submode26222623if ((coptions.m_ultra_quant) &&2624(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&2625(best_trial_dist != BIG_FLOAT_VAL))2626{2627uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];2628memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);26292630for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)2631{2632for (int dt = 0; dt <= 1; dt++)2633{2634const int d = dt ? 
1 : -1;26352636uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];2637memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);26382639int ise = varied_endpoints[c];26402641int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];2642rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);26432644ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];26452646varied_endpoints[c] = (uint8_t)ise;26472648int e[2][3];2649if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))2650continue;26512652vec3F e0(2653(float)(e[0][0] << 4),2654(float)(e[0][1] << 4),2655(float)(e[0][2] << 4)2656);26572658vec3F e1(2659(float)(e[1][0] << 4),2660(float)(e[1][1] << 4),2661(float)(e[1][2] << 4)2662);26632664double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2665double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2666double dist = helpers::minimum(dist0, dist1);26672668if (dist < best_trial_dist)2669{2670best_trial_dist = dist;2671memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);2672}2673} // d2674} // c2675} // if (coptions.m_ultra_quant)26762677bool improved_flag = false;26782679if (best_trial_dist != BIG_FLOAT_VAL)2680{2681if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))2682{2683uint32_t usable_selector_bitmask = UINT32_MAX;2684if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS))2685usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);2686else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS))2687usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);26882689double trial_blk_error = 
eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);2690if (trial_blk_error < cur_block_error)2691{2692cur_block_error = trial_blk_error;2693memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);2694memcpy(pWeights, trial_weights, num_pixels);2695submode_used = best_submode + 1;2696improved_flag = true;2697}2698}2699}27002701return improved_flag;2702}27032704//--------------------------------------------------------------------------------------------------------------------------27052706bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,2707uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,2708const vec3F& low_color_q16, const vec3F& high_color_q16,2709const basist::half_float block_pixels_half[][3],2710uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,2711bool constrain_ise_weight_selectors,2712int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 72713{2714assert(channel_index <= 2);2715assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));2716assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));2717assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));2718assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));27192720half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];2721uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];27222723if (direct_only)2724{2725first_submode = -1;2726last_submode = -1;2727}27282729assert(first_submode <= last_submode);2730assert((first_submode >= -1) && (first_submode 
<= 7));2731assert((last_submode >= -1) && (last_submode <= 7));27322733uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];2734clear_obj(best_trial_endpoints);27352736double best_trial_dist = BIG_FLOAT_VAL;2737int best_submode = 0;27382739for (int submode = last_submode; submode >= first_submode; submode--)2740{2741bool did_clamp = false;2742int max_clamp_mag = 0;2743if (submode == -1)2744{2745// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.2746pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);2747}2748else2749{2750const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;2751did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);27522753if (!ignore_clamping)2754{2755// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.2756if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))2757continue;2758}2759}27602761uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];27622763// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).2764// It could massively distort the endpoints, but still result in a valid encoding.2765basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);27662767int e[2][3];2768if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))2769continue;27702771vec3F e0(2772(float)(e[0][0] << 4),2773(float)(e[0][1] << 4),2774(float)(e[0][2] << 4)2775);27762777vec3F e1(2778(float)(e[1][0] << 4),2779(float)(e[1][1] << 4),2780(float)(e[1][2] << 4)2781);27822783double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2784double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2785double dist = helpers::minimum(dist0, 
dist1);27862787if (dist < best_trial_dist)2788{2789best_trial_dist = dist;2790best_submode = submode;2791memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));2792}27932794if (coptions.m_take_first_non_clamping_mode11_submode)2795{2796if (!did_clamp)2797break;2798}27992800} // submode28012802if ((coptions.m_ultra_quant) &&2803(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&2804(best_trial_dist != BIG_FLOAT_VAL))2805{2806uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];2807memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);28082809for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)2810{2811for (int dt = 0; dt <= 1; dt++)2812{2813const int d = dt ? 1 : -1;28142815uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];2816memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);28172818int ise = varied_endpoints[c];28192820int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];2821rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);28222823ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];28242825varied_endpoints[c] = (uint8_t)ise;28262827int e[2][3];2828if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))2829continue;28302831vec3F e0(2832(float)(e[0][0] << 4),2833(float)(e[0][1] << 4),2834(float)(e[0][2] << 4)2835);28362837vec3F e1(2838(float)(e[1][0] << 4),2839(float)(e[1][1] << 4),2840(float)(e[1][2] << 4)2841);28422843double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);2844double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);2845double dist = helpers::minimum(dist0, dist1);28462847if (dist < best_trial_dist)2848{2849best_trial_dist = dist;2850memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);2851}2852} // d2853} // c2854} // if 
(coptions.m_ultra_quant)28552856bool improved_flag = false;28572858if (best_trial_dist != BIG_FLOAT_VAL)2859{2860if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))2861{2862uint32_t usable_selector_bitmask = UINT32_MAX;2863if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS))2864usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);2865else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS))2866usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);28672868double trial_blk_error = eval_selectors_dual_plane(channel_index, num_pixels, trial_weights0, trial_weights1, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);2869if (trial_blk_error < cur_block_error)2870{2871cur_block_error = trial_blk_error;2872memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);2873memcpy(pWeights0, trial_weights0, num_pixels);2874memcpy(pWeights1, trial_weights1, num_pixels);2875submode_used = best_submode + 1;2876improved_flag = true;2877}2878}2879}28802881return improved_flag;2882}28832884//--------------------------------------------------------------------------------------------------------------------------28852886bool pack_mode7(2887const vec3F& high_color_q16, const float s_q16,2888uint32_t ise_endpoint_range, uint8_t* pEndpoints,2889uint32_t ise_weight_range, // only used for determining biasing during packing2890const astc_hdr_codec_base_options& coptions,2891int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)2892{2893assert(first_submode <= last_submode);2894assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));2895assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);28962897uint8_t 
unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];28982899memset(pEndpoints, 0, NUM_MODE7_ENDPOINTS);29002901double best_trial_dist = BIG_FLOAT_VAL;2902int best_trial_submode = 0;29032904for (int submode = first_submode; submode <= last_submode; submode++)2905{2906const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;29072908int max_clamp_mag = 0;2909const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);29102911if (submode < 5)2912{2913if (!ignore_clamping)2914{2915if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))2916continue;2917}2918}29192920uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];29212922// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).2923// It could massively distort the endpoints, but still result in a valid encoding.2924basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);29252926int e[2][3];2927int decoded_s = 0;2928if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))2929continue;29302931// e1 is always the high color2932vec3F e1(2933(float)(e[1][0] << 4),2934(float)(e[1][1] << 4),2935(float)(e[1][2] << 4)2936);29372938decoded_s <<= 4;29392940double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;29412942if (dist < best_trial_dist)2943{2944best_trial_dist = dist;2945best_trial_submode = submode;2946memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);2947}29482949if (coptions.m_take_first_non_clamping_mode7_submode)2950{2951if (!did_clamp)2952break;2953}29542955} // submode29562957if ((coptions.m_ultra_quant) &&2958(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&2959(best_trial_dist != BIG_FLOAT_VAL))2960{2961uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];2962memcpy(orig_best_trial_endpoints, pEndpoints, 
NUM_MODE7_ENDPOINTS);29632964vec3F low_color_q16(high_color_q16 - vec3F(s_q16));2965low_color_q16.clamp(0.0f, 65535.0f);29662967for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)2968{2969for (int dt = 0; dt <= 1; dt++)2970{2971const int d = dt ? 1 : -1;29722973uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];2974memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);29752976int ise = varied_endpoints[c];29772978int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];2979rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);29802981ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];29822983varied_endpoints[c] = (uint8_t)ise;29842985int e[2][3];2986int decoded_s = 0;2987if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))2988continue;29892990// e1 is always the high color2991vec3F e1(2992(float)(e[1][0] << 4),2993(float)(e[1][1] << 4),2994(float)(e[1][2] << 4)2995);29962997decoded_s <<= 4;29982999double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;30003001if (dist < best_trial_dist)3002{3003best_trial_dist = dist;3004memcpy(pEndpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);3005}30063007} // d3008} // c3009}30103011submode_used = best_trial_submode;30123013return (best_trial_dist != BIG_FLOAT_VAL);3014}30153016//--------------------------------------------------------------------------------------------------------------------------30173018bool try_mode7(3019uint32_t num_pixels,3020uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,3021const vec3F& high_color_q16, const float s_q16,3022const half_float block_pixels_half[][3],3023uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,3024uint32_t ise_endpoint_range,3025int32_t first_submode, int32_t 
last_submode)3026{3027assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));3028assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));30293030assert(first_submode <= last_submode);3031assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));3032assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);3033assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));30343035uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];30363037uint8_t best_trial_endpoints[NUM_MODE7_ENDPOINTS];3038clear_obj(best_trial_endpoints);3039double best_trial_dist = BIG_FLOAT_VAL;3040int best_trial_submode = 0;30413042for (int submode = first_submode; submode <= last_submode; submode++)3043{3044const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;30453046int max_clamp_mag = 0;3047const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, true, MAX_CLAMP_MAG_ACCEPT_THRESH);30483049if (submode < 5)3050{3051if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))3052continue;3053}30543055uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];30563057// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).3058// It could massively distort the endpoints, but still result in a valid encoding.3059basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);30603061int e[2][3];3062int decoded_s = 0;3063if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))3064continue;30653066// e1 is always the high color3067vec3F e1(3068(float)(e[1][0] << 4),3069(float)(e[1][1] << 4),3070(float)(e[1][2] << 4)3071);30723073decoded_s <<= 4;30743075double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;30763077if (dist < best_trial_dist)3078{3079best_trial_dist 
= dist;3080best_trial_submode = submode;3081memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));3082}30833084if (coptions.m_take_first_non_clamping_mode7_submode)3085{3086if (!did_clamp)3087break;3088}30893090} // submode30913092if ((coptions.m_ultra_quant) &&3093(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&3094(best_trial_dist != BIG_FLOAT_VAL))3095{3096uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];3097memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);30983099vec3F low_color_q16(high_color_q16 - vec3F(s_q16));3100low_color_q16.clamp(0.0f, 65535.0f);31013102for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)3103{3104for (int dt = 0; dt <= 1; dt++)3105{3106const int d = dt ? 1 : -1;31073108uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];3109memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);31103111int ise = varied_endpoints[c];31123113int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];3114rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);31153116ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];31173118varied_endpoints[c] = (uint8_t)ise;31193120int e[2][3];3121int decoded_s = 0;3122if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))3123continue;31243125// e1 is always the high color3126vec3F e1(3127(float)(e[1][0] << 4),3128(float)(e[1][1] << 4),3129(float)(e[1][2] << 4)3130);31313132decoded_s <<= 4;31333134double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;31353136if (dist < best_trial_dist)3137{3138best_trial_dist = dist;3139memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);3140}31413142} // d3143} // c3144}31453146bool improved_flag = false;31473148if (best_trial_dist != BIG_FLOAT_VAL)3149{3150half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];3151uint8_t 
trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];31523153if (get_astc_hdr_mode_7_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))3154{3155double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions);3156if (trial_blk_error < cur_block_error)3157{3158cur_block_error = trial_blk_error;3159memcpy(pEndpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);3160memcpy(pWeights, trial_weights, num_pixels);3161submode_used = best_trial_submode;3162improved_flag = true;3163}3164}3165}31663167return improved_flag;3168}31693170//--------------------------------------------------------------------------------------------------------------------------3171const float LOW_EMPHASIS_WEIGHT = 1.0f, MIDDLE_EMPHASIS_WEIGHT = 1.25f, HIGH_EMPHASIS_WEIGHT = 1.0f;3172const float LOW_EMPHASIS_WEIGHT_HEAVY = 1.0f, MIDDLE_EMPHASIS_WEIGHT_HEAVY = 4.0f, HIGH_EMPHASIS_WEIGHT_HEAVY = 1.0f;31733174double encode_astc_hdr_block_mode_11(3175uint32_t num_pixels,3176const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],3177uint32_t ise_weight_range,3178uint32_t& best_submode,3179double cur_block_error,3180uint8_t* blk_endpoints, uint8_t* blk_weights,3181const astc_hdr_codec_base_options& coptions,3182bool direct_only,3183uint32_t ise_endpoint_range,3184bool uber_mode,3185bool constrain_ise_weight_selectors,3186int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,3187const encode_astc_block_stats* pBlock_stats)3188{3189assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));3190assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));3191assert((num_pixels >= 1) && (num_pixels <= 
MAX_ASTC_HDR_ENC_BLOCK_PIXELS));31923193assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));3194assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);31953196best_submode = 0;31973198const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);3199assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);32003201vec3F block_mean_color_q16, block_axis_q16;3202if (!pBlock_stats)3203{3204block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);3205block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);3206}3207else3208{3209assert(num_pixels == pBlock_stats->m_num_pixels);3210block_mean_color_q16 = pBlock_stats->m_mean_q16;3211block_axis_q16 = pBlock_stats->m_axis_q16;3212}32133214aabb3F color_box_q16(cInitExpand);32153216float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;3217vec3F low_color_q16, high_color_q16;32183219for (uint32_t i = 0; i < num_pixels; i++)3220{3221color_box_q16.expand(pBlock_pixels_q16[i]);32223223vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);3224float kd = k.dot(block_axis_q16);32253226if (kd < l)3227{3228l = kd;3229low_color_q16 = pBlock_pixels_q16[i];3230}32313232if (kd > h)3233{3234h = kd;3235high_color_q16 = pBlock_pixels_q16[i];3236}3237}32383239vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);32403241for (uint32_t i = 0; i < 3; i++)3242{3243low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);3244high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);3245}32463247uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];3248uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3249uint32_t trial_best_submode = 0;32503251clear_obj(trial_blk_endpoints);3252clear_obj(trial_blk_weights);32533254double trial_blk_error = BIG_FLOAT_VAL;32553256bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, 
trial_best_submode,3257low_color_q16, high_color_q16,3258pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3259first_submode, last_submode, ignore_clamping);32603261// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.3262if (!did_improve)3263return cur_block_error;32643265// Did the solution improve?3266if (trial_blk_error < cur_block_error)3267{3268cur_block_error = trial_blk_error;3269memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);3270memcpy(blk_weights, trial_blk_weights, num_pixels);3271best_submode = trial_best_submode;3272}32733274if (opt_mode == cNoOpt)3275return cur_block_error;32763277// least squares on the most promising trial weight indices found3278const uint32_t NUM_LS_PASSES = 3;32793280float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];32813282if (opt_mode == cWeightedAverage)3283{3284const uint32_t NUM_OPT_PASSES = 3;3285for (uint32_t pass = 0; pass < NUM_OPT_PASSES; pass++)3286{3287vec3F low_p(0.0f);3288float total_low = 0.0f;32893290vec3F high_p(0.0f);3291float total_high = 0.0f;32923293for (uint32_t i = 0; i < num_pixels; i++)3294{3295vec3F p(pBlock_pixels_q16[i]);3296float lerp = g_ise_weight_lerps[ise_weight_range][trial_blk_weights[i] + 1] * (1.0f / 64.0f);32973298low_p += p * (1.0f - lerp);3299total_low += (1.0f - lerp);33003301high_p += p * lerp;3302total_high += lerp;3303}33043305if (total_low != 0.0f)3306low_p *= (1.0f / total_low);33073308if (total_high != 0.0f)3309high_p *= (1.0f / total_high);33103311vec3F low, high;33123313bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3314low_p, high_p,3315pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3316first_submode, last_submode, ignore_clamping);33173318if 
(!was_improved)3319break;33203321memcpy(trial_blk_weights, blk_weights, num_pixels);3322}3323}3324else if (opt_mode == cOrdinaryLeastSquares)3325{3326for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)3327{3328vec3F l_q16, h_q16;33293330if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))3331break;33323333bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3334l_q16, h_q16,3335pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3336first_submode, last_submode, ignore_clamping);33373338if (!was_improved)3339break;33403341// It's improved, so let's take the new weight indices.3342memcpy(trial_blk_weights, blk_weights, num_pixels);33433344} // pass3345}3346else3347{3348if (h == l)3349{3350for (uint32_t i = 0; i < num_pixels; i++)3351emphasis_weights[i] = 1.0f;3352}3353else3354{3355float mid = (0.0f - l) / (h - l);3356mid = clamp(mid, .01f, .99f);33573358float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;3359if (opt_mode == cWeightedLeastSquaresHeavy)3360lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;33613362for (uint32_t i = 0; i < num_pixels; i++)3363{3364vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);3365float kd = k.dot(block_axis_q16);33663367assert((kd >= l) && (kd <= h));33683369float v = (kd - l) / (h - l);33703371if (v < mid)3372v = lerp(lw, mw, v / mid);3373else3374v = lerp(mw, hw, (v - mid) * (1.0f - mid));33753376emphasis_weights[i] = v;3377}33783379#if 03380if (num_pixels == 6 * 6)3381{3382const float EDGE_WEIGHT = .1f;3383for (uint32_t i = 0; i < 6; i++)3384{3385emphasis_weights[i] += EDGE_WEIGHT;3386emphasis_weights[i + 5 * 6] += EDGE_WEIGHT;3387emphasis_weights[i * 6] += EDGE_WEIGHT;3388emphasis_weights[5 + i * 6] 
+= EDGE_WEIGHT;3389}3390}3391#endif3392}33933394for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)3395{3396vec3F l_q16, h_q16;33973398if (!compute_weighted_least_squares_endpoints_rgb(3399num_pixels,3400trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr,3401emphasis_weights,3402&l_q16, &h_q16,3403pBlock_pixels_q16,3404color_box_q16))3405break;34063407bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3408l_q16, h_q16,3409pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3410first_submode, last_submode, ignore_clamping);34113412if (!was_improved)3413break;34143415// It's improved, so let's take the new weight indices.3416memcpy(trial_blk_weights, blk_weights, num_pixels);34173418} // pass3419}34203421if ( (uber_mode) && (ise_weight_range >= astc_helpers::BISE_3_LEVELS) &&3422((opt_mode == cOrdinaryLeastSquares) || (opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) )3423{3424// Try varying the current best weight indices. 
This can be expanded/improved, but at potentially great cost.34253426uint8_t temp_astc_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3427memcpy(temp_astc_weights, trial_blk_weights, num_pixels);34283429uint32_t min_lin_sel = 256, max_lin_sel = 0;3430for (uint32_t i = 0; i < num_pixels; i++)3431{3432const uint32_t astc_sel = temp_astc_weights[i];34333434const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];3435assert(lin_sel < num_weight_levels);34363437min_lin_sel = minimumu(min_lin_sel, lin_sel);3438max_lin_sel = maximumu(max_lin_sel, lin_sel);3439}34403441bool was_improved = false;3442(void)was_improved;34433444{3445bool weights_changed = false;3446uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3447for (uint32_t i = 0; i < num_pixels; i++)3448{3449uint32_t astc_sel = temp_astc_weights[i];3450uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];34513452if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))3453{3454lin_sel++;3455weights_changed = true;3456}34573458trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];3459}34603461if (weights_changed)3462{3463vec3F l_q16, h_q16;34643465bool succeeded;3466if (opt_mode == cOrdinaryLeastSquares)3467succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);3468else3469succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);34703471if (succeeded)3472{3473if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3474l_q16, h_q16,3475pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3476first_submode, last_submode, ignore_clamping))3477{3478was_improved = 
true;3479}3480}3481}3482}34833484{3485bool weights_changed = false;3486uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];34873488for (uint32_t i = 0; i < num_pixels; i++)3489{3490uint32_t astc_sel = temp_astc_weights[i];3491uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];34923493if ((lin_sel == max_lin_sel) && (lin_sel > 0))3494{3495lin_sel--;3496weights_changed = true;3497}34983499trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];3500}35013502if (weights_changed)3503{3504vec3F l_q16, h_q16;35053506bool succeeded;3507if (opt_mode == cOrdinaryLeastSquares)3508succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);3509else3510succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);35113512if (succeeded)3513{3514if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3515l_q16, h_q16,3516pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3517first_submode, last_submode, ignore_clamping))3518{3519was_improved = true;3520}3521}3522}3523}35243525{3526bool weights_changed = false;3527uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3528for (uint32_t i = 0; i < num_pixels; i++)3529{3530uint32_t astc_sel = temp_astc_weights[i];3531uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];35323533if ((lin_sel == max_lin_sel) && (lin_sel > 0))3534{3535lin_sel--;3536weights_changed = true;3537}3538else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))3539{3540lin_sel++;3541weights_changed = true;3542}35433544trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];3545}35463547if 
(weights_changed)3548{3549vec3F l_q16, h_q16;3550bool succeeded;3551if (opt_mode == cOrdinaryLeastSquares)3552succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);3553else3554succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);35553556if (succeeded)3557{3558if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,3559l_q16, h_q16,3560pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3561first_submode, last_submode, ignore_clamping))3562{3563was_improved = true;3564}3565}3566}3567}35683569} // uber_mode35703571return cur_block_error;3572}35733574//--------------------------------------------------------------------------------------------------------------------------35753576double encode_astc_hdr_block_downsampled_mode_11(3577uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,3578uint32_t ise_weight_range, uint32_t ise_endpoint_range,3579uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],3580double cur_block_error,3581int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,3582uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,3583const astc_hdr_codec_base_options& coptions,3584const encode_astc_block_stats* pBlock_stats)3585{3586assert((block_x >= 4) && (block_y >= 4) && (block_x <= MAX_ASTC_HDR_BLOCK_W) && (block_y <= MAX_ASTC_HDR_BLOCK_H));3587assert((grid_x >= 2) && (grid_y >= 2) && (grid_x <= block_x) && (grid_y <= block_y));35883589assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= 
MAX_SUPPORTED_ISE_WEIGHT_INDEX));3590assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));3591assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));35923593assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));3594assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);35953596best_submode = 0;35973598assert(astc_helpers::get_ise_levels(ise_weight_range) <= MAX_SUPPORTED_WEIGHT_LEVELS);35993600const uint32_t num_weights = grid_x * grid_y;36013602vec3F block_mean_color_q16, block_axis_q16;3603if (!pBlock_stats)3604{3605block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);3606block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);3607}3608else3609{3610assert(num_pixels == pBlock_stats->m_num_pixels);3611block_mean_color_q16 = pBlock_stats->m_mean_q16;3612block_axis_q16 = pBlock_stats->m_axis_q16;3613}36143615aabb3F color_box_q16(cInitExpand);36163617float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;3618vec3F low_color_q16, high_color_q16;36193620for (uint32_t i = 0; i < num_pixels; i++)3621{3622color_box_q16.expand(pBlock_pixels_q16[i]);36233624vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);3625float kd = k.dot(block_axis_q16);36263627if (kd < l)3628{3629l = kd;3630low_color_q16 = pBlock_pixels_q16[i];3631}36323633if (kd > h)3634{3635h = kd;3636high_color_q16 = pBlock_pixels_q16[i];3637}3638}36393640vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);36413642for (uint32_t i = 0; i < 3; i++)3643{3644low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);3645high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);3646}36473648const uint32_t NUM_PASSES = 3;3649for (uint32_t pass = 0; pass < NUM_PASSES; pass++)3650{3651uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];3652uint8_t 
trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // at block resolution, not grid res3653uint32_t trial_best_submode = 0;36543655clear_obj(trial_blk_endpoints);3656clear_obj(trial_blk_weights);36573658double trial_blk_error = BIG_FLOAT_VAL;36593660bool could_pack = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,3661low_color_q16, high_color_q16,3662pBlock_pixels_half, 32, astc_helpers::BISE_32_LEVELS, coptions, false, ise_endpoint_range, false,3663first_submode, last_submode, ignore_clamping);36643665if (!could_pack)3666break;36673668uint8_t trial_downsampled_ise_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];36693670downsample_ise_weights(3671astc_helpers::BISE_32_LEVELS, ise_weight_range,3672block_x, block_y, grid_x, grid_y,3673trial_blk_weights, trial_downsampled_ise_weights);36743675uint8_t trial_downsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3676dequantize_astc_weights(num_weights, trial_downsampled_ise_weights, ise_weight_range, trial_downsampled_raw_weights);36773678uint8_t trial_upsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE3679astc_helpers::upsample_weight_grid(block_x, block_y, grid_x, grid_y, trial_downsampled_raw_weights, trial_upsampled_raw_weights);36803681//------36823683int trial_e[2][3];3684if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))3685return cur_block_error;36863687double trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);36883689if (trial_error < cur_block_error)3690{3691cur_block_error = trial_error;3692memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);3693memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);3694best_submode = trial_best_submode;3695}3696else if (pass)3697break;36983699if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy))3700{3701float 
emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3702if (h == l)3703{3704for (uint32_t i = 0; i < num_pixels; i++)3705emphasis_weights[i] = 1.0f;3706}3707else3708{3709float mid = (0.0f - l) / (h - l);3710mid = clamp(mid, .01f, .99f);37113712float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;3713if (opt_mode == cWeightedLeastSquaresHeavy)3714lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;37153716for (uint32_t i = 0; i < num_pixels; i++)3717{3718vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);3719float kd = k.dot(block_axis_q16);37203721assert((kd >= l) && (kd <= h));37223723float v = (kd - l) / (h - l);37243725if (v < mid)3726v = lerp(lw, mw, v / mid);3727else3728v = lerp(mw, hw, (v - mid) * (1.0f - mid));37293730emphasis_weights[i] = v;3731}3732}37333734float trial_upsampled_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3735for (uint32_t i = 0; i < num_pixels; i++)3736trial_upsampled_raw_weightsf[i] = (float)trial_upsampled_raw_weights[i] * (1.0f / 64.0f);37373738if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_upsampled_raw_weightsf, emphasis_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))3739return false;3740}3741else3742{3743if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_upsampled_raw_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))3744break;3745}37463747bool pack_succeeded = pack_mode11(low_color_q16, high_color_q16, ise_endpoint_range, trial_blk_endpoints, coptions, false, first_submode, last_submode, false, trial_best_submode);3748if (!pack_succeeded)3749break;37503751if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))3752break;37533754trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);37553756if (trial_error < 
cur_block_error)3757{3758cur_block_error = trial_error;3759memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);3760memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);3761best_submode = trial_best_submode;3762}3763else3764{3765break;3766}37673768} // pass37693770return cur_block_error;3771}37723773//--------------------------------------------------------------------------------------------------------------------------37743775double encode_astc_hdr_block_mode_11_dual_plane(3776uint32_t num_pixels,3777const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],3778uint32_t channel_index, // 0-23779uint32_t ise_weight_range,3780uint32_t& best_submode,3781double cur_block_error,3782uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,3783const astc_hdr_codec_base_options& coptions,3784bool direct_only,3785uint32_t ise_endpoint_range,3786bool uber_mode,3787bool constrain_ise_weight_selectors,3788int32_t first_submode, int32_t last_submode, bool ignore_clamping)3789{3790(void)uber_mode;37913792assert(channel_index <= 2);3793assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));3794assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));3795assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));37963797assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));3798assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);37993800assert(num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS);38013802best_submode = 0;38033804const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);3805assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);38063807vec4F temp_block_pixels_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3808for (uint32_t i = 0; i < num_pixels; i++)3809{3810temp_block_pixels_q16[i] = 
pBlock_pixels_q16[i];3811temp_block_pixels_q16[i][channel_index] = 0.0f;3812}38133814vec3F block_mean_color_q16(calc_mean(num_pixels, temp_block_pixels_q16));3815vec3F block_axis_q16(calc_rgb_pca(num_pixels, temp_block_pixels_q16, block_mean_color_q16));38163817float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;3818vec3F low_color_q16, high_color_q16;38193820aabb3F color_box_q16(cInitExpand);38213822for (uint32_t i = 0; i < num_pixels; i++)3823{3824color_box_q16.expand(pBlock_pixels_q16[i]);38253826vec3F k(vec3F(temp_block_pixels_q16[i]) - block_mean_color_q16);3827float kd = k.dot(block_axis_q16);38283829if (kd < l)3830{3831l = kd;3832low_color_q16 = pBlock_pixels_q16[i];3833}38343835if (kd > h)3836{3837h = kd;3838high_color_q16 = pBlock_pixels_q16[i];3839}3840}38413842low_color_q16[channel_index] = 0.0f;3843high_color_q16[channel_index] = 0.0f;38443845float a = low_color_q16.dot(vec3F(1.0f)), b = high_color_q16.dot(vec3F(1.0f));3846if (a <= b)3847{3848low_color_q16[channel_index] = color_box_q16.get_low()[channel_index];3849high_color_q16[channel_index] = color_box_q16.get_high()[channel_index];3850}3851else3852{3853high_color_q16[channel_index] = color_box_q16.get_low()[channel_index];3854low_color_q16[channel_index] = color_box_q16.get_high()[channel_index];3855}38563857vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);3858for (uint32_t i = 0; i < 3; i++)3859{3860low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);3861high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);3862}38633864uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];3865uint8_t trial_blk_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_blk_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3866uint32_t trial_best_submode = 0;38673868clear_obj(trial_blk_endpoints);3869clear_obj(trial_blk_weights0);3870clear_obj(trial_blk_weights1);38713872double trial_blk_error = BIG_FLOAT_VAL;38733874bool did_improve = 
try_mode11_dual_plane(channel_index, num_pixels, trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_best_submode,3875low_color_q16, high_color_q16,3876pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3877first_submode, last_submode, ignore_clamping);38783879// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.3880if (!did_improve)3881return cur_block_error;38823883// Did the solution improve?3884if (trial_blk_error < cur_block_error)3885{3886cur_block_error = trial_blk_error;3887memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);3888memcpy(blk_weights0, trial_blk_weights0, num_pixels);3889memcpy(blk_weights1, trial_blk_weights1, num_pixels);3890best_submode = trial_best_submode;3891}38923893const uint32_t chan0 = (channel_index + 1) % 3, chan1 = (channel_index + 2) % 3;38943895vec2F plane0_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3896aabb2F plane0_bounds;3897plane0_bounds[0].set(color_box_q16.get_low()[chan0], color_box_q16.get_low()[chan1]);3898plane0_bounds[1].set(color_box_q16.get_high()[chan0], color_box_q16.get_high()[chan1]);38993900vec1F plane1_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];3901aabb1F plane1_bounds;3902plane1_bounds[0].set(color_box_q16.get_low()[channel_index]);3903plane1_bounds[1].set(color_box_q16.get_high()[channel_index]);39043905for (uint32_t i = 0; i < num_pixels; i++)3906{3907plane0_q16[i][0] = pBlock_pixels_q16[i][chan0];3908plane0_q16[i][1] = pBlock_pixels_q16[i][chan1];39093910plane1_q16[i][0] = pBlock_pixels_q16[i][channel_index];3911}39123913const uint32_t NUM_LS_PASSES = 3;39143915for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)3916{3917vec2F l0_q16, h0_q16;3918if (!compute_least_squares_endpoints_2D(num_pixels, trial_blk_weights0, &g_astc_ls_weights_ise[ise_weight_range][0], &l0_q16, &h0_q16, plane0_q16, plane0_bounds))3919break;39203921vec1F l1_q16, 
h1_q16;3922if (!compute_least_squares_endpoints_1D(num_pixels, trial_blk_weights1, &g_astc_ls_weights_ise[ise_weight_range][0], &l1_q16, &h1_q16, plane1_q16, plane1_bounds))3923break;39243925vec3F l_q16, h_q16;39263927l_q16[channel_index] = l1_q16[0];3928h_q16[channel_index] = h1_q16[0];39293930l_q16[chan0] = l0_q16[0];3931h_q16[chan0] = h0_q16[0];39323933l_q16[chan1] = l0_q16[1];3934h_q16[chan1] = h0_q16[1];39353936bool was_improved = try_mode11_dual_plane(channel_index, num_pixels, blk_endpoints, blk_weights0, blk_weights1, cur_block_error, best_submode,3937l_q16, h_q16,3938pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,3939first_submode, last_submode, ignore_clamping);39403941if (!was_improved)3942break;39433944// It's improved, so let's take the new weight indices.3945memcpy(trial_blk_weights0, blk_weights0, num_pixels);3946memcpy(trial_blk_weights1, blk_weights1, num_pixels);39473948} // pass39493950return cur_block_error;3951}39523953//--------------------------------------------------------------------------------------------------------------------------39543955double encode_astc_hdr_block_mode_7(3956uint32_t num_pixels,3957const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],3958uint32_t ise_weight_range,3959uint32_t& best_submode,3960double cur_block_error,3961uint8_t* blk_endpoints, //[4]3962uint8_t* blk_weights, // [num_pixels]3963const astc_hdr_codec_base_options& coptions,3964uint32_t ise_endpoint_range,3965int first_submode, int last_submode,3966const encode_astc_block_stats* pBlock_stats)3967{3968assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));3969assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));3970assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= 
astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));39713972const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);3973assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);39743975best_submode = 0;39763977vec3F block_mean_color_q16;3978if (!pBlock_stats)3979block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);3980else3981{3982assert(num_pixels == pBlock_stats->m_num_pixels);3983block_mean_color_q16 = pBlock_stats->m_mean_q16;3984}39853986vec3F block_axis_q16(0.577350259f);39873988aabb3F color_box_q16(cInitExpand);39893990float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;3991for (uint32_t i = 0; i < num_pixels; i++)3992{3993color_box_q16.expand(pBlock_pixels_q16[i]);39943995vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);3996float kd = k.dot(block_axis_q16);39973998l = basisu::minimum<float>(l, kd);3999h = basisu::maximum<float>(h, kd);4000}40014002vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16));4003vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16));40044005low_color_q16.clamp(0.0f, MAX_QLOG16_VAL);4006high_color_q16.clamp(0.0f, MAX_QLOG16_VAL);40074008vec3F diff(high_color_q16 - low_color_q16);40094010// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,4011// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. 
We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).4012float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0];40134014uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS];4015uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];4016uint32_t trial_best_submode = 0;40174018clear_obj(trial_blk_endpoints);4019clear_obj(trial_blk_weights);40204021double trial_blk_error = BIG_FLOAT_VAL;40224023bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,4024high_color_q16, ceilf(s_q16),4025pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);40264027// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.4028if (!did_improve)4029{4030return cur_block_error;4031}40324033// Did the solution improve?4034if (trial_blk_error < cur_block_error)4035{4036cur_block_error = trial_blk_error;4037memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS);4038memcpy(blk_weights, trial_blk_weights, num_pixels);4039best_submode = trial_best_submode;4040}40414042#if 14043{4044//const float TL = 8830.0f;// (float)half_to_qlog16(float_to_half(0.00061f));4045//const float TH = 41600.0f;// (float)half_to_qlog16(float_to_half(40.0f));4046//float zl = minimum<float>(color_box_q16[0][0], color_box_q16[0][1], color_box_q16[0][2]);4047//float zh = minimum<float>(color_box_q16[1][0], color_box_q16[1][1], color_box_q16[1][2]);40484049//if ((zl <= TL) && (zh >= TH))4050{4051// Try a simpler technique for artifact reduction4052l = BIG_FLOAT_VAL;4053h = -BIG_FLOAT_VAL;40544055vec3F alt_low_color_q16(0.0f), alt_high_color_q16(0.0f);4056for (uint32_t i = 0; i < num_pixels; i++)4057{4058color_box_q16.expand(pBlock_pixels_q16[i]);40594060vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);4061float kd = k.dot(block_axis_q16);40624063if (kd < l)4064{4065alt_low_color_q16 = 
pBlock_pixels_q16[i];4066l = kd;4067}40684069if (kd > h)4070{4071alt_high_color_q16 = pBlock_pixels_q16[i];4072h = kd;4073}4074}40754076vec3F old_alt_low_color_q16(alt_low_color_q16);40774078for (uint32_t i = 0; i < 3; i++)4079alt_low_color_q16[i] = lerp<float>(old_alt_low_color_q16[i], alt_high_color_q16[i], 1.0f / 64.0f);40804081vec3F alt_diff(alt_high_color_q16 - alt_low_color_q16);40824083// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,4084// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).4085float alt_s_q16 = alt_diff.dot(block_axis_q16) * block_axis_q16[0];40864087try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4088alt_high_color_q16, ceilf(alt_s_q16),4089pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);4090}4091}4092#endif40934094const float one_over_num_pixels = 1.0f / (float)num_pixels;40954096const uint32_t NUM_TRIALS = 2;4097for (uint32_t trial = 0; trial < NUM_TRIALS; trial++)4098{4099// Given a set of selectors and S, try to compute a better high color4100vec3F new_high_color_q16(block_mean_color_q16);41014102int e[2][3];4103int cur_s = 0;4104if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range))4105break;41064107cur_s <<= 4;41084109for (uint32_t i = 0; i < num_pixels; i++)4110{4111uint32_t astc_sel = trial_blk_weights[i];4112float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);41134114float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;4115new_high_color_q16[0] += k;4116new_high_color_q16[1] += k;4117new_high_color_q16[2] += k;4118}41194120bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4121new_high_color_q16, 
(float)cur_s,4122pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);41234124if (improved)4125{4126memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);4127memcpy(trial_blk_weights, blk_weights, num_pixels);4128}41294130// Given a set of selectors and a high color, try to compute a better S.4131float t = 0.0f;41324133for (uint32_t i = 0; i < num_pixels; i++)4134{4135uint32_t astc_sel = trial_blk_weights[i];4136float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);41374138t += (1.0f) - lerp;4139}41404141t *= one_over_num_pixels;41424143//int e[2][3];4144if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range))4145break;41464147vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4));41484149if (fabs(t) > .0000125f)4150{4151float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;4152float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;4153float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;41544155// TODO: gather statistics on these4156if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4157cur_h_q16, ceilf(s_r),4158pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))4159{4160improved = true;4161}41624163if (coptions.m_mode7_full_s_optimization)4164{4165if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4166cur_h_q16, ceilf(s_g),4167pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))4168{4169improved = true;4170}41714172if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4173cur_h_q16, ceilf(s_b),4174pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))4175{4176improved = true;4177}41784179if (try_mode7(num_pixels, 
blk_endpoints, blk_weights, cur_block_error, best_submode,4180cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f),4181pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))4182{4183improved = true;4184}41854186// Added this - quite strong.4187if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,4188cur_h_q16, minimum(maximum(s_r, s_g, s_b) * 1.1f, 65535.0f),4189pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))4190{4191improved = true;4192}4193} // if (coptions.m_mode7_full_s_optimization)41944195} // if (fabs(t) > .0000125f)41964197if (!improved)4198break;41994200memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);4201memcpy(trial_blk_weights, blk_weights, num_pixels);42024203} // trial42044205return cur_block_error;4206}42074208//--------------------------------------------------------------------------------------------------------------------------42094210void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights)4211{4212const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;42134214for (uint32_t i = 0; i < n; i++)4215pDst_raw_weights[i] = dequant_tab[pSrc_ise_vals[i]];4216}42174218//--------------------------------------------------------------------------------------------------------------------------42194220// For each output (2x2) sample, the weight of each input (6x6) sample.4221static const float g_weight_downsample_6x6_to_2x2[4][36] = {4222{0.165438f, 0.132609f, 0.092681f, 0.028953f, 0.000000f, 0.000000f, 0.133716f, 0.111240f, 0.065133f, 0.022236f, 0.000000f, 0.000000f, 0.092623f, 0.063898f, 0.039120f, 0.000000f, 0.000000f, 0.000000f, 0.028168f, 0.024184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f},4223{0.000000f, 0.000000f, 0.027262f, 0.091051f, 0.132446f, 0.164791f, 0.000000f, 0.000000f, 0.026038f, 0.066511f, 0.111644f, 0.133197f, 0.000000f, 0.000000f, 0.000000f, 0.040053f, 0.064757f, 0.091196f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024265f, 0.026789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4224{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028282f, 0.024804f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092871f, 0.066580f, 0.042024f, 0.000000f, 0.000000f, 0.000000f, 0.132115f, 0.107586f, 0.061943f, 0.025551f, 0.000000f, 0.000000f, 0.166111f, 0.132946f, 0.089043f, 0.030145f, 0.000000f, 0.000000f},4225{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024535f, 0.028835f, 0.000000f, 0.000000f, 0.000000f, 0.044465f, 0.063652f, 0.093251f, 0.000000f, 0.000000f, 0.025961f, 0.063339f, 0.107329f, 0.132240f, 0.000000f, 0.000000f, 0.029844f, 0.089249f, 0.132200f, 0.165099f},4226};42274228// For each output (3x2) sample, the weight of each input (6x6) sample.4229static const float g_weight_downsample_6x6_to_3x2[6][36] = {4230{0.257933f, 0.144768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213754f, 0.109376f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140969f, 0.064128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041270f, 0.027803f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4231{0.000000f, 0.046066f, 0.153691f, 0.153395f, 0.042845f, 0.000000f, 0.000000f, 0.038497f, 0.131674f, 0.126804f, 0.041513f, 0.000000f, 0.000000f, 0.028434f, 0.081152f, 0.075499f, 0.025372f, 0.000000f, 
0.000000f, 0.000000f, 0.030067f, 0.024989f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4232{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.147088f, 0.258980f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105549f, 0.211746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066714f, 0.144015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027755f, 0.038152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4233{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044268f, 0.030990f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.141642f, 0.069930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207393f, 0.105354f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255911f, 0.144511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4234{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026658f, 0.032535f, 0.000000f, 0.000000f, 0.000000f, 0.024618f, 0.079487f, 0.080415f, 0.026311f, 0.000000f, 0.000000f, 0.038382f, 0.133569f, 0.133162f, 0.033451f, 0.000000f, 0.000000f, 0.043697f, 0.152483f, 0.154345f, 0.040885f, 0.000000f},4235{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026401f, 0.040228f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066688f, 0.142350f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108504f, 0.210286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149666f, 0.255876f},4236};42374238// For each output (4x2) sample, the weight of each input (6x6) sample.4239static const float g_weight_downsample_6x6_to_4x2[8][36] = {4240{0.318857f, 0.081413f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.262816f, 0.064811f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.175211f, 0.046152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4241{0.000000f, 0.163830f, 0.223661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128904f, 0.194332f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080369f, 0.121162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041941f, 0.045801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4242{0.000000f, 0.000000f, 0.000000f, 0.230801f, 0.166220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193495f, 0.136548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113816f, 0.085890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043771f, 0.029459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4243{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087528f, 0.318213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059739f, 0.262039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046515f, 0.175973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4244{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054078f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173243f, 0.055145f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254561f, 0.059695f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319463f, 0.083816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4245{0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038171f, 0.037447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.076263f, 0.117360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134218f, 0.202503f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163759f, 0.230278f, 0.000000f, 0.000000f, 0.000000f},4246{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044607f, 0.035170f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114466f, 0.088407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201026f, 0.127983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.224148f, 0.164194f, 0.000000f},4247{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043531f, 0.174390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060164f, 0.262636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089340f, 0.317122f},4248};42494250// For each output (5x2) sample, the weight of each input (6x6) sample.4251static const float g_weight_downsample_6x6_to_5x2[10][36] = {4252{0.393855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.327491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4253{0.000000f, 0.303101f, 0.078223f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261199f, 0.068761f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.160056f, 0.054634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074026f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4254{0.000000f, 0.000000f, 0.202529f, 0.207447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.151013f, 0.157673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100074f, 0.095239f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043623f, 0.042402f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4255{0.000000f, 0.000000f, 0.000000f, 0.083336f, 0.309647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061432f, 0.269582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046328f, 0.166035f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4256{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058282f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4257{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215996f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321124f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4258{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159434f, 0.051902f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.266327f, 0.065732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.305627f, 0.081948f, 0.000000f, 0.000000f, 0.000000f},4259{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038550f, 0.046259f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092606f, 0.100038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.162523f, 0.163345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199767f, 0.196912f, 0.000000f, 0.000000f},4260{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050841f, 0.169003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061591f, 0.265094f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081426f, 0.305335f, 0.000000f},4261{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063517f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316133f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027674f, 0.381781f},4262};42634264// For each output (6x2) sample, the weight of each input (6x6) sample.4265static const float g_weight_downsample_6x6_to_6x2[12][36] = {4266{0.395563f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.328397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4267{0.000000f, 0.395041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323513f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4268{0.000000f, 0.000000f, 0.393200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4269{0.000000f, 0.000000f, 0.000000f, 0.399071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321356f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214689f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4270{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399159f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212426f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062406f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4271{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4272{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065386f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321113f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4273{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397066f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4274{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.332634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400895f, 0.000000f, 0.000000f, 0.000000f},4275{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207210f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.334096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395193f, 0.000000f, 0.000000f},4276{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320827f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388135f, 0.000000f},4277{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325843f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394772f},4278};42794280// For each output (2x3) sample, the weight of each input (6x6) sample.4281static const float g_weight_downsample_6x6_to_2x3[6][36] = {4282{0.253933f, 0.211745f, 0.142964f, 0.043509f, 0.000000f, 0.000000f, 0.146094f, 0.108119f, 0.068727f, 0.024908f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4283{0.000000f, 0.000000f, 0.043336f, 0.140540f, 0.208745f, 0.253069f, 0.000000f, 0.000000f, 0.031333f, 0.069242f, 0.108596f, 0.145138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4284{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044780f, 0.036916f, 0.026808f, 0.000000f, 0.000000f, 0.000000f, 0.151455f, 0.129189f, 0.076266f, 0.030885f, 0.000000f, 0.000000f, 0.151915f, 0.131628f, 0.081598f, 0.031903f, 0.000000f, 0.000000f, 0.043838f, 0.032645f, 0.030173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4285{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028998f, 0.038454f, 0.046460f, 0.000000f, 0.000000f, 0.033717f, 0.076274f, 0.130140f, 0.153377f, 0.000000f, 0.000000f, 0.025762f, 0.077843f, 0.130195f, 0.150217f, 0.000000f, 0.000000f, 0.000000f, 0.029422f, 0.034493f, 
0.044648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4286{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.145243f, 0.107655f, 0.062280f, 0.033041f, 0.000000f, 0.000000f, 0.257369f, 0.210260f, 0.139667f, 0.044485f, 0.000000f, 0.000000f},4287{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037604f, 0.064104f, 0.105759f, 0.144848f, 0.000000f, 0.000000f, 0.042699f, 0.141511f, 0.207704f, 0.255772f},4288};42894290// For each output (3x3) sample, the weight of each input (6x6) sample.4291static const float g_weight_downsample_6x6_to_3x3[9][36] = {4292{0.412913f, 0.237773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237370f, 0.111944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4293{0.000000f, 0.066531f, 0.251421f, 0.245639f, 0.065785f, 0.000000f, 0.000000f, 0.047059f, 0.143642f, 0.128760f, 0.051164f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4294{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.234587f, 0.419421f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.110765f, 0.235227f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4295{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067391f, 0.044131f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.248992f, 0.133218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247568f, 0.139987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072238f, 0.046475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4296{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040674f, 0.048555f, 0.000000f, 0.000000f, 0.000000f, 0.049640f, 0.158199f, 0.158521f, 0.046044f, 0.000000f, 0.000000f, 0.043591f, 0.153956f, 0.155258f, 0.049378f, 0.000000f, 0.000000f, 0.000000f, 0.046674f, 0.049509f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4297{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049528f, 0.063611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.137662f, 0.252612f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134924f, 0.246668f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042655f, 0.072341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4298{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237403f, 0.114850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.418506f, 0.229241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4299{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049009f, 0.142093f, 0.136891f, 0.036294f, 0.000000f, 0.000000f, 0.074433f, 0.244437f, 0.251631f, 0.065212f, 0.000000f},4300{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121166f, 0.231108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.236230f, 0.411495f},4301};43024303// For each output (4x3) sample, the weight of each input (6x6) sample.4304static const float g_weight_downsample_6x6_to_4x3[12][36] = {4305{0.508292f, 0.132529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285382f, 0.073798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4306{0.000000f, 0.266624f, 0.378457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.144380f, 0.210539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4307{0.000000f, 0.000000f, 0.000000f, 0.380292f, 0.270590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200825f, 0.148293f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f},4308{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130560f, 0.507542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071578f, 0.290320f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4309{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322294f, 0.082665f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316365f, 0.092271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092353f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4310{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046081f, 0.061377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158151f, 0.235006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152896f, 0.232594f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052844f, 0.061053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4311{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061619f, 0.046867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.227763f, 0.158202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222620f, 0.155545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073398f, 0.053986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4312{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084098f, 0.330283f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085224f, 0.323658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f},4313{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286413f, 0.077046f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.512915f, 0.123625f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4314{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140389f, 0.213324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267125f, 0.379163f, 0.000000f, 0.000000f, 0.000000f},4315{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208464f, 0.139969f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382876f, 0.268691f, 0.000000f},4316{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080416f, 0.285653f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131803f, 0.502128f},4317};43184319// For each output (5x3) sample, the weight of each input (6x6) sample.4320static const float g_weight_downsample_6x6_to_5x3[15][36] = {4321{0.618662f, 0.032137f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.349200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4322{0.000000f, 0.497060f, 0.129255f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281642f, 0.092043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4323{0.000000f, 0.000000f, 0.333166f, 0.338337f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164333f, 0.164165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4324{0.000000f, 0.000000f, 0.000000f, 0.129409f, 0.504176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085525f, 0.280890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4325{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.636943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4326{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.386741f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4327{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317750f, 0.095763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321008f, 0.086368f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4328{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057696f, 0.061462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184995f, 0.197656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186342f, 0.186715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059712f, 0.065422f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4329{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079906f, 0.328876f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085955f, 0.320229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4330{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.099585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398489f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113144f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4331{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4332{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285578f, 0.088663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495946f, 0.129812f, 0.000000f, 0.000000f, 0.000000f},4333{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177513f, 0.166195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.329950f, 0.326342f, 0.000000f, 0.000000f},4334{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082692f, 0.279744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134353f, 0.503211f, 0.000000f},4335{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638822f},4336};43374338// For each output (6x3) sample, the weight of each input (6x6) sample.4339static const float g_weight_downsample_6x6_to_6x3[18][36] = {4340{0.640623f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4341{0.000000f, 0.638697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361303f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4342{0.000000f, 0.000000f, 0.640672f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359328f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4343{0.000000f, 0.000000f, 0.000000f, 0.637721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4344{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.647342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.352658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4345{0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4346{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105054f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4347{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101949f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4348{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111659f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4349{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f},4350{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398062f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4351{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097666f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400772f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4352{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359466f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4353{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360569f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639431f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4354{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355750f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644250f, 0.000000f, 
0.000000f, 0.000000f},4355{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646135f, 0.000000f, 0.000000f},4356{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642273f, 0.000000f},4357{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640461f},4358};43594360// For each output (2x4) sample, the weight of each input (6x6) sample.4361static const float g_weight_downsample_6x6_to_2x4[8][36] = {4362{0.312206f, 0.261492f, 0.177496f, 0.055798f, 0.000000f, 0.000000f, 0.081944f, 0.062361f, 0.048703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4363{0.000000f, 0.000000f, 0.054679f, 0.172805f, 0.260561f, 0.314742f, 0.000000f, 0.000000f, 0.000000f, 0.049040f, 0.065652f, 0.082520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4364{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164115f, 0.129589f, 0.083879f, 0.029309f, 0.000000f, 0.000000f, 0.231202f, 0.198851f, 0.118719f, 0.044334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4365{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035855f, 0.083276f, 0.127764f, 0.166965f, 0.000000f, 0.000000f, 0.045347f, 0.116503f, 0.193645f, 0.230645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4366{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.223790f, 0.194804f, 0.115855f, 0.047371f, 0.000000f, 0.000000f, 0.164616f, 0.125798f, 0.087268f, 0.040497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4367{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044738f, 0.118365f, 0.198854f, 0.230745f, 0.000000f, 0.000000f, 0.029646f, 0.078141f, 0.131405f, 0.168106f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4368{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080206f, 0.060505f, 0.041197f, 0.000000f, 0.000000f, 0.000000f, 0.320486f, 0.265233f, 0.174992f, 0.057380f, 0.000000f, 0.000000f},4369{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051057f, 0.058139f, 0.082120f, 0.000000f, 0.000000f, 0.056168f, 0.174118f, 0.260525f, 0.317873f},4370};43714372// For each output (3x4) sample, the weight of each input (6x6) sample.4373static const float g_weight_downsample_6x6_to_3x4[12][36] = {4374{0.503381f, 0.288537f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130806f, 0.077275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4375{0.000000f, 0.088808f, 0.319226f, 0.312498f, 0.086797f, 0.000000f, 0.000000f, 0.000000f, 0.092065f, 0.079421f, 0.021185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4376{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286250f, 0.514036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072999f, 0.126714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4377{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.261935f, 0.133191f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.376226f, 0.207118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4378{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059585f, 0.153016f, 0.152552f, 0.043373f, 0.000000f, 0.000000f, 0.063990f, 0.231504f, 0.235283f, 0.060696f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4379{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146403f, 0.262394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208547f, 0.382656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4380{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.374676f, 0.209306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.270440f, 0.145577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4381{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059636f, 0.233975f, 0.235944f, 0.069029f, 0.000000f, 0.000000f, 0.048950f, 0.150198f, 0.154340f, 0.047929f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4382{0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200921f, 0.380881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146928f, 0.271271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4383{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128883f, 0.075468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.509859f, 0.285791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4384{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095842f, 0.086878f, 0.000000f, 0.000000f, 0.000000f, 0.092942f, 0.314169f, 0.319263f, 0.090906f, 0.000000f},4385{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079652f, 0.124852f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.289868f, 0.505628f},4386};43874388// For each output (4x4) sample, the weight of each input (6x6) sample.4389static const float g_weight_downsample_6x6_to_4x4[16][36] = {4390{0.665277f, 0.167914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4391{0.000000f, 0.325854f, 0.449938f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094690f, 0.129518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4392{0.000000f, 0.000000f, 0.000000f, 0.455174f, 0.326025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109174f, 0.109627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4393{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166733f, 0.664155f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169112f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4394{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320619f, 0.090788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.462066f, 0.126527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4395{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165890f, 0.235855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.233931f, 0.364324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4396{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239319f, 0.151533f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363629f, 0.245519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4397{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106763f, 0.311932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.119451f, 0.461853f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4398{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.451893f, 0.124086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326160f, 0.097861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4399{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239712f, 0.365585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164178f, 0.230525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4400{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360274f, 0.237862f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.246139f, 0.155726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4401{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121863f, 0.457051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097828f, 0.323258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4402{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.667648f, 0.168718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4403{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094870f, 0.132660f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316878f, 0.455591f, 0.000000f, 0.000000f, 0.000000f},4404{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116917f, 0.098433f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.458816f, 0.325834f, 0.000000f},4405{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172019f, 0.659578f},4406};44074408// For each output (5x4) sample, the weight of each input (6x6) sample.4409static const float g_weight_downsample_6x6_to_5x4[20][36] = {4410{0.773702f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4411{0.000000f, 0.633422f, 0.166577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170080f, 0.029921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4412{0.000000f, 0.000000f, 0.388335f, 0.403694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100996f, 0.106975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4413{0.000000f, 0.000000f, 0.000000f, 0.161122f, 0.655288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.183590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4414{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801705f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4415{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400989f, 0.025097f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.573915f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4416{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309345f, 0.085396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478694f, 0.126565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4417{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194664f, 0.187267f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.292735f, 0.308960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016375f, 0.000000f, 0.000000f},4418{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098049f, 0.295983f, 0.000000f, 0.000000f, 0.017892f, 0.000000f, 0.111938f, 0.476138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4419{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043545f, 0.386448f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.570007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4420{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.566407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402307f, 0.031286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4421{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.463145f, 0.120696f, 0.000000f, 0.019497f, 0.000000f, 0.000000f, 0.311721f, 0.084942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4422{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296730f, 0.300781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204639f, 0.197849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4423{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122117f, 0.469302f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102545f, 0.306036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4424{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.562064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041534f, 0.396403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4425{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190134f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773971f, 0.035896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4426{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169927f, 0.035812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.630284f, 0.163977f, 0.000000f, 0.000000f, 0.000000f},4427{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112667f, 0.106813f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393502f, 0.387018f, 0.000000f, 0.000000f},4428{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177024f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170482f, 0.652494f, 0.000000f},4429{0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033039f, 0.774687f},4430};44314432// For each output (6x4) sample, the weight of each input (6x6) sample.4433static const float g_weight_downsample_6x6_to_6x4[24][36] = {4434{0.804254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4435{0.000000f, 0.804177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4436{0.000000f, 0.000000f, 0.799585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4437{0.000000f, 0.000000f, 0.000000f, 0.803604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4438{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807256f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4439{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4440{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410532f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4441{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408690f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.591310f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4442{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.416225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4443{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.414279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4444{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.406723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.593277f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4445{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.597490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4446{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415216f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4447{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.409573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4448{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590073f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4449{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580348f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.419652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4450{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588321f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4451{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.412978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4452{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.193281f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4453{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.189163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.810837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4454{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804892f, 0.000000f, 0.000000f, 0.000000f},4455{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.188290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.811710f, 0.000000f, 0.000000f},4456{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807086f, 0.000000f},4457{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195292f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804708f},4458};44594460// For each output (2x5) sample, the weight of each input (6x6) sample.4461static const float g_weight_downsample_6x6_to_2x5[10][36] = {4462{0.387593f, 0.325123f, 0.221104f, 0.066180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4463{0.000000f, 0.000000f, 0.065940f, 0.214659f, 0.326737f, 0.392664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4464{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309603f, 0.265953f, 0.168780f, 0.060600f, 0.000000f, 0.000000f, 0.084707f, 0.063017f, 0.047341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4465{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062836f, 0.170767f, 0.261053f, 0.307978f, 0.000000f, 0.000000f, 0.000000f, 0.049286f, 0.064361f, 0.083719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4466{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195787f, 0.153943f, 0.095706f, 0.042417f, 0.000000f, 0.000000f, 0.190695f, 0.154435f, 0.097288f, 0.040258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4467{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017536f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039307f, 0.094677f, 0.158696f, 0.199136f, 0.000000f, 0.000000f, 0.040959f, 0.093353f, 0.155294f, 0.201042f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4468{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079432f, 0.065739f, 0.044876f, 0.000000f, 0.000000f, 0.000000f, 0.309205f, 0.264700f, 0.167247f, 0.068801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4469{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052112f, 0.064829f, 0.081363f, 0.000000f, 0.000000f, 0.064024f, 0.161136f, 0.263743f, 0.312793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4470{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393277f, 0.324792f, 0.213188f, 0.068743f, 0.000000f, 0.000000f},4471{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066964f, 0.215440f, 0.323005f, 0.394591f},4472};44734474// For each output (3x5) sample, the weight of each input (6x6) sample.4475static const float g_weight_downsample_6x6_to_3x5[15][36] = {4476{0.620557f, 0.350797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028646f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4477{0.000000f, 0.110170f, 0.397489f, 0.386326f, 0.106015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4478{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357348f, 0.642652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4479{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503934f, 0.275289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128280f, 0.092497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4480{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102294f, 0.316223f, 0.313576f, 0.092518f, 0.000000f, 0.000000f, 0.000000f, 0.081158f, 0.094231f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4481{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279079f, 0.502163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086083f, 0.132675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4482{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325483f, 0.157739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322567f, 0.172225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021986f, 0.000000f, 0.000000f},4483{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063342f, 0.192228f, 0.186950f, 0.057021f, 0.000000f, 0.000000f, 0.054779f, 0.186114f, 0.185666f, 0.073901f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4484{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172195f, 0.331802f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148212f, 0.322038f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025751f, 0.000000f, 0.000000f},4485{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123726f, 0.081188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507339f, 0.287746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4486{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093924f, 0.094021f, 0.000000f, 0.000000f, 0.000000f, 0.097070f, 0.315697f, 0.314560f, 0.084728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4487{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082560f, 0.129771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277014f, 0.486817f, 0.023837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4488{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644191f, 0.355809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4489{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107771f, 0.387615f, 0.393454f, 0.111159f, 0.000000f},4490{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360886f, 0.639114f},4491};44924493// For each output (4x5) sample, the weight of each input (6x6) sample.4494static const float g_weight_downsample_6x6_to_4x5[20][36] = {4495{0.778254f, 0.190730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031016f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4496{0.000000f, 0.401147f, 0.570243f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028610f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4497{0.000000f, 0.000000f, 0.000000f, 0.563768f, 0.394241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041992f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4498{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196238f, 0.767548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.036214f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4499{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.637514f, 0.166734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167634f, 0.028118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4500{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322778f, 0.473312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085399f, 0.118511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4501{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471429f, 0.308185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118025f, 0.102361f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4502{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.176592f, 0.643933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4503{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.391609f, 0.100882f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390531f, 0.116978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4504{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017259f, 0.000000f, 0.201618f, 0.301555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197600f, 0.281968f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4505{0.000000f, 0.000000f, 0.016735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.293309f, 0.192842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.268674f, 0.208109f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020330f, 0.000000f, 0.000000f, 0.000000f},4506{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118514f, 0.380746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097621f, 0.381305f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021814f, 0.000000f, 0.000000f},4507{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.657533f, 0.184490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4508{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097522f, 0.128585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309864f, 0.464029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4509{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128900f, 0.090864f, 0.000000f, 0.025393f, 0.000000f, 0.000000f, 0.464029f, 0.290814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4510{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024593f, 0.172268f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173412f, 0.629727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4511{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.778816f, 0.191602f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4512{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394454f, 0.569249f, 0.000000f, 0.000000f, 0.000000f},4513{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.561207f, 0.399108f, 0.000000f},4514{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034683f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193744f, 0.771574f},4515};45164517// For each output (5x5) sample, the weight of each input (6x6) sample.4518static const float g_weight_downsample_6x6_to_5x5[25][36] = {4519{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4520{0.000000f, 0.794727f, 0.205273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4521{0.000000f, 0.000000f, 0.465125f, 0.484079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028881f, 0.000000f, 0.000000f, 0.021914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4522{0.000000f, 0.000000f, 0.000000f, 0.192446f, 0.772941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4523{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033123f, 0.930510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036367f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4524{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199766f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4525{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.629079f, 0.165939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166390f, 0.019675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018918f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4526{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378734f, 0.373861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111597f, 0.135808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4527{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177492f, 0.641195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181313f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4528{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028722f, 0.761781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4529{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.475763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471882f, 0.029551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022804f, 0.000000f, 0.000000f},4530{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382714f, 0.116167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.383377f, 0.117742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4531{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254151f, 0.249987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.241972f, 0.253891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4532{0.000000f, 0.000000f, 0.017950f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122722f, 0.376847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095099f, 0.369986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017396f, 0.000000f, 0.000000f, 0.000000f},4533{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029442f, 0.472507f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026300f, 0.000000f, 0.000000f},4534{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190299f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.776924f, 0.032778f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4535{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.171498f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.666385f, 0.162117f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4536{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125713f, 0.117624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387084f, 
0.369579f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4537{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028493f, 0.169318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173770f, 0.628419f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4538{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198951f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035634f, 0.765415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4539{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963102f, 0.036898f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4540{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030322f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.771054f, 0.198624f, 0.000000f, 0.000000f, 0.000000f},4541{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.021816f, 0.020944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.481761f, 0.475479f, 0.000000f, 0.000000f},4542{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198418f, 0.768766f, 0.000000f},4543{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966662f},4544};45454546// For each output (6x5) sample, the weight of each input (6x6) sample.4547static const float g_weight_downsample_6x6_to_6x5[30][36] = {4548{0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4549{0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4550{0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4551{0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4552{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966125f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033875f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4553{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4554{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800857f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4555{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773463f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.201165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4556{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4557{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4558{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.785975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4559{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4560{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.490845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.487242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021913f, 0.000000f, 0.000000f},4561{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4562{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.505452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4563{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495383f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.482180f, 0.000000f, 0.022437f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4564{0.000000f, 0.000000f, 0.022727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4565{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027352f, 0.000000f, 0.000000f},4566{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4567{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.789941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4568{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212947f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4569{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.784739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4570{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209116f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.790884f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4571{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.794119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4572{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4573{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4574{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f},4575{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966281f, 0.000000f, 0.000000f},4576{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f, 0.000000f},4577{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f},4578};45794580// For each output (2x6) sample, the weight of each input (6x6) sample.4581static const float g_weight_downsample_6x6_to_2x6[12][36] = {4582{0.388815f, 0.325435f, 0.220189f, 0.065562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4583{0.000000f, 0.000000f, 0.064515f, 0.214042f, 0.327700f, 0.393742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4584{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398821f, 0.326200f, 0.217851f, 0.057128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4585{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062546f, 0.216408f, 0.322269f, 0.398777f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4586{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396575f, 0.330631f, 0.212857f, 0.059936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4587{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070253f, 0.215326f, 0.317576f, 0.396845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4588{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398130f, 0.324745f, 0.213572f, 0.063553f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4589{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062009f, 0.216253f, 0.324683f, 0.397055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4590{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397646f, 0.321346f, 0.212334f, 0.068675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4591{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067073f, 0.210768f, 0.318165f, 0.403993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4592{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395756f, 0.325048f, 0.211862f, 0.067334f, 0.000000f, 0.000000f},4593{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065475f, 0.214113f, 0.324009f, 0.396403f},4594};45954596// For each output (3x6) sample, the weight of each input (6x6) sample.4597static const float g_weight_downsample_6x6_to_3x6[18][36] = {4598{0.640136f, 0.359864f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4599{0.000000f, 0.108112f, 0.399968f, 0.388087f, 0.103833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4600{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356122f, 0.643878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4601{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646308f, 0.353692f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4602{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122937f, 0.390166f, 0.380558f, 0.106339f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4603{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355015f, 0.644985f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4604{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642874f, 0.357126f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4605{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111570f, 0.398638f, 0.387639f, 0.102153f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4606{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359134f, 0.640866f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4607{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640159f, 0.359841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4608{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098908f, 0.393303f, 0.400421f, 0.107369f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4609{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357119f, 0.642881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4610{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640541f, 0.359459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4611{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116318f, 0.397635f, 0.395084f, 0.090964f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4612{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361948f, 0.638052f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4613{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645448f, 0.354552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4614{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106981f, 0.389214f, 0.395056f, 0.108749f, 0.000000f},4615{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359592f, 0.640408f},4616};46174618// For each output (4x6) sample, the weight of each input (6x6) sample.4619static const float g_weight_downsample_6x6_to_4x6[24][36] = {4620{0.806928f, 0.193072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4621{0.000000f, 0.412216f, 0.587784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4622{0.000000f, 0.000000f, 0.000000f, 0.590075f, 0.409925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4623{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200682f, 0.799318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4624{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.809822f, 0.190178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4625{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.423474f, 0.576526f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4626{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580816f, 0.419184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4627{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190240f, 0.809760f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4628{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800320f, 0.199680f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4629{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408625f, 0.591375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4630{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583392f, 0.416608f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4631{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200372f, 0.799628f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4632{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798914f, 0.201086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4633{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411243f, 0.588757f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4634{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.586520f, 0.413480f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4635{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203588f, 0.796412f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4636{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.802040f, 0.197960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4637{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411175f, 0.588825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4638{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.599873f, 0.400127f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4639{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193060f, 0.806940f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4640{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.806073f, 0.193927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4641{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408705f, 0.591295f, 0.000000f, 0.000000f, 0.000000f},4642{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585711f, 0.414289f, 0.000000f},4643{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197672f, 0.802328f},4644};46454646// For each output (5x6) sample, the weight of each input (6x6) sample.4647static const float g_weight_downsample_6x6_to_5x6[30][36] = {4648{0.966289f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4649{0.000000f, 0.794848f, 0.205152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4650{0.000000f, 0.000000f, 0.473272f, 0.496525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4651{0.000000f, 0.000000f, 0.000000f, 0.196955f, 0.803045f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4652{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4653{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966284f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4654{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795787f, 0.204213f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4655{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.500928f, 0.499072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4656{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198603f, 0.801397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4657{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4658{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4659{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.788424f, 0.211576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4660{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.484227f, 0.486497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4661{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201499f, 0.798501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4662{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033724f, 0.966276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4663{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4664{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.791336f, 0.208664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4665{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490188f, 0.509812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4666{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204835f, 0.795165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4667{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033703f, 0.966297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4668{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966276f, 0.033724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4669{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.799276f, 0.200724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4670{0.000000f, 0.000000f, 0.022501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494443f, 0.483055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4671{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205967f, 0.794033f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4672{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033726f, 0.966274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4673{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.965971f, 0.034029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4674{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798640f, 0.201360f, 0.000000f, 0.000000f, 0.000000f},4675{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.502577f, 0.497423f, 0.000000f, 0.000000f},4676{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203927f, 0.796073f, 0.000000f},4677{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033706f, 0.966294f},4678};46794680// For each output (6x6) sample, the weight of each input (6x6) sample.4681static const float g_weight_downsample_6x6_to_6x6[36][36] = {4682{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4683{0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4684{0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4685{0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4686{0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4687{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4688{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4689{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4690{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4691{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4692{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4693{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4694{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4695{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4696{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4697{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4698{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4699{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4700{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4701{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4702{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4703{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4704{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4705{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4706{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4707{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4708{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4709{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4710{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4711{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4712{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4713{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},4714{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f},4715{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f},4716{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f},4717{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f},4718};47194720//--------------------------------------------------------------------------------------------------------------------------47214722const struct downsample_matrix_6x64723{4724uint32_t m_grid_width, m_grid_height;4725const float* m_p;4726} g_downsample_matrices_6x6[] = {4727{ 2, 2, (const float*)g_weight_downsample_6x6_to_2x2 },4728{ 3, 2, (const float*)g_weight_downsample_6x6_to_3x2 },4729{ 4, 2, (const float*)g_weight_downsample_6x6_to_4x2 },4730{ 5, 2, (const float*)g_weight_downsample_6x6_to_5x2 },4731{ 6, 2, (const float*)g_weight_downsample_6x6_to_6x2 },4732{ 2, 3, (const float*)g_weight_downsample_6x6_to_2x3 },4733{ 3, 3, (const float*)g_weight_downsample_6x6_to_3x3 },4734{ 4, 3, (const float*)g_weight_downsample_6x6_to_4x3 },4735{ 5, 3, (const float*)g_weight_downsample_6x6_to_5x3 },4736{ 6, 3, (const float*)g_weight_downsample_6x6_to_6x3 },4737{ 2, 4, (const float*)g_weight_downsample_6x6_to_2x4 },4738{ 3, 4, (const float*)g_weight_downsample_6x6_to_3x4 },4739{ 4, 4, (const float*)g_weight_downsample_6x6_to_4x4 },4740{ 5, 4, (const float*)g_weight_downsample_6x6_to_5x4 },4741{ 6, 4, (const float*)g_weight_downsample_6x6_to_6x4 },4742{ 2, 5, (const float*)g_weight_downsample_6x6_to_2x5 },4743{ 3, 5, (const float*)g_weight_downsample_6x6_to_3x5 },4744{ 4, 5, (const float*)g_weight_downsample_6x6_to_4x5 },4745{ 5, 5, (const float*)g_weight_downsample_6x6_to_5x5 },4746{ 6, 5, (const float*)g_weight_downsample_6x6_to_6x5 },4747{ 2, 6, (const float*)g_weight_downsample_6x6_to_2x6 },4748{ 3, 6, (const float*)g_weight_downsample_6x6_to_3x6 },4749{ 4, 6, (const float*)g_weight_downsample_6x6_to_4x6 },4750{ 5, 6, (const float*)g_weight_downsample_6x6_to_5x6 },4751{ 6, 6, (const float*)g_weight_downsample_6x6_to_6x6 }4752};4753//const uint32_t NUM_DOWNSAMPLE_MATRICES_6x6 = 
sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]);47544755//--------------------------------------------------------------------------------------------------------------------------47564757const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height)4758{4759// TODO: Use hash or map lookup.4760for (const auto& m : g_downsample_matrices_6x6)4761if ((m.m_grid_width == grid_width) && (m.m_grid_height == grid_height))4762return m.m_p;47634764assert(0);4765return nullptr;4766}47674768void downsample_weight_grid(4769const float* pMatrix_weights,4770uint32_t bx, uint32_t by, // source/from dimension (block size)4771uint32_t wx, uint32_t wy, // dest/to dimension (grid size)4772const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]4773uint8_t* pDst_weights) // [wy][wx]4774{4775const uint32_t total_block_samples = bx * by;47764777for (uint32_t y = 0; y < wy; y++)4778{4779for (uint32_t x = 0; x < wx; x++)4780{4781float total = 0.5f;47824783for (uint32_t i = 0; i < total_block_samples; i++)4784if (pMatrix_weights[i])4785total += pMatrix_weights[i] * (float)pSrc_weights[i];47864787pDst_weights[x + y * wx] = (uint8_t)clamp((int)total, 0, 64);47884789pMatrix_weights += total_block_samples;4790}4791}4792}47934794//--------------------------------------------------------------------------------------------------------------------------47954796void downsample_ise_weights(4797uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,4798uint32_t block_w, uint32_t block_h,4799uint32_t grid_w, uint32_t grid_h,4800const uint8_t* pSrc_weights, uint8_t* pDst_weights)4801{4802assert((block_w <= MAX_ASTC_HDR_BLOCK_W) && (block_h <= MAX_ASTC_HDR_BLOCK_H));4803assert((grid_w >= 2) && (grid_w <= MAX_ASTC_HDR_BLOCK_W));4804assert((grid_h >= 2) && (grid_h <= MAX_ASTC_HDR_BLOCK_H));48054806assert(dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);4807assert(dequant_weight_ise_range <= 
astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);48084809assert(quant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);4810assert(quant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);48114812if ((block_w == grid_w) && (block_h == grid_h))4813{4814if (dequant_weight_ise_range != quant_weight_ise_range)4815{4816basist::astc_6x6_hdr::requantize_astc_weights(block_w * block_h, pSrc_weights, dequant_weight_ise_range, pDst_weights, quant_weight_ise_range);4817}4818else4819{4820if (pDst_weights != pSrc_weights)4821memcpy(pDst_weights, pSrc_weights, block_w * block_h);4822}48234824return;4825}48264827uint8_t desired_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];48284829const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(dequant_weight_ise_range).m_ISE_to_val;48304831for (uint32_t by = 0; by < block_h; by++)4832for (uint32_t bx = 0; bx < block_w; bx++)4833desired_weights[bx + by * block_w] = dequant_tab[pSrc_weights[bx + by * block_w]];48344835uint8_t downsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];48364837const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h);4838assert(pDownsample_matrix);48394840downsample_weight_grid(4841pDownsample_matrix,4842block_w, block_h, // source/from dimension (block size)4843grid_w, grid_h, // dest/to dimension (grid size)4844desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]4845downsampled_weights); // [wy][wx]48464847const auto& weight_quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(quant_weight_ise_range).m_val_to_ise;48484849for (uint32_t gy = 0; gy < grid_h; gy++)4850for (uint32_t gx = 0; gx < grid_w; gx++)4851pDst_weights[gx + gy * grid_w] = weight_quant_tab[downsampled_weights[gx + gy * grid_w]];4852}48534854void downsample_ise_weights_dual_plane(4855uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,4856uint32_t block_w, uint32_t block_h,4857uint32_t grid_w, uint32_t grid_h,4858const uint8_t* pSrc_weights0, const uint8_t* 
pSrc_weights1,4859uint8_t* pDst_weights)4860{4861uint8_t downsampled_weights0[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H], downsampled_weights1[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H];48624863downsample_ise_weights(4864dequant_weight_ise_range, quant_weight_ise_range,4865block_w, block_h,4866grid_w, grid_h,4867pSrc_weights0, downsampled_weights0);48684869downsample_ise_weights(4870dequant_weight_ise_range, quant_weight_ise_range,4871block_w, block_h,4872grid_w, grid_h,4873pSrc_weights1, downsampled_weights1);48744875const uint32_t num_grid_samples = grid_w * grid_h;4876for (uint32_t i = 0; i < num_grid_samples; i++)4877{4878pDst_weights[i * 2 + 0] = downsampled_weights0[i];4879pDst_weights[i * 2 + 1] = downsampled_weights1[i];4880}4881}48824883static bool refine_endpoints_mode11(4884uint32_t endpoint_ise_range,4885uint8_t* pEndpoint_vals, // the endpoints to optimize4886uint32_t block_w, uint32_t block_h, // block dimensions4887uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid4888uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],4889const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets4890astc_hdr_codec_base_options& coptions,4891bool direct_only, int first_submode, int last_submode,4892opt_mode_t opt_mode)4893{4894if (opt_mode == cNoOpt)4895return false;48964897const uint32_t num_block_pixels = block_w * block_h;48984899uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];4900if (!pPixel_block_ofs)4901{4902for (uint32_t i = 0; i < num_block_pixels; i++)4903def_pixel_block_ofs[i] = (uint8_t)i;49044905pPixel_block_ofs = def_pixel_block_ofs;4906}49074908const uint32_t num_weights = grid_w * grid_h;49094910uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];4911for (uint32_t i = 0; i < num_weights; i++)4912dequantized_raw_weights[i] = 
astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]];49134914uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE4915astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights);49164917aabb3F color_box_q16(cInitExpand);49184919uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE4920float trial_blk_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];4921for (uint32_t i = 0; i < num_pixels; i++)4922{4923color_box_q16.expand(pBlock_pixels_q16[i]);49244925assert(pPixel_block_ofs[i] < num_block_pixels);49264927trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]];4928trial_blk_raw_weightsf[i] = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);4929}49304931vec3F l_q16, h_q16;4932if (opt_mode == cOrdinaryLeastSquares)4933{4934if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_blk_raw_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))4935return false;4936}4937else if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy))4938{4939vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));4940vec3F block_axis_q16(calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16));4941float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;4942for (uint32_t i = 0; i < num_pixels; i++)4943{4944vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);4945float kd = k.dot(block_axis_q16);4946if (kd < l)4947l = kd;4948if (kd > h)4949h = kd;4950}4951float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];4952if (h == l)4953{4954for (uint32_t i = 0; i < num_pixels; i++)4955emphasis_weights[i] = 1.0f;4956}4957else4958{4959float mid = (0.0f - l) / (h - l);4960mid = clamp(mid, .01f, .99f);49614962float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;4963if (opt_mode == cWeightedLeastSquaresHeavy)4964lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = 
MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;49654966for (uint32_t i = 0; i < num_pixels; i++)4967{4968vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);4969float kd = k.dot(block_axis_q16);49704971assert((kd >= l) && (kd <= h));49724973float v = (kd - l) / (h - l);49744975if (v < mid)4976v = lerp(lw, mw, v / mid);4977else4978v = lerp(mw, hw, (v - mid) * (1.0f - mid));49794980emphasis_weights[i] = v;4981}4982}49834984if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_blk_raw_weightsf, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))4985return false;4986}4987else4988{4989assert(opt_mode == cWeightedAverage);49904991l_q16.set(0.0f);4992float total_low = 0.0f;49934994h_q16.set(0.0f);4995float total_high = 0.0f;49964997for (uint32_t i = 0; i < num_pixels; i++)4998{4999vec3F p(pBlock_pixels_q16[i]);5000float lerp = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);50015002l_q16 += p * (1.0f - lerp);5003total_low += (1.0f - lerp);50045005h_q16 += p * lerp;5006total_high += lerp;5007}50085009if (total_low != 0.0f)5010l_q16 *= (1.0f / total_low);5011else5012return false;50135014if (total_high != 0.0f)5015h_q16 *= (1.0f / total_high);5016else5017return false;5018}50195020uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];50215022uint32_t submode_used;50235024bool pack_succeeded = pack_mode11(l_q16, h_q16, endpoint_ise_range, trial_endpoints, coptions, direct_only, first_submode, last_submode, false, submode_used);5025if (!pack_succeeded)5026return false;50275028int cur_e[2][3];5029if (!decode_mode11_to_qlog12(pEndpoint_vals, cur_e, endpoint_ise_range))5030return false;50315032int trial_e[2][3];5033if (!decode_mode11_to_qlog12(trial_endpoints, trial_e, endpoint_ise_range))5034return false;50355036for (uint32_t i = 0; i < 3; i++)5037{5038cur_e[0][i] <<= 4;5039cur_e[1][i] <<= 4;50405041trial_e[0][i] <<= 4;5042trial_e[1][i] <<= 4;5043}50445045const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = 
coptions.m_g_err_scale;50465047double cur_error = 0, trial_error = 0;50485049for (uint32_t p = 0; p < num_pixels; p++)5050{5051const half_float* pDesired_half = &pBlock_pixels_half[p][0];50525053const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);50545055const uint32_t c = trial_blk_raw_weights[p];5056assert(c <= 64);50575058{5059half_float rf, gf, bf;50605061{5062uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0];5063int ri = (r0 * (64 - c) + r1 * c + 32) / 64;5064rf = astc_helpers::qlog16_to_half(ri);5065}50665067{5068uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1];5069int gi = (g0 * (64 - c) + g1 * c + 32) / 64;5070gf = astc_helpers::qlog16_to_half(gi);5071}50725073{5074uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2];5075int bi = (b0 * (64 - c) + b1 * c + 32) / 64;5076bf = astc_helpers::qlog16_to_half(bi);5077}50785079const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);50805081const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;50825083cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;5084}50855086{5087half_float rf, gf, bf;50885089{5090uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];5091int ri = (r0 * (64 - c) + r1 * c + 32) / 64;5092rf = astc_helpers::qlog16_to_half(ri);5093}50945095{5096uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];5097int gi = (g0 * (64 - c) + g1 * c + 32) / 64;5098gf = astc_helpers::qlog16_to_half(gi);5099}51005101{5102uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];5103int bi = (b0 * (64 - c) + b1 * c + 32) / 64;5104bf = astc_helpers::qlog16_to_half(bi);5105}51065107const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, 
coptions.m_q_log_bias);51085109const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;51105111trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;5112}51135114} // p51155116if (trial_error < cur_error)5117{5118memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE11_ENDPOINTS);5119return true;5120}51215122return false;5123}51245125static bool refine_endpoints_mode7(5126uint32_t endpoint_ise_range,5127uint8_t* pEndpoint_vals, // the endpoints to optimize5128uint32_t block_w, uint32_t block_h, // block dimensions5129uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid5130uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],5131const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets5132astc_hdr_codec_base_options& coptions,5133int first_submode, int last_submode)5134{5135const uint32_t num_block_pixels = block_w * block_h;51365137uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];5138if (!pPixel_block_ofs)5139{5140for (uint32_t i = 0; i < num_block_pixels; i++)5141def_pixel_block_ofs[i] = (uint8_t)i;51425143pPixel_block_ofs = def_pixel_block_ofs;5144}51455146const uint32_t num_weights = grid_w * grid_h;51475148uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];5149for (uint32_t i = 0; i < num_weights; i++)5150dequantized_raw_weights[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]];51515152uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE5153astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights);51545155uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE5156for (uint32_t i = 0; i < num_pixels; i++)5157{5158assert(pPixel_block_ofs[i] < 
num_block_pixels);51595160trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]];5161}51625163//--51645165int cur_e[2][3];5166int cur_s = 0;5167if (!decode_mode7_to_qlog12(pEndpoint_vals, cur_e, &cur_s, endpoint_ise_range))5168return false;51695170cur_s <<= 4;51715172vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));51735174vec3F new_high_color_q16(block_mean_color_q16);51755176const float one_over_num_pixels = 1.0f / (float)num_pixels;51775178for (uint32_t i = 0; i < num_pixels; i++)5179{5180float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f);51815182float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;5183new_high_color_q16[0] += k;5184new_high_color_q16[1] += k;5185new_high_color_q16[2] += k;5186}51875188// Given a set of selectors and a high color, try to compute a better S.5189float t = 0.0f;51905191for (uint32_t i = 0; i < num_pixels; i++)5192{5193float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f);51945195t += (1.0f) - lerp;5196}51975198t *= one_over_num_pixels;51995200if (fabs(t) < .0000125f)5201return false;52025203uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];52045205uint32_t submode_used;5206if (!pack_mode7(new_high_color_q16, (float)cur_s, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))5207return false;52085209int trial_e[2][3];5210if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))5211return false;52125213vec3F cur_h_q16((float)(trial_e[1][0] << 4), (float)(trial_e[1][1] << 4), (float)(trial_e[1][2] << 4));52145215float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;5216//float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;5217//float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;5218float new_s_q16 = ceilf(s_r);52195220if (!pack_mode7(new_high_color_q16, new_s_q16, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))5221return 
false;52225223if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))5224return false;52255226// --52275228for (uint32_t i = 0; i < 3; i++)5229{5230cur_e[0][i] <<= 4;5231cur_e[1][i] <<= 4;52325233trial_e[0][i] <<= 4;5234trial_e[1][i] <<= 4;5235}52365237const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;52385239double cur_error = 0, trial_error = 0;52405241for (uint32_t p = 0; p < num_pixels; p++)5242{5243const half_float* pDesired_half = &pBlock_pixels_half[p][0];52445245const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);52465247const uint32_t c = trial_blk_raw_weights[p];5248assert(c <= 64);52495250{5251half_float rf, gf, bf;52525253{5254uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0];5255int ri = (r0 * (64 - c) + r1 * c + 32) / 64;5256rf = astc_helpers::qlog16_to_half(ri);5257}52585259{5260uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1];5261int gi = (g0 * (64 - c) + g1 * c + 32) / 64;5262gf = astc_helpers::qlog16_to_half(gi);5263}52645265{5266uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2];5267int bi = (b0 * (64 - c) + b1 * c + 32) / 64;5268bf = astc_helpers::qlog16_to_half(bi);5269}52705271const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);52725273const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;52745275cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;5276}52775278{5279half_float rf, gf, bf;52805281{5282uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];5283int ri = (r0 * (64 - c) + r1 * c + 32) / 64;5284rf = astc_helpers::qlog16_to_half(ri);5285}52865287{5288uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];5289int gi = (g0 * (64 - c) + g1 * c + 32) / 64;5290gf = 
astc_helpers::qlog16_to_half(gi);5291}52925293{5294uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];5295int bi = (b0 * (64 - c) + b1 * c + 32) / 64;5296bf = astc_helpers::qlog16_to_half(bi);5297}52985299const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);53005301const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;53025303trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;5304}53055306} // p53075308if (trial_error < cur_error)5309{5310memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE7_ENDPOINTS);5311return true;5312}53135314return false;5315}53165317bool refine_endpoints(5318uint32_t cem,5319uint32_t endpoint_ise_range,5320uint8_t* pEndpoint_vals, // the endpoints to optimize5321uint32_t block_w, uint32_t block_h, // block dimensions5322uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid5323uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],5324const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets5325astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode)5326{5327if (cem == 7)5328{5329return refine_endpoints_mode7(5330endpoint_ise_range,5331pEndpoint_vals,5332block_w, block_h,5333grid_w, grid_h, pWeights, weight_ise_range,5334num_pixels, pBlock_pixels_half, pBlock_pixels_q16,5335pPixel_block_ofs,5336coptions,5337FIRST_MODE7_SUBMODE_INDEX, MAX_MODE7_SUBMODE_INDEX);5338}5339else if (cem == 11)5340{5341return refine_endpoints_mode11(5342endpoint_ise_range,5343pEndpoint_vals,5344block_w, block_h,5345grid_w, grid_h, pWeights, weight_ise_range,5346num_pixels, pBlock_pixels_half, pBlock_pixels_q16,5347pPixel_block_ofs,5348coptions,5349false, FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, opt_mode);5350}53515352return false;5353}53545355} 
// namespace basisu5356535753585359