Path: blob/master/thirdparty/basis_universal/encoder/basisu_enc.h
9902 views
// basisu_enc.h1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14#pragma once15#include "../transcoder/basisu.h"16#include "../transcoder/basisu_transcoder_internal.h"1718#include <mutex>19#include <atomic>20#include <condition_variable>21#include <functional>22#include <thread>23#include <unordered_map>24#include <ostream>2526#if !defined(_WIN32) || defined(__MINGW32__)27#include <libgen.h>28#endif2930// This module is really just a huge grab bag of classes and helper functions needed by the encoder.3132// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time.33#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0)3435#if BASISU_SUPPORT_SSE36// Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors.37extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16);38#endif3940namespace basisu41{42extern uint8_t g_hamming_dist[256];43extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8];4445// true if basisu_encoder_init() has been called and returned.46extern bool g_library_initialized;4748// Encoder library initialization.49// This function MUST be called before encoding anything!50// Returns false if library initialization fails.51bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);52void basisu_encoder_deinit();5354// basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=155extern void detect_sse41();5657#if BASISU_SUPPORT_SSE58extern bool g_cpu_supports_sse41;59#else60const bool g_cpu_supports_sse41 = false;61#endif6263void error_vprintf(const char* pFmt, va_list args);64void error_printf(const char *pFmt, ...);6566template <typename... Args>67inline void fmt_error_printf(const char* pFmt, Args&&... args)68{69std::string res;70if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward<Args>(args))... }))71return;72error_printf("%s", res.c_str());73}7475void platform_sleep(uint32_t ms);7677// Helpers7879inline uint8_t clamp255(int32_t i)80{81return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);82}8384inline int left_shift32(int val, int shift)85{86assert((shift >= 0) && (shift < 32));87return static_cast<int>(static_cast<uint32_t>(val) << shift);88}8990inline uint32_t left_shift32(uint32_t val, int shift)91{92assert((shift >= 0) && (shift < 32));93return val << shift;94}9596inline int32_t clampi(int32_t value, int32_t low, int32_t high)97{98if (value < low)99value = low;100else if (value > high)101value = high;102return value;103}104105inline uint8_t mul_8(uint32_t v, uint32_t a)106{107v = v * a + 128;108return (uint8_t)((v + (v >> 8)) >> 8);109}110111inline int fast_roundf_int(float x)112{113return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f);114}115116inline int fast_floorf_int(float x)117{118int xi = (int)x; // Truncate towards zero119return ((x < 0.0f) && (x != (float)xi)) ? (xi - 1) : xi;120}121122inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)123{124assert(codesize <= 64);125uint64_t bits = 0;126uint32_t total_bits = 0;127128while (total_bits < codesize)129{130uint32_t byte_bit_offset = bit_offset & 7;131uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);132133uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;134byte_bits &= ((1 << bits_to_read) - 1);135136bits |= ((uint64_t)(byte_bits) << total_bits);137138total_bits += bits_to_read;139bit_offset += bits_to_read;140}141142return bits;143}144145inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)146{147assert(codesize <= 32);148uint32_t bits = 0;149uint32_t total_bits = 0;150151while (total_bits < codesize)152{153uint32_t byte_bit_offset = bit_offset & 7;154uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);155156uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;157byte_bits &= ((1 << bits_to_read) - 1);158159bits |= (byte_bits << total_bits);160161total_bits += bits_to_read;162bit_offset += bits_to_read;163}164165return bits;166}167168// Open interval169inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }170inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }171172// Closed interval173inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }174inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }175176inline uint32_t clz(uint32_t x)177{178if (!x)179return 32;180181uint32_t n = 0;182while ((x & 0x80000000) == 0)183{184x <<= 1u;185n++;186}187188return n;189}190191bool string_begins_with(const std::string& str, const char* pPhrase);192193// Case sensitive, returns -1 if can't find194inline int string_find_first(const std::string& str, const char* pPhrase)195{196size_t res = str.find(pPhrase, 0);197if (res == std::string::npos)198return -1;199return (int)res;200}201202// Hashing203204inline uint32_t bitmix32c(uint32_t v)205{206v = (v + 0x7ed55d16) + (v << 12);207v = (v ^ 0xc761c23c) ^ (v >> 19);208v = (v + 0x165667b1) + (v << 5);209v = (v + 0xd3a2646c) ^ (v << 9);210v = (v + 0xfd7046c5) + (v << 3);211v = (v ^ 0xb55a4f09) ^ (v >> 16);212return v;213}214215inline uint32_t bitmix32(uint32_t v)216{217v -= (v << 6);218v ^= (v >> 17);219v -= (v << 9);220v ^= (v << 4);221v -= (v << 3);222v ^= (v << 10);223v ^= (v >> 15);224return v;225}226227inline uint32_t wang_hash(uint32_t seed)228{229seed = (seed ^ 61) ^ (seed >> 16);230seed *= 9;231seed = seed ^ (seed >> 4);232seed *= 0x27d4eb2d;233seed = seed ^ (seed >> 15);234return seed;235}236237uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);238239template <typename Key>240struct bit_hasher241{242inline std::size_t operator()(const Key& k) const243{244return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));245}246};247248struct string_hasher249{250inline std::size_t operator()(const std::string& k) const251{252size_t l = k.size();253if (!l)254return 0;255return hash_hsieh(reinterpret_cast<const uint8_t*>(k.c_str()), l);256}257};258259class running_stat260{261public:262running_stat() { clear(); }263264void clear()265{266m_n = 0;267m_total = 0;268m_old_m = 0;269m_new_m = 0;270m_old_s = 0;271m_new_s = 0;272m_min = 0;273m_max = 0;274}275276void push(double x)277{278m_n++;279m_total += x;280if (m_n == 1)281{282m_old_m = m_new_m = x;283m_old_s = 0.0;284m_min = x;285m_max = x;286}287else288{289// See Knuth TAOCP vol 2, 3rd edition, page 232290m_new_m = m_old_m + (x - m_old_m) / m_n;291m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m);292m_old_m = m_new_m;293m_old_s = m_new_s;294m_min = basisu::minimum(x, m_min);295m_max = basisu::maximum(x, m_max);296}297}298299uint32_t get_num() const300{301return m_n;302}303304double get_total() const305{306return m_total;307}308309double get_mean() const310{311return (m_n > 0) ? m_new_m : 0.0;312}313314// Returns sample variance315double get_variance() const316{317return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0);318}319320double get_std_dev() const321{322return sqrt(get_variance());323}324325double get_min() const326{327return m_min;328}329330double get_max() const331{332return m_max;333}334335private:336uint32_t m_n;337double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max;338};339340// Linear algebra341342template <uint32_t N, typename T>343class vec344{345protected:346T m_v[N];347348public:349enum { num_elements = N };350typedef T scalar_type;351352inline vec() { }353inline vec(eZero) { set_zero(); }354355explicit inline vec(T val) { set(val); }356inline vec(T v0, T v1) { set(v0, v1); }357inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }358inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }359inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }360template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }361362inline const T& operator[](uint32_t i) const { assert(i < N); return m_v[i]; }363inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }364365inline T getX() const { return m_v[0]; }366inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }367inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }368inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }369370inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }371inline bool operator!=(const vec& rhs) const { return !(*this == rhs); }372inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }373374inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }375inline void clear() { set_zero(); }376377template <uint32_t OtherN, typename OtherT>378inline vec &set(const vec<OtherN, OtherT> &other)379{380uint32_t i;381if ((const void *)(&other) == (const void *)(this))382return *this;383const uint32_t m = minimum(OtherN, N);384for (i = 0; i < m; i++)385m_v[i] = static_cast<T>(other[i]);386for (; i < N; i++)387m_v[i] = 0;388return *this;389}390391inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }392inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }393inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }394395inline vec &set(T v0, T v1)396{397m_v[0] = v0;398if (N >= 2)399{400m_v[1] = v1;401clear_elements(2, N);402}403return *this;404}405406inline vec &set(T v0, T v1, T v2)407{408m_v[0] = v0;409if (N >= 2)410{411m_v[1] = v1;412if (N >= 3)413{414m_v[2] = v2;415clear_elements(3, N);416}417}418return *this;419}420421inline vec &set(T v0, T v1, T v2, T v3)422{423m_v[0] = v0;424if (N >= 2)425{426m_v[1] = v1;427if (N >= 3)428{429m_v[2] = v2;430431if (N >= 4)432{433m_v[3] = v3;434clear_elements(5, N);435}436}437}438return *this;439}440441inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }442template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }443444inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }445inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }446447inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }448inline vec operator+ () const { return *this; }449inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }450inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }451inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }452inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }453inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }454inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }455456friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }457friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }458friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }459friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }460friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }461friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }462463static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }464465inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }466467inline T norm() const { return dot_product(*this, *this); }468inline T length() const { return sqrt(norm()); }469470inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }471inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }472473inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }474inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }475476inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }477478inline vec get_normalized() const { vec res(*this); res.normalize_in_place(); return res; }479480inline vec &clamp(T l, T h)481{482for (uint32_t i = 0; i < N; i++)483m_v[i] = basisu::clamp(m_v[i], l, h);484return *this;485}486487static vec component_mul(const vec& a, const vec& b)488{489vec res;490for (uint32_t i = 0; i < N; i++)491res[i] = a[i] * b[i];492return res;493}494495static vec component_min(const vec& a, const vec& b)496{497vec res;498for (uint32_t i = 0; i < N; i++)499res[i] = minimum(a[i], b[i]);500return res;501}502503static vec component_max(const vec& a, const vec& b)504{505vec res;506for (uint32_t i = 0; i < N; i++)507res[i] = maximum(a[i], b[i]);508return res;509}510511static vec lerp(const vec& a, const vec& b, float s)512{513vec res;514for (uint32_t i = 0; i < N; i++)515res[i] = basisu::lerp(a[i], b[i], s);516return res;517}518};519520typedef vec<4, double> vec4D;521typedef vec<3, double> vec3D;522typedef vec<2, double> vec2D;523typedef vec<1, double> vec1D;524525typedef vec<6, float> vec6F;526typedef vec<5, float> vec5F;527typedef vec<4, float> vec4F;528typedef vec<3, float> vec3F;529typedef vec<2, float> vec2F;530typedef vec<1, float> vec1F;531532typedef vec<16, float> vec16F;533534template<uint32_t N, typename T> struct bitwise_copyable< vec<N, T> > { enum { cFlag = true }; };535template<uint32_t N, typename T> struct bitwise_movable< vec<N, T> > { enum { cFlag = true }; };536537template <uint32_t Rows, uint32_t Cols, typename T>538class matrix539{540public:541typedef vec<Rows, T> col_vec;542typedef vec<Cols, T> row_vec;543544typedef T scalar_type;545546enum { rows = Rows, cols = Cols };547548protected:549row_vec m_r[Rows];550551public:552inline matrix() {}553inline matrix(eZero) { set_zero(); }554inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }555inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }556557inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }558inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }559560inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }561inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }562563inline matrix &set_zero()564{565for (uint32_t i = 0; i < Rows; i++)566m_r[i].set_zero();567return *this;568}569570inline matrix &set_identity()571{572for (uint32_t i = 0; i < Rows; i++)573{574m_r[i].set_zero();575if (i < Cols)576m_r[i][i] = 1.0f;577}578return *this;579}580};581582template<uint32_t R, uint32_t C, typename T> struct bitwise_copyable< matrix<R, C, T> > { enum { cFlag = true }; };583template<uint32_t R, uint32_t C, typename T> struct bitwise_movable< matrix<R, C, T> > { enum { cFlag = true }; };584585template<uint32_t N, typename VectorType>586inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)587{588VectorType axis;589if (N == 1)590axis.set(1.0f);591else592{593for (uint32_t i = 0; i < N; i++)594axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));595}596597VectorType prev_axis(axis);598599// Power iterations600for (uint32_t power_iter = 0; power_iter < 8; power_iter++)601{602VectorType trial_axis;603double max_sum = 0;604605for (uint32_t i = 0; i < N; i++)606{607double sum = 0;608for (uint32_t j = 0; j < N; j++)609sum += cmatrix[i][j] * axis[j];610611trial_axis[i] = static_cast<float>(sum);612613max_sum = maximum(fabs(sum), max_sum);614}615616if (max_sum != 0.0f)617trial_axis *= static_cast<float>(1.0f / max_sum);618619VectorType delta_axis(prev_axis - trial_axis);620621prev_axis = axis;622axis = trial_axis;623624if (delta_axis.norm() < .0024f)625break;626}627628return axis.normalize_in_place();629}630631template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)632{633for (uint32_t i = 0; i < num_indices; i++)634pIndices[i] = i;635636std::sort(637pIndices,638pIndices + num_indices,639[pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }640);641}642643// 1-4 byte direct Radix sort.644template <typename T>645T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)646{647assert(key_ofs < sizeof(T));648assert((key_size >= 1) && (key_size <= 4));649650uint32_t hist[256 * 4];651652memset(hist, 0, sizeof(hist[0]) * 256 * key_size);653654#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))655656if (key_size == 4)657{658T* p = pBuf0;659T* q = pBuf0 + num_vals;660for (; p != q; p++)661{662const uint32_t key = BASISU_GET_KEY(p);663664hist[key & 0xFF]++;665hist[256 + ((key >> 8) & 0xFF)]++;666hist[512 + ((key >> 16) & 0xFF)]++;667hist[768 + ((key >> 24) & 0xFF)]++;668}669}670else if (key_size == 3)671{672T* p = pBuf0;673T* q = pBuf0 + num_vals;674for (; p != q; p++)675{676const uint32_t key = BASISU_GET_KEY(p);677678hist[key & 0xFF]++;679hist[256 + ((key >> 8) & 0xFF)]++;680hist[512 + ((key >> 16) & 0xFF)]++;681}682}683else if (key_size == 2)684{685T* p = pBuf0;686T* q = pBuf0 + (num_vals >> 1) * 2;687688for (; p != q; p += 2)689{690const uint32_t key0 = BASISU_GET_KEY(p);691const uint32_t key1 = BASISU_GET_KEY(p + 1);692693hist[key0 & 0xFF]++;694hist[256 + ((key0 >> 8) & 0xFF)]++;695696hist[key1 & 0xFF]++;697hist[256 + ((key1 >> 8) & 0xFF)]++;698}699700if (num_vals & 1)701{702const uint32_t key = BASISU_GET_KEY(p);703704hist[key & 0xFF]++;705hist[256 + ((key >> 8) & 0xFF)]++;706}707}708else709{710assert(key_size == 1);711if (key_size != 1)712return NULL;713714T* p = pBuf0;715T* q = pBuf0 + (num_vals >> 1) * 2;716717for (; p != q; p += 2)718{719const uint32_t key0 = BASISU_GET_KEY(p);720const uint32_t key1 = BASISU_GET_KEY(p + 1);721722hist[key0 & 0xFF]++;723hist[key1 & 0xFF]++;724}725726if (num_vals & 1)727{728const uint32_t key = BASISU_GET_KEY(p);729hist[key & 0xFF]++;730}731}732733T* pCur = pBuf0;734T* pNew = pBuf1;735736for (uint32_t pass = 0; pass < key_size; pass++)737{738const uint32_t* pHist = &hist[pass << 8];739740uint32_t offsets[256];741742uint32_t cur_ofs = 0;743for (uint32_t i = 0; i < 256; i += 2)744{745offsets[i] = cur_ofs;746cur_ofs += pHist[i];747748offsets[i + 1] = cur_ofs;749cur_ofs += pHist[i + 1];750}751752const uint32_t pass_shift = pass << 3;753754T* p = pCur;755T* q = pCur + (num_vals >> 1) * 2;756757for (; p != q; p += 2)758{759uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;760uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;761762if (c0 == c1)763{764uint32_t dst_offset0 = offsets[c0];765766offsets[c0] = dst_offset0 + 2;767768pNew[dst_offset0] = p[0];769pNew[dst_offset0 + 1] = p[1];770}771else772{773uint32_t dst_offset0 = offsets[c0]++;774uint32_t dst_offset1 = offsets[c1]++;775776pNew[dst_offset0] = p[0];777pNew[dst_offset1] = p[1];778}779}780781if (num_vals & 1)782{783uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;784785uint32_t dst_offset = offsets[c];786offsets[c] = dst_offset + 1;787788pNew[dst_offset] = *p;789}790791T* t = pCur;792pCur = pNew;793pNew = t;794}795796return pCur;797}798799#undef BASISU_GET_KEY800801// Very simple job pool with no dependencies.802class job_pool803{804BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);805806public:807// num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.808job_pool(uint32_t num_threads);809~job_pool();810811void add_job(const std::function<void()>& job);812void add_job(std::function<void()>&& job);813814void wait_for_all();815816size_t get_total_threads() const { return 1 + m_threads.size(); }817818private:819std::vector<std::thread> m_threads;820std::vector<std::function<void()> > m_queue;821822std::mutex m_mutex;823std::condition_variable m_has_work;824std::condition_variable m_no_more_jobs;825826uint32_t m_num_active_jobs;827828std::atomic<bool> m_kill_flag;829830std::atomic<int> m_num_active_workers;831832void job_thread(uint32_t index);833};834835// Simple 64-bit color class836837class color_rgba_i16838{839public:840union841{842int16_t m_comps[4];843844struct845{846int16_t r;847int16_t g;848int16_t b;849int16_t a;850};851};852853inline color_rgba_i16()854{855static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");856}857858inline color_rgba_i16(int sr, int sg, int sb, int sa)859{860set(sr, sg, sb, sa);861}862863inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)864{865m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);866m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);867m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);868m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);869return *this;870}871};872873class color_rgba874{875public:876union877{878uint8_t m_comps[4];879880struct881{882uint8_t r;883uint8_t g;884uint8_t b;885uint8_t a;886};887};888889inline color_rgba()890{891static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");892static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)");893}894895// Not too hot about this idea.896inline color_rgba(const basist::color32& other) :897r(other.r),898g(other.g),899b(other.b),900a(other.a)901{902}903904color_rgba& operator= (const basist::color32& rhs)905{906r = rhs.r;907g = rhs.g;908b = rhs.b;909a = rhs.a;910return *this;911}912913inline color_rgba(int y)914{915set(y);916}917918inline color_rgba(int y, int na)919{920set(y, na);921}922923inline color_rgba(int sr, int sg, int sb, int sa)924{925set(sr, sg, sb, sa);926}927928inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)929{930set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);931}932933inline color_rgba& set_noclamp_y(int y)934{935m_comps[0] = (uint8_t)y;936m_comps[1] = (uint8_t)y;937m_comps[2] = (uint8_t)y;938m_comps[3] = (uint8_t)255;939return *this;940}941942inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)943{944m_comps[0] = (uint8_t)sr;945m_comps[1] = (uint8_t)sg;946m_comps[2] = (uint8_t)sb;947m_comps[3] = (uint8_t)sa;948return *this;949}950951inline color_rgba &set(int y)952{953m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));954m_comps[1] = m_comps[0];955m_comps[2] = m_comps[0];956m_comps[3] = 255;957return *this;958}959960inline color_rgba &set(int y, int na)961{962m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));963m_comps[1] = m_comps[0];964m_comps[2] = m_comps[0];965m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));966return *this;967}968969inline color_rgba &set(int sr, int sg, int sb, int sa)970{971m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));972m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));973m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));974m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));975return *this;976}977978inline color_rgba &set_rgb(int sr, int sg, int sb)979{980m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));981m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));982m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));983return *this;984}985986inline color_rgba &set_rgb(const color_rgba &other)987{988r = other.r;989g = other.g;990b = other.b;991return *this;992}993994inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }995inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }996997inline void clear()998{999m_comps[0] = 0;1000m_comps[1] = 0;1001m_comps[2] = 0;1002m_comps[3] = 0;1003}10041005inline bool operator== (const color_rgba &rhs) const1006{1007if (m_comps[0] != rhs.m_comps[0]) return false;1008if (m_comps[1] != rhs.m_comps[1]) return false;1009if (m_comps[2] != rhs.m_comps[2]) return false;1010if (m_comps[3] != rhs.m_comps[3]) return false;1011return true;1012}10131014inline bool operator!= (const color_rgba &rhs) const1015{1016return !(*this == rhs);1017}10181019inline bool operator<(const color_rgba &rhs) const1020{1021for (int i = 0; i < 4; i++)1022{1023if (m_comps[i] < rhs.m_comps[i])1024return true;1025else if (m_comps[i] != rhs.m_comps[i])1026return false;1027}1028return false;1029}10301031inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }1032inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }1033inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }10341035inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); }1036inline uint32_t get_rgba_uint32() const { return r | (g << 8) | (b << 16) | (a << 24); }10371038inline basist::color32 get_color32() const1039{1040return basist::color32(r, g, b, a);1041}10421043static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }1044static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }1045};10461047typedef basisu::vector<color_rgba> color_rgba_vec;10481049const color_rgba g_black_color(0, 0, 0, 255);1050const color_rgba g_black_trans_color(0, 0, 0, 0);1051const color_rgba g_white_color(255, 255, 255, 255);10521053inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)1054{1055int dr = r0 - r1, dg = g0 - g1, db = b0 - b1;1056return dr * dr + dg * dg + db * db;1057}10581059inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)1060{1061int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1;1062return dr * dr + dg * dg + db * db + da * da;1063}10641065inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)1066{1067if (alpha)1068return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);1069else1070return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);1071}10721073// TODO: Allow user to control channel weightings.1074inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)1075{1076if (perceptual)1077{1078#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE1079const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;1080const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;10811082const float cr1 = e1.r - l1;1083const float cr2 = e2.r - l2;10841085const float cb1 = e1.b - l1;1086const float cb2 = e2.b - l2;10871088const float dl = l1 - l2;1089const float dcr = cr1 - cr2;1090const float dcb = cb1 - cb2;10911092uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);10931094if (alpha)1095{1096int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);1097d += static_cast<uint32_t>(128.0f*da*da);1098}10991100return d;1101#elif 11102int dr = e1.r - e2.r;1103int dg = e1.g - e2.g;1104int db = e1.b - e2.b;11051106#if 01107int delta_l = dr * 27 + dg * 92 + db * 9;1108int delta_cr = dr * 128 - delta_l;1109int delta_cb = db * 128 - delta_l;11101111uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +1112((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +1113((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);1114#else1115int64_t delta_l = dr * 27 + dg * 92 + db * 9;1116int64_t delta_cr = dr * 128 - delta_l;1117int64_t delta_cb = db * 128 - delta_l;11181119uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +1120((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +1121((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);1122#endif11231124if (alpha)1125{1126int da = (e1.a - e2.a) << 7;1127// This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.1128id += ((uint32_t)(da * da) >> 7U);1129}11301131return id;1132#else1133int dr = e1.r - e2.r;1134int dg = e1.g - e2.g;1135int db = e1.b - e2.b;11361137int64_t delta_l = dr * 27 + dg * 92 + db * 9;1138int64_t delta_cr = dr * 128 - delta_l;1139int64_t delta_cb = db * 128 - delta_l;11401141int64_t id = ((delta_l * delta_l) * 128) +1142((delta_cr * delta_cr) * 26) +1143((delta_cb * delta_cb) * 3);11441145if (alpha)1146{1147int64_t da = (e1.a - e2.a);1148id += (da * da) * 128;1149}11501151int d = (id + 8192) >> 14;11521153return d;1154#endif1155}1156else1157return color_distance(e1, e2, alpha);1158}11591160static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b)1161{1162const int dl = a.r - b.r;1163const int da = a.a - b.a;1164return dl * dl + da * da;1165}11661167// String helpers11681169inline int string_find_right(const std::string& filename, char c)1170{1171size_t result = filename.find_last_of(c);1172return (result == std::string::npos) ? -1 : (int)result;1173}11741175inline std::string string_get_extension(const std::string &filename)1176{1177int sep = -1;1178#ifdef _WIN321179sep = string_find_right(filename, '\\');1180#endif1181if (sep < 0)1182sep = string_find_right(filename, '/');11831184int dot = string_find_right(filename, '.');1185if (dot <= sep)1186return "";11871188std::string result(filename);1189result.erase(0, dot + 1);11901191return result;1192}11931194inline bool string_remove_extension(std::string &filename)1195{1196int sep = -1;1197#ifdef _WIN321198sep = string_find_right(filename, '\\');1199#endif1200if (sep < 0)1201sep = string_find_right(filename, '/');12021203int dot = string_find_right(filename, '.');1204if ((dot < sep) || (dot < 0))1205return false;12061207filename.resize(dot);12081209return true;1210}12111212inline std::string string_tolower(const std::string& s)1213{1214std::string result(s);1215for (size_t i = 0; i < result.size(); i++)1216{1217result[i] = (char)tolower((uint8_t)(result[i]));1218}1219return result;1220}12211222inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)1223{1224assert(pDst && pSrc && dst_len);1225if (!dst_len)1226return pDst;12271228const size_t src_len = strlen(pSrc);1229const size_t src_len_plus_terminator = src_len + 1;12301231if (src_len_plus_terminator <= dst_len)1232memcpy(pDst, pSrc, src_len_plus_terminator);1233else1234{1235if (dst_len > 1)1236memcpy(pDst, pSrc, dst_len - 1);1237pDst[dst_len - 1] = '\0';1238}12391240return pDst;1241}12421243inline bool string_ends_with(const std::string& s, char c)1244{1245return (s.size() != 0) && (s.back() == c);1246}12471248inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)1249{1250#ifdef _MSC_VER1251char drive_buf[_MAX_DRIVE] = { 0 };1252char dir_buf[_MAX_DIR] = { 0 };1253char fname_buf[_MAX_FNAME] = { 0 };1254char ext_buf[_MAX_EXT] = { 0 };12551256errno_t error = _splitpath_s(p,1257pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,1258pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,1259pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,1260pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);1261if (error != 0)1262return false;12631264if (pDrive) *pDrive = drive_buf;1265if (pDir) *pDir = dir_buf;1266if (pFilename) *pFilename = fname_buf;1267if (pExt) *pExt = ext_buf;1268return true;1269#else1270char dirtmp[1024], nametmp[1024];1271strcpy_safe(dirtmp, sizeof(dirtmp), p);1272strcpy_safe(nametmp, sizeof(nametmp), p);12731274if (pDrive)1275pDrive->resize(0);12761277const char *pDirName = dirname(dirtmp);1278const char* pBaseName = basename(nametmp);1279if ((!pDirName) || (!pBaseName))1280return false;12811282if (pDir)1283{1284*pDir = pDirName;1285if ((pDir->size()) && (pDir->back() != '/'))1286*pDir += "/";1287}12881289if (pFilename)1290{1291*pFilename = pBaseName;1292string_remove_extension(*pFilename);1293}12941295if (pExt)1296{1297*pExt = pBaseName;1298*pExt = string_get_extension(*pExt);1299if (pExt->size())1300*pExt = "." + *pExt;1301}13021303return true;1304#endif1305}13061307inline bool is_path_separator(char c)1308{1309#ifdef _WIN321310return (c == '/') || (c == '\\');1311#else1312return (c == '/');1313#endif1314}13151316inline bool is_drive_separator(char c)1317{1318#ifdef _WIN321319return (c == ':');1320#else1321(void)c;1322return false;1323#endif1324}13251326inline void string_combine_path(std::string &dst, const char *p, const char *q)1327{1328std::string temp(p);1329if (temp.size() && !is_path_separator(q[0]))1330{1331if (!is_path_separator(temp.back()))1332temp.append(1, BASISU_PATH_SEPERATOR_CHAR);1333}1334temp += q;1335dst.swap(temp);1336}13371338inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)1339{1340string_combine_path(dst, p, q);1341string_combine_path(dst, dst.c_str(), r);1342}13431344inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)1345{1346string_combine_path(dst, p, q, r);1347if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))1348dst.append(1, '.');1349dst.append(pExt);1350}13511352inline bool string_get_pathname(const char *p, std::string &path)1353{1354std::string temp_drive, temp_path;1355if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))1356return false;1357string_combine_path(path, temp_drive.c_str(), temp_path.c_str());1358return true;1359}13601361inline bool string_get_filename(const char *p, std::string &filename)1362{1363std::string temp_ext;1364if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))1365return false;1366filename += temp_ext;1367return true;1368}13691370class rand1371{1372std::mt19937 m_mt;13731374public:1375rand() { }13761377rand(uint32_t s) { seed(s); }1378void seed(uint32_t s) { m_mt.seed(s); }13791380// between [l,h]1381int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }13821383uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }13841385bool bit() { return irand(0, 1) == 1; }13861387uint8_t byte() { return static_cast<uint8_t>(urand32()); }13881389// between [l,h)1390float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }13911392float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }1393};13941395class priority_queue1396{1397public:1398priority_queue() :1399m_size(0)1400{1401}14021403void clear()1404{1405m_heap.clear();1406m_size = 0;1407}14081409void init(uint32_t max_entries, uint32_t first_index, float first_priority)1410{1411m_heap.resize(max_entries + 1);1412m_heap[1].m_index = first_index;1413m_heap[1].m_priority = first_priority;1414m_size = 1;1415}14161417inline uint32_t size() const { return m_size; }14181419inline uint32_t get_top_index() const { return m_heap[1].m_index; }1420inline float get_top_priority() const { return m_heap[1].m_priority; }14211422inline void delete_top()1423{1424assert(m_size > 0);1425m_heap[1] = m_heap[m_size];1426m_size--;1427if (m_size)1428down_heap(1);1429}14301431inline void add_heap(uint32_t index, float priority)1432{1433m_size++;14341435uint32_t k = m_size;14361437if (m_size >= m_heap.size())1438m_heap.resize(m_size + 1);14391440for (;;)1441{1442uint32_t parent_index = k >> 1;1443if ((!parent_index) || (m_heap[parent_index].m_priority > priority))1444break;1445m_heap[k] = m_heap[parent_index];1446k = parent_index;1447}14481449m_heap[k].m_index = index;1450m_heap[k].m_priority = priority;1451}14521453private:1454struct entry1455{1456uint32_t m_index;1457float m_priority;1458};14591460basisu::vector<entry> m_heap;1461uint32_t m_size;14621463// Push down entry at index1464inline void down_heap(uint32_t heap_index)1465{1466uint32_t orig_index = m_heap[heap_index].m_index;1467const float orig_priority = m_heap[heap_index].m_priority;14681469uint32_t child_index;1470while ((child_index = (heap_index << 1)) <= m_size)1471{1472if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;1473if (orig_priority > m_heap[child_index].m_priority)1474break;1475m_heap[heap_index] = m_heap[child_index];1476heap_index = child_index;1477}14781479m_heap[heap_index].m_index = orig_index;1480m_heap[heap_index].m_priority = orig_priority;1481}1482};14831484// Tree structured vector quantization (TSVQ)14851486template <typename TrainingVectorType>1487class tree_vector_quant1488{1489public:1490typedef TrainingVectorType training_vec_type;1491typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;1492typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;14931494tree_vector_quant() :1495m_next_codebook_index(0)1496{1497}14981499void clear()1500{1501clear_vector(m_training_vecs);1502clear_vector(m_nodes);1503m_next_codebook_index = 0;1504}15051506void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }15071508size_t get_total_training_vecs() const { return m_training_vecs.size(); }1509const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }1510array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }15111512void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const1513{1514for (uint32_t i = 0; i < m_nodes.size(); i++)1515{1516const tsvq_node &n = m_nodes[i];1517if (!n.is_leaf())1518continue;15191520codebook.resize(codebook.size() + 1);1521codebook.back() = n.m_training_vecs;1522}1523}15241525void retrieve(basisu::vector<TrainingVectorType> &codebook) const1526{1527for (uint32_t i = 0; i < m_nodes.size(); i++)1528{1529const tsvq_node &n = m_nodes[i];1530if (!n.is_leaf())1531continue;15321533codebook.resize(codebook.size() + 1);1534codebook.back() = n.m_origin;1535}1536}15371538void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const1539{1540uint_vec node_stack;1541node_stack.reserve(512);15421543codebook.resize(0);1544codebook.reserve(max_clusters);15451546uint32_t node_index = 0;15471548while (true)1549{1550const tsvq_node& cur = m_nodes[node_index];15511552if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))1553{1554codebook.resize(codebook.size() + 1);1555codebook.back() = cur.m_training_vecs;15561557if (node_stack.empty())1558break;15591560node_index = node_stack.back();1561node_stack.pop_back();1562continue;1563}15641565node_stack.push_back(cur.m_right_index);1566node_index = cur.m_left_index;1567}1568}15691570bool generate(uint32_t max_size)1571{1572if (!m_training_vecs.size())1573return false;15741575m_next_codebook_index = 0;15761577clear_vector(m_nodes);1578m_nodes.reserve(max_size * 2 + 1);15791580m_nodes.push_back(prepare_root());15811582priority_queue var_heap;1583var_heap.init(max_size, 0, m_nodes[0].m_var);15841585basisu::vector<uint32_t> l_children, r_children;15861587// Now split the worst nodes1588l_children.reserve(m_training_vecs.size() + 1);1589r_children.reserve(m_training_vecs.size() + 1);15901591uint32_t total_leaf_nodes = 1;15921593//interval_timer tm;1594//tm.start();15951596while ((var_heap.size()) && (total_leaf_nodes < max_size))1597{1598const uint32_t node_index = var_heap.get_top_index();1599const tsvq_node &node = m_nodes[node_index];16001601assert(node.m_var == var_heap.get_top_priority());1602assert(node.is_leaf());16031604var_heap.delete_top();16051606if (node.m_training_vecs.size() > 1)1607{1608if (split_node(node_index, var_heap, l_children, r_children))1609{1610// This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.1611total_leaf_nodes += 1;1612}1613}1614}16151616//debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());16171618return true;1619}16201621private:1622class tsvq_node1623{1624public:1625inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }16261627// vecs is erased1628inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }16291630inline bool is_leaf() const { return m_left_index < 0; }16311632float m_var;1633uint64_t m_weight;1634TrainingVectorType m_origin;1635int32_t m_left_index, m_right_index;1636basisu::vector<uint32_t> m_training_vecs;1637int m_codebook_index;1638};16391640typedef basisu::vector<tsvq_node> tsvq_node_vec;1641tsvq_node_vec m_nodes;16421643array_of_weighted_training_vecs m_training_vecs;16441645uint32_t m_next_codebook_index;16461647tsvq_node prepare_root() const1648{1649double ttsum = 0.0f;16501651// Prepare root node containing all training vectors1652tsvq_node root;1653root.m_training_vecs.reserve(m_training_vecs.size());16541655for (uint32_t i = 0; i < m_training_vecs.size(); i++)1656{1657const TrainingVectorType &v = m_training_vecs[i].first;1658const uint64_t weight = m_training_vecs[i].second;16591660root.m_training_vecs.push_back(i);16611662root.m_origin += (v * static_cast<float>(weight));1663root.m_weight += weight;16641665ttsum += v.dot(v) * weight;1666}16671668root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));16691670root.m_origin *= (1.0f / root.m_weight);16711672return root;1673}16741675bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)1676{1677TrainingVectorType l_child_org, r_child_org;1678uint64_t l_weight = 0, r_weight = 0;1679float l_var = 0.0f, r_var = 0.0f;16801681// Compute initial left/right child origins1682if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))1683return false;16841685// Use k-means iterations to refine these children vectors1686if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))1687return false;16881689// Create children1690const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;16911692m_nodes[node_index].m_left_index = l_child_index;1693m_nodes[node_index].m_right_index = r_child_index;16941695m_nodes[node_index].m_codebook_index = m_next_codebook_index;1696m_next_codebook_index++;16971698m_nodes.resize(m_nodes.size() + 2);16991700tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];17011702l_child.set(l_child_org, l_weight, l_var, l_children);1703r_child.set(r_child_org, r_weight, r_var, r_children);17041705if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))1706{1707TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);17081709for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)1710{1711if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))1712{1713l_child.m_var = 1e-4f;1714break;1715}1716}1717}17181719if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))1720{1721TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);17221723for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)1724{1725if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))1726{1727r_child.m_var = 1e-4f;1728break;1729}1730}1731}17321733if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))1734var_heap.add_heap(l_child_index, l_child.m_var);17351736if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))1737var_heap.add_heap(r_child_index, r_child.m_var);17381739return true;1740}17411742TrainingVectorType compute_split_axis(const tsvq_node &node) const1743{1744const uint32_t N = TrainingVectorType::num_elements;17451746matrix<N, N, float> cmatrix;17471748if ((N != 16) || (!g_cpu_supports_sse41))1749{1750cmatrix.set_zero();17511752// Compute covariance matrix from weighted input vectors1753for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1754{1755const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);1756const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);17571758for (uint32_t x = 0; x < N; x++)1759for (uint32_t y = x; y < N; y++)1760cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];1761}1762}1763else1764{1765#if BASISU_SUPPORT_SSE1766// Specialize the case with 16x16 matrices, which are quite expensive without SIMD.1767// This SSE function takes pointers to void types, so do some sanity checks.1768assert(sizeof(TrainingVectorType) == sizeof(float) * 16);1769assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));1770update_covar_matrix_16x16_sse41(node.m_training_vecs.size_u32(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);1771#endif1772}17731774const float renorm_scale = 1.0f / node.m_weight;17751776for (uint32_t x = 0; x < N; x++)1777for (uint32_t y = x; y < N; y++)1778cmatrix[x][y] *= renorm_scale;17791780// Diagonal flip1781for (uint32_t x = 0; x < (N - 1); x++)1782for (uint32_t y = x + 1; y < N; y++)1783cmatrix[y][x] = cmatrix[x][y];17841785return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);1786}17871788bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const1789{1790//const uint32_t N = TrainingVectorType::num_elements;17911792if (2 == node.m_training_vecs.size())1793{1794l_child_result = m_training_vecs[node.m_training_vecs[0]].first;1795r_child_result = m_training_vecs[node.m_training_vecs[1]].first;1796return true;1797}17981799TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);1800double l_weight = 0.0f, r_weight = 0.0f;18011802// Compute initial left/right children1803for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1804{1805const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;18061807const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;18081809double t = (v - node.m_origin).dot(axis);1810if (t >= 0.0f)1811{1812r_child += v * weight;1813r_weight += weight;1814}1815else1816{1817l_child += v * weight;1818l_weight += weight;1819}1820}18211822if ((l_weight > 0.0f) && (r_weight > 0.0f))1823{1824l_child_result = l_child * static_cast<float>(1.0f / l_weight);1825r_child_result = r_child * static_cast<float>(1.0f / r_weight);1826}1827else1828{1829TrainingVectorType l(1e+20f);1830TrainingVectorType h(-1e+20f);1831for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1832{1833const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;18341835l = TrainingVectorType::component_min(l, v);1836h = TrainingVectorType::component_max(h, v);1837}18381839TrainingVectorType r(h - l);18401841float largest_axis_v = 0.0f;1842int largest_axis_index = -1;1843for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)1844{1845if (r[i] > largest_axis_v)1846{1847largest_axis_v = r[i];1848largest_axis_index = i;1849}1850}18511852if (largest_axis_index < 0)1853return false;18541855basisu::vector<float> keys(node.m_training_vecs.size());1856for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1857keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];18581859uint_vec indices(node.m_training_vecs.size());1860indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);18611862l_child.set_zero();1863l_weight = 0;18641865r_child.set_zero();1866r_weight = 0;18671868const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;1869for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1870{1871const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;18721873const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;18741875if (i < half_index)1876{1877l_child += v * weight;1878l_weight += weight;1879}1880else1881{1882r_child += v * weight;1883r_weight += weight;1884}1885}18861887if ((l_weight > 0.0f) && (r_weight > 0.0f))1888{1889l_child_result = l_child * static_cast<float>(1.0f / l_weight);1890r_child_result = r_child * static_cast<float>(1.0f / r_weight);1891}1892else1893{1894l_child_result = l;1895r_child_result = h;1896}1897}18981899return true;1900}19011902bool refine_split(const tsvq_node &node,1903TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,1904TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const1905{1906l_children.reserve(node.m_training_vecs.size());1907r_children.reserve(node.m_training_vecs.size());19081909float prev_total_variance = 1e+10f;19101911// Refine left/right children locations using k-means iterations1912const uint32_t cMaxIters = 6;1913for (uint32_t iter = 0; iter < cMaxIters; iter++)1914{1915l_children.resize(0);1916r_children.resize(0);19171918TrainingVectorType new_l_child(cZero), new_r_child(cZero);19191920double l_ttsum = 0.0f, r_ttsum = 0.0f;19211922l_weight = 0;1923r_weight = 0;19241925for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1926{1927const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;1928const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;19291930double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);19311932if (left_dist2 >= right_dist2)1933{1934new_r_child += (v * static_cast<float>(weight));1935r_weight += weight;19361937r_ttsum += weight * v.dot(v);1938r_children.push_back(node.m_training_vecs[i]);1939}1940else1941{1942new_l_child += (v * static_cast<float>(weight));1943l_weight += weight;19441945l_ttsum += weight * v.dot(v);1946l_children.push_back(node.m_training_vecs[i]);1947}1948}19491950// Node is unsplittable using the above algorithm - try something else to split it up.1951if ((!l_weight) || (!r_weight))1952{1953l_children.resize(0);1954new_l_child.set(0.0f);1955l_ttsum = 0.0f;1956l_weight = 0;19571958r_children.resize(0);1959new_r_child.set(0.0f);1960r_ttsum = 0.0f;1961r_weight = 0;19621963TrainingVectorType firstVec;1964for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)1965{1966const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;1967const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;19681969if ((!i) || (v == firstVec))1970{1971firstVec = v;19721973new_r_child += (v * static_cast<float>(weight));1974r_weight += weight;19751976r_ttsum += weight * v.dot(v);1977r_children.push_back(node.m_training_vecs[i]);1978}1979else1980{1981new_l_child += (v * static_cast<float>(weight));1982l_weight += weight;19831984l_ttsum += weight * v.dot(v);1985l_children.push_back(node.m_training_vecs[i]);1986}1987}19881989if ((!l_weight) || (!r_weight))1990return false;1991}19921993l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));1994r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));19951996new_l_child *= (1.0f / l_weight);1997new_r_child *= (1.0f / r_weight);19981999l_child = new_l_child;2000r_child = new_r_child;20012002float total_var = l_var + r_var;2003const float cGiveupVariance = .00001f;2004if (total_var < cGiveupVariance)2005break;20062007// Check to see if the variance has settled2008const float cVarianceDeltaThresh = .00125f;2009if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)2010break;20112012prev_total_variance = total_var;2013}20142015return true;2016}2017};20182019struct weighted_block_group2020{2021uint64_t m_total_weight;2022uint_vec m_indices;2023};20242025template<typename Quantizer>2026bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,2027uint32_t max_codebook_size, uint32_t max_parent_codebook_size,2028basisu::vector<uint_vec>& codebook,2029basisu::vector<uint_vec>& parent_codebook,2030uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)2031{2032codebook.resize(0);2033parent_codebook.resize(0);20342035if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))2036{2037if (!q.generate(max_codebook_size))2038return false;20392040q.retrieve(codebook);20412042if (max_parent_codebook_size)2043q.retrieve(max_parent_codebook_size, parent_codebook);20442045return true;2046}20472048const uint32_t cMaxThreads = 16;2049if (max_threads > cMaxThreads)2050max_threads = cMaxThreads;20512052if (!q.generate(max_threads))2053return false;20542055basisu::vector<uint_vec> initial_codebook;20562057q.retrieve(initial_codebook);20582059if (initial_codebook.size() < max_threads)2060{2061codebook = initial_codebook;20622063if (max_parent_codebook_size)2064q.retrieve(max_parent_codebook_size, parent_codebook);20652066return true;2067}20682069Quantizer quantizers[cMaxThreads];20702071bool success_flags[cMaxThreads];2072clear_obj(success_flags);20732074basisu::vector<uint_vec> local_clusters[cMaxThreads];2075basisu::vector<uint_vec> local_parent_clusters[cMaxThreads];20762077for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)2078{2079pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {20802081Quantizer& lq = quantizers[thread_iter];2082uint_vec& cluster_indices = initial_codebook[thread_iter];20832084uint_vec local_to_global(cluster_indices.size());20852086for (uint32_t i = 0; i < cluster_indices.size(); i++)2087{2088const uint32_t global_training_vec_index = cluster_indices[i];2089local_to_global[i] = global_training_vec_index;20902091lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);2092}20932094const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();20952096success_flags[thread_iter] = lq.generate(max_clusters);20972098if (success_flags[thread_iter])2099{2100lq.retrieve(local_clusters[thread_iter]);21012102for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)2103{2104for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)2105local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];2106}21072108if (max_parent_codebook_size)2109{2110lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);21112112for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)2113{2114for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)2115local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];2116}2117}2118}21192120} );21212122} // thread_iter21232124pJob_pool->wait_for_all();21252126uint32_t total_clusters = 0, total_parent_clusters = 0;21272128for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)2129{2130if (!success_flags[thread_iter])2131return false;2132total_clusters += (uint32_t)local_clusters[thread_iter].size();2133total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();2134}21352136codebook.reserve(total_clusters);2137parent_codebook.reserve(total_parent_clusters);21382139for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)2140{2141for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)2142{2143codebook.resize(codebook.size() + 1);2144codebook.back().swap(local_clusters[thread_iter][j]);2145}21462147for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)2148{2149parent_codebook.resize(parent_codebook.size() + 1);2150parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);2151}2152}21532154return true;2155}21562157template<typename Quantizer>2158bool generate_hierarchical_codebook_threaded(Quantizer& q,2159uint32_t max_codebook_size, uint32_t max_parent_codebook_size,2160basisu::vector<uint_vec>& codebook,2161basisu::vector<uint_vec>& parent_codebook,2162uint32_t max_threads, job_pool *pJob_pool,2163bool even_odd_input_pairs_equal)2164{2165typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;21662167typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,2168training_vec_bit_hasher> group_hash;21692170//interval_timer tm;2171//tm.start();21722173group_hash unique_vecs;21742175unique_vecs.reserve(20000);21762177weighted_block_group g;21782179if (even_odd_input_pairs_equal)2180{2181g.m_indices.resize(2);21822183assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);21842185for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)2186{2187assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);21882189g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;2190g.m_indices[0] = i;2191g.m_indices[1] = i + 1;21922193auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));21942195if (!ins_res.second)2196{2197(ins_res.first)->second.m_total_weight += g.m_total_weight;2198(ins_res.first)->second.m_indices.push_back(i);2199(ins_res.first)->second.m_indices.push_back(i + 1);2200}2201}2202}2203else2204{2205g.m_indices.resize(1);22062207for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)2208{2209g.m_total_weight = q.get_training_vecs()[i].second;2210g.m_indices[0] = i;22112212auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));22132214if (!ins_res.second)2215{2216(ins_res.first)->second.m_total_weight += g.m_total_weight;2217(ins_res.first)->second.m_indices.push_back(i);2218}2219}2220}22212222//debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());2223debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());22242225Quantizer group_quant;2226typedef typename group_hash::const_iterator group_hash_const_iter;2227basisu::vector<group_hash_const_iter> unique_vec_iters;2228unique_vec_iters.reserve(unique_vecs.size());22292230for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)2231{2232group_quant.add_training_vec(iter->first, iter->second.m_total_weight);2233unique_vec_iters.push_back(iter);2234}22352236bool limit_clusterizers = true;2237if (unique_vecs.size() <= max_codebook_size)2238limit_clusterizers = false;22392240debug_printf("Limit clusterizers: %u\n", limit_clusterizers);22412242basisu::vector<uint_vec> group_codebook, group_parent_codebook;2243bool status = generate_hierarchical_codebook_threaded_internal(group_quant,2244max_codebook_size, max_parent_codebook_size,2245group_codebook,2246group_parent_codebook,2247(unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);22482249if (!status)2250return false;22512252codebook.resize(0);2253for (uint32_t i = 0; i < group_codebook.size(); i++)2254{2255codebook.resize(codebook.size() + 1);22562257for (uint32_t j = 0; j < group_codebook[i].size(); j++)2258{2259const uint32_t group_index = group_codebook[i][j];22602261typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];2262const uint_vec& training_vec_indices = group_iter->second.m_indices;22632264append_vector(codebook.back(), training_vec_indices);2265}2266}22672268parent_codebook.resize(0);2269for (uint32_t i = 0; i < group_parent_codebook.size(); i++)2270{2271parent_codebook.resize(parent_codebook.size() + 1);22722273for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)2274{2275const uint32_t group_index = group_parent_codebook[i][j];22762277typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];2278const uint_vec& training_vec_indices = group_iter->second.m_indices;22792280append_vector(parent_codebook.back(), training_vec_indices);2281}2282}22832284return true;2285}22862287// Canonical Huffman coding22882289class histogram2290{2291basisu::vector<uint32_t> m_hist;22922293public:2294histogram(uint32_t size = 0) { init(size); }22952296void clear()2297{2298clear_vector(m_hist);2299}23002301void init(uint32_t size)2302{2303m_hist.resize(0);2304m_hist.resize(size);2305}23062307inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }23082309inline const uint32_t &operator[] (uint32_t index) const2310{2311return m_hist[index];2312}23132314inline uint32_t &operator[] (uint32_t index)2315{2316return m_hist[index];2317}23182319inline void inc(uint32_t index)2320{2321m_hist[index]++;2322}23232324uint64_t get_total() const2325{2326uint64_t total = 0;2327for (uint32_t i = 0; i < m_hist.size(); ++i)2328total += m_hist[i];2329return total;2330}23312332double get_entropy() const2333{2334double total = static_cast<double>(get_total());2335if (total == 0.0f)2336return 0.0f;23372338const double inv_total = 1.0f / total;2339const double neg_inv_log2 = -1.0f / log(2.0f);23402341double e = 0.0f;2342for (uint32_t i = 0; i < m_hist.size(); i++)2343if (m_hist[i])2344e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);23452346return e;2347}2348};23492350struct sym_freq2351{2352uint32_t m_key;2353uint16_t m_sym_index;2354};23552356sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);2357void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);2358void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);23592360class huffman_encoding_table2361{2362public:2363huffman_encoding_table()2364{2365}23662367void clear()2368{2369clear_vector(m_codes);2370clear_vector(m_code_sizes);2371}23722373bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)2374{2375return init(h.size(), &h[0], max_code_size);2376}23772378bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);2379bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);23802381inline const uint16_vec &get_codes() const { return m_codes; }2382inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }23832384uint32_t get_total_used_codes() const2385{2386for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)2387if (m_code_sizes[i])2388return i + 1;2389return 0;2390}23912392private:2393uint16_vec m_codes;2394uint8_vec m_code_sizes;2395};23962397class bitwise_coder2398{2399public:2400bitwise_coder() :2401m_bit_buffer(0),2402m_bit_buffer_size(0),2403m_total_bits(0)2404{2405}24062407bitwise_coder(const bitwise_coder& other) :2408m_bytes(other.m_bytes),2409m_bit_buffer(other.m_bit_buffer),2410m_bit_buffer_size(other.m_bit_buffer_size),2411m_total_bits(other.m_total_bits)2412{2413}24142415bitwise_coder(bitwise_coder&& other) :2416m_bytes(std::move(other.m_bytes)),2417m_bit_buffer(other.m_bit_buffer),2418m_bit_buffer_size(other.m_bit_buffer_size),2419m_total_bits(other.m_total_bits)2420{2421}24222423bitwise_coder& operator= (const bitwise_coder& rhs)2424{2425if (this == &rhs)2426return *this;24272428m_bytes = rhs.m_bytes;2429m_bit_buffer = rhs.m_bit_buffer;2430m_bit_buffer_size = rhs.m_bit_buffer_size;2431m_total_bits = rhs.m_total_bits;24322433return *this;2434}24352436bitwise_coder& operator= (bitwise_coder&& rhs)2437{2438if (this == &rhs)2439return *this;24402441m_bytes = std::move(rhs.m_bytes);2442m_bit_buffer = rhs.m_bit_buffer;2443m_bit_buffer_size = rhs.m_bit_buffer_size;2444m_total_bits = rhs.m_total_bits;24452446return *this;2447}24482449inline void clear()2450{2451clear_vector(m_bytes);2452m_bit_buffer = 0;2453m_bit_buffer_size = 0;2454m_total_bits = 0;2455}24562457inline void restart()2458{2459m_bytes.resize(0);2460m_bit_buffer = 0;2461m_bit_buffer_size = 0;2462m_total_bits = 0;2463}24642465inline const uint8_vec &get_bytes() const { return m_bytes; }2466inline uint8_vec& get_bytes() { return m_bytes; }24672468inline void reserve(uint32_t size) { m_bytes.reserve(size); }24692470inline uint64_t get_total_bits() const { return m_total_bits; }2471inline uint32_t get_total_bits_u32() const { assert(m_total_bits <= UINT32_MAX); return static_cast<uint32_t>(m_total_bits); }2472inline void clear_total_bits() { m_total_bits = 0; }24732474inline void init(uint32_t reserve_size = 1024)2475{2476m_bytes.reserve(reserve_size);2477m_bytes.resize(0);24782479m_bit_buffer = 0;2480m_bit_buffer_size = 0;2481m_total_bits = 0;2482}24832484inline uint32_t flush()2485{2486if (m_bit_buffer_size)2487{2488m_total_bits += 8 - (m_bit_buffer_size & 7);2489append_byte(static_cast<uint8_t>(m_bit_buffer));24902491m_bit_buffer = 0;2492m_bit_buffer_size = 0;24932494return 8;2495}24962497return 0;2498}24992500inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)2501{2502assert(num_bits <= 32);2503assert(bits < (1ULL << num_bits));25042505if (!num_bits)2506return 0;25072508m_total_bits += num_bits;25092510uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;2511m_bit_buffer_size += num_bits;25122513while (m_bit_buffer_size >= 8)2514{2515append_byte(static_cast<uint8_t>(v));2516v >>= 8;2517m_bit_buffer_size -= 8;2518}25192520m_bit_buffer = static_cast<uint8_t>(v);2521return num_bits;2522}25232524inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)2525{2526uint32_t code = tab.get_codes()[sym];2527uint32_t code_size = tab.get_code_sizes()[sym];2528assert(code_size >= 1);2529put_bits(code, code_size);2530return code_size;2531}25322533inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)2534{2535assert((n >= 2) && (v < n));25362537uint32_t k = floor_log2i(n);2538uint32_t u = (1 << (k + 1)) - n;25392540if (v < u)2541return put_bits(v, k);25422543uint32_t x = v + u;2544assert((x >> 1) >= u);25452546put_bits(x >> 1, k);2547put_bits(x & 1, 1);2548return k + 1;2549}25502551inline uint32_t put_rice(uint32_t v, uint32_t m)2552{2553assert(m);25542555const uint64_t start_bits = m_total_bits;25562557uint32_t q = v >> m, r = v & ((1 << m) - 1);25582559// rice coding sanity check2560assert(q <= 64);25612562for (; q > 16; q -= 16)2563put_bits(0xFFFF, 16);25642565put_bits((1 << q) - 1, q);2566put_bits(r << 1, m + 1);25672568return (uint32_t)(m_total_bits - start_bits);2569}25702571inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)2572{2573assert(chunk_bits);25742575const uint32_t chunk_size = 1 << chunk_bits;2576const uint32_t chunk_mask = chunk_size - 1;25772578uint32_t total_bits = 0;25792580for ( ; ; )2581{2582uint32_t next_v = v >> chunk_bits;25832584total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);2585if (!next_v)2586break;25872588v = next_v;2589}25902591return total_bits;2592}25932594uint32_t emit_huffman_table(const huffman_encoding_table &tab);25952596void append(const bitwise_coder& other)2597{2598for (uint32_t i = 0; i < other.m_bytes.size(); i++)2599put_bits(other.m_bytes[i], 8);26002601if (other.m_bit_buffer_size)2602put_bits(other.m_bit_buffer, other.m_bit_buffer_size);2603}26042605private:2606uint8_vec m_bytes;2607uint32_t m_bit_buffer, m_bit_buffer_size;2608uint64_t m_total_bits;26092610inline void append_byte(uint8_t c)2611{2612//m_bytes.resize(m_bytes.size() + 1);2613//m_bytes.back() = c;26142615m_bytes.push_back(c);2616}26172618static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);2619static void end_zero_run(uint16_vec &syms, uint32_t &run_size);2620};26212622class huff2D2623{2624public:2625huff2D() { }2626huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }26272628inline const histogram &get_histogram() const { return m_histogram; }2629inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }26302631inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)2632{2633assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);26342635m_bits_per_sym = bits_per_sym;2636m_total_syms_per_group = total_syms_per_group;2637m_cur_sym_bits = 0;2638m_cur_num_syms = 0;2639m_decode_syms_remaining = 0;2640m_next_decoder_group_index = 0;26412642m_histogram.init(1 << (bits_per_sym * total_syms_per_group));2643}26442645inline void clear()2646{2647m_group_bits.clear();26482649m_cur_sym_bits = 0;2650m_cur_num_syms = 0;2651m_decode_syms_remaining = 0;2652m_next_decoder_group_index = 0;2653}26542655inline void emit(uint32_t sym)2656{2657m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));2658m_cur_num_syms++;26592660if (m_cur_num_syms == m_total_syms_per_group)2661flush();2662}26632664inline void flush()2665{2666if (m_cur_num_syms)2667{2668m_group_bits.push_back(m_cur_sym_bits);2669m_histogram.inc(m_cur_sym_bits);26702671m_cur_sym_bits = 0;2672m_cur_num_syms = 0;2673}2674}26752676inline bool start_encoding(uint32_t code_size_limit = 16)2677{2678flush();26792680if (!m_encoding_table.init(m_histogram, code_size_limit))2681return false;26822683m_decode_syms_remaining = 0;2684m_next_decoder_group_index = 0;26852686return true;2687}26882689inline uint32_t emit_next_sym(bitwise_coder &c)2690{2691uint32_t bits = 0;26922693if (!m_decode_syms_remaining)2694{2695bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);2696m_decode_syms_remaining = m_total_syms_per_group;2697}26982699m_decode_syms_remaining--;2700return bits;2701}27022703inline void emit_flush()2704{2705m_decode_syms_remaining = 0;2706}27072708private:2709uint_vec m_group_bits;2710huffman_encoding_table m_encoding_table;2711histogram m_histogram;2712uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;2713};27142715bool huffman_test(int rand_seed);27162717// VQ index reordering27182719class palette_index_reorderer2720{2721public:2722palette_index_reorderer()2723{2724}27252726void clear()2727{2728clear_vector(m_hist);2729clear_vector(m_total_count_to_picked);2730clear_vector(m_entries_picked);2731clear_vector(m_entries_to_do);2732clear_vector(m_remap_table);2733}27342735// returns [0,1] distance of entry i to entry j2736typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);27372738void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);27392740// Table remaps old to new symbol indices2741inline const uint_vec &get_remap_table() const { return m_remap_table; }27422743private:2744uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;27452746inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }2747inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }27482749void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);2750void find_initial(uint32_t num_syms);2751void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);2752float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);2753};27542755// Simple 32-bit 2D image class27562757class image2758{2759public:2760image() :2761m_width(0), m_height(0), m_pitch(0)2762{2763}27642765image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :2766m_width(0), m_height(0), m_pitch(0)2767{2768resize(w, h, p);2769}27702771image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :2772m_width(0), m_height(0), m_pitch(0)2773{2774init(pImage, width, height, comps);2775}27762777image(const image &other) :2778m_width(0), m_height(0), m_pitch(0)2779{2780*this = other;2781}27822783image(image&& other) :2784m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),2785m_pixels(std::move(other.m_pixels))2786{2787other.m_width = 0;2788other.m_height = 0;2789other.m_pitch = 0;2790}27912792image& operator= (image&& rhs)2793{2794if (this != &rhs)2795{2796m_width = rhs.m_width;2797m_height = rhs.m_height;2798m_pitch = rhs.m_pitch;2799m_pixels = std::move(rhs.m_pixels);28002801rhs.m_width = 0;2802rhs.m_height = 0;2803rhs.m_pitch = 0;2804}2805return *this;2806}28072808image &swap(image &other)2809{2810std::swap(m_width, other.m_width);2811std::swap(m_height, other.m_height);2812std::swap(m_pitch, other.m_pitch);2813m_pixels.swap(other.m_pixels);2814return *this;2815}28162817image &operator= (const image &rhs)2818{2819if (this != &rhs)2820{2821m_width = rhs.m_width;2822m_height = rhs.m_height;2823m_pitch = rhs.m_pitch;2824m_pixels = rhs.m_pixels;2825}2826return *this;2827}28282829image &clear()2830{2831m_width = 0;2832m_height = 0;2833m_pitch = 0;2834clear_vector(m_pixels);2835return *this;2836}28372838image& match_dimensions(const image& other)2839{2840resize(other.get_width(), other.get_height());2841return *this;2842}28432844image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)2845{2846return crop(w, h, p, background);2847}28482849image &set_all(const color_rgba &c)2850{2851for (uint32_t i = 0; i < m_pixels.size(); i++)2852m_pixels[i] = c;2853return *this;2854}28552856void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)2857{2858assert(comps >= 1 && comps <= 4);28592860resize(width, height);28612862for (uint32_t y = 0; y < height; y++)2863{2864for (uint32_t x = 0; x < width; x++)2865{2866const uint8_t *pSrc = &pImage[(x + y * width) * comps];2867color_rgba &dst = (*this)(x, y);28682869if (comps == 1)2870{2871dst.r = pSrc[0];2872dst.g = pSrc[0];2873dst.b = pSrc[0];2874dst.a = 255;2875}2876else if (comps == 2)2877{2878dst.r = pSrc[0];2879dst.g = pSrc[0];2880dst.b = pSrc[0];2881dst.a = pSrc[1];2882}2883else2884{2885dst.r = pSrc[0];2886dst.g = pSrc[1];2887dst.b = pSrc[2];2888if (comps == 4)2889dst.a = pSrc[3];2890else2891dst.a = 255;2892}2893}2894}2895}28962897image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)2898{2899for (uint32_t iy = 0; iy < h; iy++)2900for (uint32_t ix = 0; ix < w; ix++)2901set_clipped(x + ix, y + iy, c);2902return *this;2903}29042905image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)2906{2907for (uint32_t iy = 0; iy < h; iy++)2908for (uint32_t ix = 0; ix < w; ix++)2909set_clipped_alpha(x + ix, y + iy, c);2910return *this;2911}29122913image &crop_dup_borders(uint32_t w, uint32_t h)2914{2915const uint32_t orig_w = m_width, orig_h = m_height;29162917crop(w, h);29182919if (orig_w && orig_h)2920{2921if (m_width > orig_w)2922{2923for (uint32_t x = orig_w; x < m_width; x++)2924for (uint32_t y = 0; y < m_height; y++)2925set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));2926}29272928if (m_height > orig_h)2929{2930for (uint32_t y = orig_h; y < m_height; y++)2931for (uint32_t x = 0; x < m_width; x++)2932set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));2933}2934}2935return *this;2936}29372938// pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).2939image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)2940{2941if (p == UINT32_MAX)2942p = w;29432944clear();29452946if ((!p) || (!w) || (!h))2947return *this;29482949m_pixels.grant_ownership(pPixels, p * h, p * h);29502951m_width = w;2952m_height = h;2953m_pitch = p;29542955return *this;2956}29572958image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)2959{2960if (p == UINT32_MAX)2961p = w;29622963if ((w == m_width) && (m_height == h) && (m_pitch == p))2964return *this;29652966if ((!w) || (!h) || (!p))2967{2968clear();2969return *this;2970}29712972color_rgba_vec cur_state;2973cur_state.swap(m_pixels);29742975m_pixels.resize(p * h);29762977if (init_image)2978{2979if (m_width || m_height)2980{2981for (uint32_t y = 0; y < h; y++)2982{2983for (uint32_t x = 0; x < w; x++)2984{2985if ((x < m_width) && (y < m_height))2986m_pixels[x + y * p] = cur_state[x + y * m_pitch];2987else2988m_pixels[x + y * p] = background;2989}2990}2991}2992else2993{2994m_pixels.set_all(background);2995}2996}29972998m_width = w;2999m_height = h;3000m_pitch = p;30013002return *this;3003}30043005inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }3006inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }30073008inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }3009inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }30103011inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const3012{3013x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);3014y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);3015return m_pixels[x + y * m_pitch];3016}30173018inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)3019{3020x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);3021y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);3022return m_pixels[x + y * m_pitch];3023}30243025inline image &set_clipped(int x, int y, const color_rgba &c)3026{3027if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))3028(*this)(x, y) = c;3029return *this;3030}30313032inline image& set_clipped_alpha(int x, int y, const color_rgba& c)3033{3034if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))3035(*this)(x, y).m_comps[3] = c.m_comps[3];3036return *this;3037}30383039// Very straightforward blit with full clipping. Not fast, but it works.3040image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)3041{3042for (int y = 0; y < src_h; y++)3043{3044const int sy = src_y + y;3045if (sy < 0)3046continue;3047else if (sy >= (int)src.get_height())3048break;30493050for (int x = 0; x < src_w; x++)3051{3052const int sx = src_x + x;3053if (sx < 0)3054continue;3055else if (sx >= (int)src.get_width())3056break;30573058set_clipped(dst_x + x, dst_y + y, src(sx, sy));3059}3060}30613062return *this;3063}30643065const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const3066{3067if (((src_x + w) > m_width) || ((src_y + h) > m_height))3068{3069// Slower clamping case3070for (uint32_t y = 0; y < h; y++)3071for (uint32_t x = 0; x < w; x++)3072*pDst++ = get_clamped(src_x + x, src_y + y);3073}3074else3075{3076const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch];30773078for (uint32_t y = 0; y < h; y++)3079{3080memcpy(pDst, pSrc, w * sizeof(color_rgba));3081pSrc += m_pitch;3082pDst += w;3083}3084}30853086return *this;3087}30883089image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)3090{3091for (uint32_t y = 0; y < h; y++)3092for (uint32_t x = 0; x < w; x++)3093set_clipped(dst_x + x, dst_y + y, *pSrc++);3094return *this;3095}30963097inline bool is_valid() const { return m_width > 0; }30983099inline uint32_t get_width() const { return m_width; }3100inline uint32_t get_height() const { return m_height; }3101inline uint32_t get_pitch() const { return m_pitch; }3102inline uint32_t get_total_pixels() const { return m_width * m_height; }31033104inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }3105inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }3106inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }31073108inline const color_rgba_vec &get_pixels() const { return m_pixels; }3109inline color_rgba_vec &get_pixels() { return m_pixels; }31103111inline const color_rgba *get_ptr() const { return &m_pixels[0]; }3112inline color_rgba *get_ptr() { return &m_pixels[0]; }31133114bool has_alpha(uint32_t channel = 3) const3115{3116for (uint32_t y = 0; y < m_height; ++y)3117for (uint32_t x = 0; x < m_width; ++x)3118if ((*this)(x, y)[channel] < 255)3119return true;31203121return false;3122}31233124image &set_alpha(uint8_t a)3125{3126for (uint32_t y = 0; y < m_height; ++y)3127for (uint32_t x = 0; x < m_width; ++x)3128(*this)(x, y).a = a;3129return *this;3130}31313132image &flip_y()3133{3134for (uint32_t y = 0; y < m_height / 2; ++y)3135for (uint32_t x = 0; x < m_width; ++x)3136std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));3137return *this;3138}31393140// TODO: There are many ways to do this, not sure this is the best way.3141image &renormalize_normal_map()3142{3143for (uint32_t y = 0; y < m_height; y++)3144{3145for (uint32_t x = 0; x < m_width; x++)3146{3147color_rgba &c = (*this)(x, y);3148if ((c.r == 128) && (c.g == 128) && (c.b == 128))3149continue;31503151vec3F v(c.r, c.g, c.b);3152v = (v * (2.0f / 255.0f)) - vec3F(1.0f);3153v.clamp(-1.0f, 1.0f);31543155float length = v.length();3156const float cValidThresh = .077f;3157if (length < cValidThresh)3158{3159c.set(128, 128, 128, c.a);3160}3161else if (fabs(length - 1.0f) > cValidThresh)3162{3163if (length)3164v /= length;31653166for (uint32_t i = 0; i < 3; i++)3167c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));31683169if ((c.g == 128) && (c.r == 128))3170{3171if (c.b < 128)3172c.b = 0;3173else3174c.b = 255;3175}3176}3177}3178}3179return *this;3180}31813182void swap_rb()3183{3184for (auto& v : m_pixels)3185std::swap(v.r, v.b);3186}31873188void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);31893190vec4F get_filtered_vec4F(float x, float y) const3191{3192x -= .5f;3193y -= .5f;31943195int ix = (int)floorf(x);3196int iy = (int)floorf(y);3197float wx = x - ix;3198float wy = y - iy;31993200color_rgba a(get_clamped(ix, iy));3201color_rgba b(get_clamped(ix + 1, iy));3202color_rgba c(get_clamped(ix, iy + 1));3203color_rgba d(get_clamped(ix + 1, iy + 1));32043205vec4F result;32063207for (uint32_t i = 0; i < 4; i++)3208{3209const float top = lerp<float>((float)a[i], (float)b[i], wx);3210const float bot = lerp<float>((float)c[i], (float)d[i], wx);3211const float m = lerp<float>((float)top, (float)bot, wy);32123213result[i] = m;3214}32153216return result;3217}32183219// (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. Clamp addressing.3220color_rgba get_filtered(float x, float y) const3221{3222const vec4F fresult(get_filtered_vec4F(x, y));32233224color_rgba result;32253226for (uint32_t i = 0; i < 4; i++)3227result[i] = (uint8_t)clamp<int>((int)(fresult[i] + .5f), 0, 255);32283229return result;3230}32313232private:3233uint32_t m_width, m_height, m_pitch; // all in pixels3234color_rgba_vec m_pixels;3235};32363237// Float images32383239typedef basisu::vector<vec4F> vec4F_vec;32403241class imagef3242{3243public:3244imagef() :3245m_width(0), m_height(0), m_pitch(0)3246{3247}32483249imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :3250m_width(0), m_height(0), m_pitch(0)3251{3252resize(w, h, p);3253}32543255imagef(const imagef &other) :3256m_width(0), m_height(0), m_pitch(0)3257{3258*this = other;3259}32603261imagef(imagef&& other) :3262m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),3263m_pixels(std::move(other.m_pixels))3264{3265other.m_width = 0;3266other.m_height = 0;3267other.m_pitch = 0;3268}32693270imagef& operator= (imagef&& rhs)3271{3272if (this != &rhs)3273{3274m_width = rhs.m_width;3275m_height = rhs.m_height;3276m_pitch = rhs.m_pitch;3277m_pixels = std::move(rhs.m_pixels);32783279rhs.m_width = 0;3280rhs.m_height = 0;3281rhs.m_pitch = 0;3282}3283return *this;3284}32853286imagef &swap(imagef &other)3287{3288std::swap(m_width, other.m_width);3289std::swap(m_height, other.m_height);3290std::swap(m_pitch, other.m_pitch);3291m_pixels.swap(other.m_pixels);3292return *this;3293}32943295imagef &operator= (const imagef &rhs)3296{3297if (this != &rhs)3298{3299m_width = rhs.m_width;3300m_height = rhs.m_height;3301m_pitch = rhs.m_pitch;3302m_pixels = rhs.m_pixels;3303}3304return *this;3305}33063307imagef &clear()3308{3309m_width = 0;3310m_height = 0;3311m_pitch = 0;3312clear_vector(m_pixels);3313return *this;3314}33153316imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))3317{3318const uint32_t width = src.get_width();3319const uint32_t height = src.get_height();33203321resize(width, height);33223323for (int y = 0; y < (int)height; y++)3324{3325for (uint32_t x = 0; x < width; x++)3326{3327const color_rgba &src_pixel = src(x, y);3328(*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);3329}3330}33313332return *this;3333}33343335imagef& match_dimensions(const imagef& other)3336{3337resize(other.get_width(), other.get_height());3338return *this;3339}33403341imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))3342{3343return resize(other.get_width(), other.get_height(), p, background);3344}33453346imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))3347{3348return crop(w, h, p, background);3349}33503351imagef &set_all(const vec4F &c)3352{3353for (uint32_t i = 0; i < m_pixels.size(); i++)3354m_pixels[i] = c;3355return *this;3356}33573358imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)3359{3360for (uint32_t iy = 0; iy < h; iy++)3361for (uint32_t ix = 0; ix < w; ix++)3362set_clipped(x + ix, y + iy, c);3363return *this;3364}33653366imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))3367{3368if (p == UINT32_MAX)3369p = w;33703371if ((w == m_width) && (m_height == h) && (m_pitch == p))3372return *this;33733374if ((!w) || (!h) || (!p))3375{3376clear();3377return *this;3378}33793380vec4F_vec cur_state;3381cur_state.swap(m_pixels);33823383m_pixels.resize(p * h);33843385for (uint32_t y = 0; y < h; y++)3386{3387for (uint32_t x = 0; x < w; x++)3388{3389if ((x < m_width) && (y < m_height))3390m_pixels[x + y * p] = cur_state[x + y * m_pitch];3391else3392m_pixels[x + y * p] = background;3393}3394}33953396m_width = w;3397m_height = h;3398m_pitch = p;33993400return *this;3401}34023403imagef& crop_dup_borders(uint32_t w, uint32_t h)3404{3405const uint32_t orig_w = m_width, orig_h = m_height;34063407crop(w, h);34083409if (orig_w && orig_h)3410{3411if (m_width > orig_w)3412{3413for (uint32_t x = orig_w; x < m_width; x++)3414for (uint32_t y = 0; y < m_height; y++)3415set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));3416}34173418if (m_height > orig_h)3419{3420for (uint32_t y = orig_h; y < m_height; y++)3421for (uint32_t x = 0; x < m_width; x++)3422set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));3423}3424}3425return *this;3426}34273428inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }3429inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }34303431inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }3432inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }34333434inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const3435{3436x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);3437y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);3438return m_pixels[x + y * m_pitch];3439}34403441inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)3442{3443x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);3444y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);3445return m_pixels[x + y * m_pitch];3446}34473448inline imagef &set_clipped(int x, int y, const vec4F &c)3449{3450if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))3451(*this)(x, y) = c;3452return *this;3453}34543455// Very straightforward blit with full clipping. Not fast, but it works.3456imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)3457{3458for (int y = 0; y < src_h; y++)3459{3460const int sy = src_y + y;3461if (sy < 0)3462continue;3463else if (sy >= (int)src.get_height())3464break;34653466for (int x = 0; x < src_w; x++)3467{3468const int sx = src_x + x;3469if (sx < 0)3470continue;3471else if (sx >= (int)src.get_width())3472break;34733474set_clipped(dst_x + x, dst_y + y, src(sx, sy));3475}3476}34773478return *this;3479}34803481const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const3482{3483for (uint32_t y = 0; y < h; y++)3484for (uint32_t x = 0; x < w; x++)3485*pDst++ = get_clamped(src_x + x, src_y + y);3486return *this;3487}34883489imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)3490{3491for (uint32_t y = 0; y < h; y++)3492for (uint32_t x = 0; x < w; x++)3493set_clipped(dst_x + x, dst_y + y, *pSrc++);3494return *this;3495}34963497inline bool is_valid() const { return m_width > 0; }34983499inline uint32_t get_width() const { return m_width; }3500inline uint32_t get_height() const { return m_height; }3501inline uint32_t get_pitch() const { return m_pitch; }3502inline uint64_t get_total_pixels() const { return (uint64_t)m_width * m_height; }35033504inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }3505inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }3506inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }35073508inline const vec4F_vec &get_pixels() const { return m_pixels; }3509inline vec4F_vec &get_pixels() { return m_pixels; }35103511inline const vec4F *get_ptr() const { return &m_pixels[0]; }3512inline vec4F *get_ptr() { return &m_pixels[0]; }35133514bool clean_astc_hdr_pixels(float highest_mag)3515{3516bool status = true;3517bool nan_msg = false;3518bool inf_msg = false;3519bool neg_zero_msg = false;3520bool neg_msg = false;3521bool clamp_msg = false;35223523for (uint32_t iy = 0; iy < m_height; iy++)3524{3525for (uint32_t ix = 0; ix < m_width; ix++)3526{3527vec4F& c = (*this)(ix, iy);35283529for (uint32_t s = 0; s < 4; s++)3530{3531float &p = c[s];3532union { float f; uint32_t u; } x; x.f = p;35333534if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000))3535{3536if (std::isnan(p))3537{3538if (!nan_msg)3539{3540fprintf(stderr, "One or more input pixels was NaN, setting to 0.\n");3541nan_msg = true;3542}3543}35443545if (std::isinf(p))3546{3547if (!inf_msg)3548{3549fprintf(stderr, "One or more input pixels was INF, setting to 0.\n");3550inf_msg = true;3551}3552}35533554if (x.u == 0x80000000)3555{3556if (!neg_zero_msg)3557{3558fprintf(stderr, "One or more input pixels was -0, setting them to 0.\n");3559neg_zero_msg = true;3560}3561}35623563p = 0.0f;3564status = false;3565}3566else3567{3568//const float o = p;3569if (p < 0.0f)3570{3571p = 0.0f;35723573if (!neg_msg)3574{3575fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");3576neg_msg = true;3577}35783579status = false;3580}35813582if (p > highest_mag)3583{3584p = highest_mag;35853586if (!clamp_msg)3587{3588fprintf(stderr, "One or more input pixels had to be clamped to %f.\n", highest_mag);3589clamp_msg = true;3590}35913592status = false;3593}3594}3595}3596}3597}35983599return status;3600}36013602imagef& flip_y()3603{3604for (uint32_t y = 0; y < m_height / 2; ++y)3605for (uint32_t x = 0; x < m_width; ++x)3606std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));36073608return *this;3609}36103611bool has_alpha(uint32_t channel = 3) const3612{3613for (uint32_t y = 0; y < m_height; ++y)3614for (uint32_t x = 0; x < m_width; ++x)3615if ((*this)(x, y)[channel] != 1.0f)3616return true;36173618return false;3619}36203621vec4F get_filtered_vec4F(float x, float y) const3622{3623x -= .5f;3624y -= .5f;36253626int ix = (int)floorf(x);3627int iy = (int)floorf(y);3628float wx = x - ix;3629float wy = y - iy;36303631vec4F a(get_clamped(ix, iy));3632vec4F b(get_clamped(ix + 1, iy));3633vec4F c(get_clamped(ix, iy + 1));3634vec4F d(get_clamped(ix + 1, iy + 1));36353636vec4F result;36373638for (uint32_t i = 0; i < 4; i++)3639{3640const float top = lerp<float>((float)a[i], (float)b[i], wx);3641const float bot = lerp<float>((float)c[i], (float)d[i], wx);3642const float m = lerp<float>((float)top, (float)bot, wy);36433644result[i] = m;3645}36463647return result;3648}36493650private:3651uint32_t m_width, m_height, m_pitch; // all in pixels3652vec4F_vec m_pixels;3653};36543655// REC 709 coefficients3656const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;36573658inline float get_luminance(const vec4F &c)3659{3660return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;3661}36623663float linear_to_srgb(float l);3664float srgb_to_linear(float s);36653666class fast_linear_to_srgb3667{3668public:3669fast_linear_to_srgb()3670{3671init();3672}36733674void init()3675{3676for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)3677{3678float l = (float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1));3679m_linear_to_srgb_table[i] = (uint8_t)basisu::fast_floorf_int(255.0f * basisu::linear_to_srgb(l));3680}36813682float srgb_to_linear[256];3683for (int i = 0; i < 256; i++)3684srgb_to_linear[i] = basisu::srgb_to_linear((float)i / 255.0f);36853686for (int i = 0; i < 256; i++)3687m_srgb_to_linear_thresh[i] = (srgb_to_linear[i] + srgb_to_linear[basisu::minimum<int>(i + 1, 255)]) * .5f;3688}36893690inline uint8_t convert(float l) const3691{3692assert((l >= 0.0f) && (l <= 1.0f));3693int j = basisu::fast_roundf_int((LINEAR_TO_SRGB_TABLE_SIZE - 1) * l);36943695assert((j >= 0) && (j < LINEAR_TO_SRGB_TABLE_SIZE));3696int b = m_linear_to_srgb_table[j];36973698b += (l > m_srgb_to_linear_thresh[b]);36993700return (uint8_t)b;3701}37023703private:3704static constexpr int LINEAR_TO_SRGB_TABLE_SIZE = 2048;3705uint8_t m_linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];37063707float m_srgb_to_linear_thresh[256];3708};37093710extern fast_linear_to_srgb g_fast_linear_to_srgb;37113712// Image metrics37133714class image_metrics3715{3716public:3717// TODO: Add ssim3718double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;3719bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;37203721image_metrics()3722{3723clear();3724}37253726void clear()3727{3728m_max = 0;3729m_mean = 0;3730m_mean_squared = 0;3731m_rms = 0;3732m_psnr = 0;3733m_ssim = 0;3734m_has_neg = false;3735m_hf_mag_overflow = false;3736m_any_abnormal = false;3737}37383739void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }3740void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }37413742void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);3743void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);3744void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);3745void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);3746};37473748void print_image_metrics(const image& a, const image& b);37493750// Image saving/loading/resampling37513752bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);3753bool load_png(const char* pFilename, image& img);3754inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }37553756bool load_tga(const char* pFilename, image& img);3757inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }37583759bool load_qoi(const char* pFilename, image& img);37603761bool load_jpg(const char *pFilename, image& img);3762bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img);3763inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }37643765// Currently loads .PNG, .TGA, or .JPG3766bool load_image(const char* pFilename, image& img);3767inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }37683769bool is_image_filename_hdr(const char* pFilename);37703771// Supports .HDR and most (but not all) .EXR's (see TinyEXR).3772bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);37733774inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)3775{3776return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);3777}37783779enum class hdr_image_type3780{3781cHITRGBAHalfFloat = 0,3782cHITRGBAFloat = 1,3783cHITPNGImage = 2,3784cHITEXRImage = 3,3785cHITHDRImage = 4,3786cHITJPGImage = 53787};37883789bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);37903791uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);3792uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);37933794struct rgbe_header_info3795{3796std::string m_program;37973798// Note no validation is done, either gamma or exposure may be 0.3799double m_gamma;3800bool m_has_gamma;38013802double m_exposure; // watts/steradian/m^2.3803bool m_has_exposure;38043805void clear()3806{3807m_program.clear();3808m_gamma = 1.0f;3809m_has_gamma = false;3810m_exposure = 1.0f;3811m_has_exposure = false;3812}3813};38143815bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);3816bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);38173818bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);3819bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);38203821bool read_exr(const char* pFilename, imagef& img, int& n_chans);3822bool read_exr(const void* pMem, size_t mem_size, imagef& img);38233824enum3825{3826WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic3827WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats3828WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)3829};38303831// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.3832bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags);38333834enum3835{3836cImageSaveGrayscale = 1,3837cImageSaveIgnoreAlpha = 23838};38393840bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);3841inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }38423843bool read_file_to_vec(const char* pFilename, uint8_vec& data);3844bool read_file_to_data(const char* pFilename, void *pData, size_t len);38453846bool write_data_to_file(const char* pFilename, const void* pData, size_t len);38473848inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }38493850bool image_resample(const image &src, image &dst, bool srgb = false,3851const char *pFilter = "lanczos4", float filter_scale = 1.0f,3852bool wrapping = false,3853uint32_t first_comp = 0, uint32_t num_comps = 4);38543855bool image_resample(const imagef& src, imagef& dst,3856const char* pFilter = "lanczos4", float filter_scale = 1.0f,3857bool wrapping = false,3858uint32_t first_comp = 0, uint32_t num_comps = 4);38593860// Timing38613862typedef uint64_t timer_ticks;38633864class interval_timer3865{3866public:3867interval_timer();38683869void start();3870void stop();38713872double get_elapsed_secs() const;3873inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); }38743875static void init();3876static inline timer_ticks get_ticks_per_sec() { return g_freq; }3877static timer_ticks get_ticks();3878static double ticks_to_secs(timer_ticks ticks);3879static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }38803881private:3882static timer_ticks g_init_ticks, g_freq;3883static double g_timer_freq;38843885timer_ticks m_start_time, m_stop_time;38863887bool m_started, m_stopped;3888};38893890inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }38913892inline FILE *fopen_safe(const char *pFilename, const char *pMode)3893{3894#ifdef _WIN323895FILE *pFile = nullptr;3896fopen_s(&pFile, pFilename, pMode);3897return pFile;3898#else3899return fopen(pFilename, pMode);3900#endif3901}39023903void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);39043905const uint32_t cPixelBlockWidth = 4;3906const uint32_t cPixelBlockHeight = 4;3907const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight;39083909struct pixel_block3910{3911color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]39123913inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }3914inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }39153916inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; }3917inline color_rgba* get_ptr() { return &m_pixels[0][0]; }39183919inline void clear() { clear_obj(*this); }39203921inline bool operator== (const pixel_block& rhs) const3922{3923return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;3924}3925};3926typedef basisu::vector<pixel_block> pixel_block_vec;39273928struct pixel_block_hdr3929{3930vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]39313932inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }3933inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }39343935inline const vec4F* get_ptr() const { return &m_pixels[0][0]; }3936inline vec4F* get_ptr() { return &m_pixels[0][0]; }39373938inline void clear() { clear_obj(*this); }39393940inline bool operator== (const pixel_block& rhs) const3941{3942return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;3943}3944};3945typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;39463947void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false);3948bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);3949bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img);39503951// Intersection3952enum eClear { cClear = 0 };3953enum eInitExpand { cInitExpand = 0 };3954enum eIdentity { cIdentity = 0 };39553956template<typename vector_type>3957class ray3958{3959public:3960typedef vector_type vector_t;3961typedef typename vector_type::scalar_type scalar_type;39623963inline ray() { }3964inline ray(eClear) { clear(); }3965inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { }39663967inline void clear()3968{3969m_origin.clear();3970m_direction.clear();3971}39723973inline const vector_type& get_origin(void) const { return m_origin; }3974inline void set_origin(const vector_type& origin) { m_origin = origin; }39753976inline const vector_type& get_direction(void) const { return m_direction; }3977inline void set_direction(const vector_type& direction) { m_direction = direction; }39783979inline void set_endpoints(const vector_type& start, const vector_type& end)3980{3981m_origin = start;39823983m_direction = end - start;3984m_direction.normalize_in_place();3985}39863987inline vector_type eval(scalar_type t) const3988{3989return m_origin + m_direction * t;3990}39913992private:3993vector_type m_origin;3994vector_type m_direction;3995};39963997typedef ray<vec2F> ray2F;3998typedef ray<vec3F> ray3F;39994000template<typename T>4001class vec_interval4002{4003public:4004enum { N = T::num_elements };4005typedef typename T::scalar_type scalar_type;40064007inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }4008inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }40094010inline vec_interval() { }4011inline vec_interval(eClear) { clear(); }4012inline vec_interval(eInitExpand) { init_expand(); }40134014inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }40154016inline void init_expand()4017{4018m_bounds[0].set(1e+30f, 1e+30f, 1e+30f);4019m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f);4020}40214022inline vec_interval expand(const T& p)4023{4024for (uint32_t c = 0; c < N; c++)4025{4026if (p[c] < m_bounds[0][c])4027m_bounds[0][c] = p[c];40284029if (p[c] > m_bounds[1][c])4030m_bounds[1][c] = p[c];4031}40324033return *this;4034}40354036inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; }4037inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }40384039const T& get_low() const { return m_bounds[0]; }4040T& get_low() { return m_bounds[0]; }40414042const T& get_high() const { return m_bounds[1]; }4043T& get_high() { return m_bounds[1]; }40444045scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; }40464047bool contains(const T& p) const4048{4049const T& low = get_low(), high = get_high();40504051for (uint32_t i = 0; i < N; i++)4052{4053if (p[i] < low[i])4054return false;40554056if (p[i] > high[i])4057return false;4058}4059return true;4060}40614062private:4063T m_bounds[2];4064};40654066typedef vec_interval<vec1F> vec_interval1F;4067typedef vec_interval<vec2F> vec_interval2F;4068typedef vec_interval<vec3F> vec_interval3F;4069typedef vec_interval<vec4F> vec_interval4F;40704071typedef vec_interval1F aabb1F;4072typedef vec_interval2F aabb2F;4073typedef vec_interval3F aabb3F;40744075namespace intersection4076{4077enum result4078{4079cBackfacing = -1,4080cFailure = 0,4081cSuccess,4082cParallel,4083cInside,4084};40854086// Returns cInside, cSuccess, or cFailure.4087// Algorithm: Graphics Gems 14088template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>4089result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)4090{4091enum4092{4093cNumDim = vector_type::num_elements,4094cRight = 0,4095cLeft = 1,4096cMiddle = 24097};40984099bool inside = true;4100int quadrant[cNumDim];4101scalar_type candidate_plane[cNumDim];41024103for (int i = 0; i < cNumDim; i++)4104{4105if (ray.get_origin()[i] < box[0][i])4106{4107quadrant[i] = cLeft;4108candidate_plane[i] = box[0][i];4109inside = false;4110}4111else if (ray.get_origin()[i] > box[1][i])4112{4113quadrant[i] = cRight;4114candidate_plane[i] = box[1][i];4115inside = false;4116}4117else4118{4119quadrant[i] = cMiddle;4120}4121}41224123if (inside)4124{4125coord = ray.get_origin();4126t = 0.0f;4127return cInside;4128}41294130scalar_type max_t[cNumDim];4131for (int i = 0; i < cNumDim; i++)4132{4133if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))4134max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];4135else4136max_t[i] = -1.0f;4137}41384139int which_plane = 0;4140for (int i = 1; i < cNumDim; i++)4141if (max_t[which_plane] < max_t[i])4142which_plane = i;41434144if (max_t[which_plane] < 0.0f)4145return cFailure;41464147for (int i = 0; i < cNumDim; i++)4148{4149if (i != which_plane)4150{4151coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];41524153if ((coord[i] < box[0][i]) || (coord[i] > box[1][i]))4154return cFailure;4155}4156else4157{4158coord[i] = candidate_plane[i];4159}41604161assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);4162}41634164t = max_t[which_plane];4165return cSuccess;4166}41674168template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>4169result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)4170{4171if (!box.contains(ray.get_origin()))4172{4173started_within = false;4174return ray_aabb(coord, t, ray, box);4175}41764177started_within = true;41784179typename vector_type::T diag_dist = box.diagonal_length() * 1.5f;4180ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction());41814182result res(ray_aabb(coord, t, outside_ray, box));4183if (res != cSuccess)4184return res;41854186t = basisu::maximum(0.0f, diag_dist - t);4187return cSuccess;4188}41894190} // intersect41914192// This float->half conversion matches how "F32TO16" works on Intel GPU's.4193// Input cannot be negative, Inf or Nan.4194inline basist::half_float float_to_half_non_neg_no_nan_inf(float val)4195{4196union { float f; int32_t i; uint32_t u; } fi = { val };4197const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF;4198int e = 0, m = 0;41994200assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));42014202// not zero or denormal4203if (flt_e != 0)4204{4205int new_exp = flt_e - 127;4206if (new_exp > 15)4207e = 31;4208else if (new_exp < -14)4209m = lrintf((1 << 24) * fabsf(fi.f));4210else4211{4212e = new_exp + 15;4213m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));4214}4215}42164217assert((0 <= m) && (m <= 1024));4218if (m == 1024)4219{4220e++;4221m = 0;4222}42234224assert((e >= 0) && (e <= 31));4225assert((m >= 0) && (m <= 1023));42264227basist::half_float result = (basist::half_float)((e << 10) | m);4228return result;4229}42304231union fu324232{4233uint32_t u;4234float f;4235};42364237// Supports positive and denormals only. No NaN or Inf.4238BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)4239{4240assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));42414242// add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127)4243static const fu32 K = { 0x77800000 };42444245fu32 o;4246o.u = h << 13;4247o.f *= K.f;42484249return o.f;4250}42514252// Positive, negative, or denormals. No NaN or Inf. Clamped to MAX_HALF_FLOAT.4253inline basist::half_float fast_float_to_half_trunc_no_nan_or_inf(float f)4254{4255assert(!isnan(f) && !isinf(f));42564257// Sutract 112 from the exponent, to change the bias from 127 to 15.4258static const fu32 g_f_to_h{ 0x7800000 };42594260fu32 fu;42614262fu.f = minimum<float>((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f;42634264return (basist::half_float)(((fu.u >> (23 - 10)) & 0x7FFF) | ((f < 0.0f) ? 0x8000 : 0));4265}42664267inline basist::half_float fast_float_to_half_trunc_no_clamp_neg_nan_or_inf(float f)4268{4269assert(!isnan(f) && !isinf(f));4270assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));42714272// Sutract 112 from the exponent, to change the bias from 127 to 15.4273static const fu32 g_f_to_h{ 0x7800000 };42744275fu32 fu;42764277fu.f = f * g_f_to_h.f;42784279return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);4280}42814282inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f)4283{4284assert(!isnan(f) && !isinf(f));4285assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));42864287// Sutract 112 from the exponent, to change the bias from 127 to 15.4288static const fu32 g_f_to_h{ 0x7800000 };42894290fu32 fu;42914292fu.f = f * g_f_to_h.f;42934294uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);42954296// round to even or nearest4297uint32_t mant = fu.u & 8191; // examine lowest 13 bits4298uint32_t inc = (mant > 4096) | ((mant == 4096) & (h & 1));4299h += inc;43004301if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS)4302h = basist::MAX_HALF_FLOAT_AS_INT_BITS;43034304return (basist::half_float)h;4305}43064307} // namespace basisu43084309#include "basisu_math.h"431043114312