Path: blob/master/thirdparty/basis_universal/encoder/basisu_enc.cpp
9902 views
// basisu_enc.cpp
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "basisu_enc.h"
#include "basisu_resampler.h"
#include "basisu_resampler_filters.h"
#include "basisu_etc.h"
#include "../transcoder/basisu_transcoder.h"
#include "basisu_bc7enc.h"
#include "jpgd.h"
#include "pvpngreader.h"
#include "basisu_opencl.h"
#include "basisu_uastc_hdr_4x4_enc.h"
#include "basisu_astc_hdr_6x6_enc.h"

#include <chrono>
#include <thread>
#include <vector>

#ifndef TINYEXR_USE_ZFP
#define TINYEXR_USE_ZFP (1)
#endif
#include <tinyexr.h>

#ifndef MINIZ_HEADER_FILE_ONLY
#define MINIZ_HEADER_FILE_ONLY
#endif
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
#endif
#include "basisu_miniz.h"

#if defined(_WIN32)
// For QueryPerformanceCounter/QueryPerformanceFrequency
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

namespace basisu
{
	uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;
	double interval_timer::g_timer_freq;

#if BASISU_SUPPORT_SSE
	bool g_cpu_supports_sse41;
#endif

	fast_linear_to_srgb g_fast_linear_to_srgb;

	uint8_t g_hamming_dist[256] =
	{
		0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		2, 3, 3, 4, 3, 4,
4, 5, 3, 4, 4, 5, 4, 5, 5, 6,673, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,681, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,692, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,702, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,713, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,722, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,733, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,743, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,754, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 876};7778// This is a Public Domain 8x8 font from here:79// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h80const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =81{82{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( )83{ 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!)84{ 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (")85{ 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#)86{ 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($)87{ 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%)88{ 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&)89{ 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (')90{ 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (()91{ 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ())92{ 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*)93{ 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+)94{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,)95{ 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-)96{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.)97{ 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/)98{ 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0)99{ 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1)100{ 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2)101{ 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3)102{ 0x38, 0x3C, 0x36, 0x33, 
0x7F, 0x30, 0x78, 0x00}, // U+0034 (4)103{ 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5)104{ 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6)105{ 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7)106{ 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8)107{ 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9)108{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:)109{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;)110{ 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<)111{ 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=)112{ 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>)113{ 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?)114{ 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@)115{ 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A)116{ 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B)117{ 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C)118{ 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D)119{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E)120{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F)121{ 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G)122{ 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H)123{ 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I)124{ 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J)125{ 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K)126{ 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L)127{ 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M)128{ 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N)129{ 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O)130{ 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P)131{ 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q)132{ 0x3F, 0x66, 
0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R)133{ 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S)134{ 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T)135{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U)136{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V)137{ 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W)138{ 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X)139{ 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y)140{ 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z)141{ 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([)142{ 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\)143{ 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // U+005D (])144{ 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^)145{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_)146{ 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`)147{ 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a)148{ 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b)149{ 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c)150{ 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d)151{ 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e)152{ 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f)153{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g)154{ 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h)155{ 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i)156{ 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j)157{ 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k)158{ 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l)159{ 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m)160{ 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n)161{ 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o)162{ 
0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p)163{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q)164{ 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r)165{ 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s)166{ 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t)167{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u)168{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v)169{ 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w)170{ 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x)171{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y)172{ 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z)173{ 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00}, // U+007B ({)174{ 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|)175{ 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (})176{ 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~)177{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F178};179180bool g_library_initialized;181std::mutex g_encoder_init_mutex;182183// Encoder library initialization (just call once at startup)184bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)185{186std::lock_guard<std::mutex> lock(g_encoder_init_mutex);187188if (g_library_initialized)189return true;190191detect_sse41();192193basist::basisu_transcoder_init();194pack_etc1_solid_color_init();195//uastc_init();196bc7enc_compress_block_init(); // must be after uastc_init()197198// Don't bother initializing the OpenCL module at all if it's been completely disabled.199if (use_opencl)200{201opencl_init(opencl_force_serialization);202}203204interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports205206astc_hdr_enc_init();207basist::bc6h_enc_init();208astc_6x6_hdr::global_init();209210g_library_initialized = true;211return true;212}213214void 
basisu_encoder_deinit()215{216opencl_deinit();217218g_library_initialized = false;219}220221void error_vprintf(const char* pFmt, va_list args)222{223const uint32_t BUF_SIZE = 256;224char buf[BUF_SIZE];225226va_list args_copy;227va_copy(args_copy, args);228int total_chars = vsnprintf(buf, sizeof(buf), pFmt, args_copy);229va_end(args_copy);230231if (total_chars < 0)232{233assert(0);234return;235}236237if (total_chars >= (int)BUF_SIZE)238{239basisu::vector<char> var_buf(total_chars + 1);240241va_copy(args_copy, args);242int total_chars_retry = vsnprintf(var_buf.data(), var_buf.size(), pFmt, args_copy);243va_end(args_copy);244245if (total_chars_retry < 0)246{247assert(0);248return;249}250251fprintf(stderr, "ERROR: %s", var_buf.data());252}253else254{255fprintf(stderr, "ERROR: %s", buf);256}257}258259void error_printf(const char *pFmt, ...)260{261va_list args;262va_start(args, pFmt);263error_vprintf(pFmt, args);264va_end(args);265}266267#if defined(_WIN32)268void platform_sleep(uint32_t ms)269{270Sleep(ms);271}272#else273void platform_sleep(uint32_t ms)274{275// TODO276}277#endif278279#if defined(_WIN32)280inline void query_counter(timer_ticks* pTicks)281{282QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks));283}284inline void query_counter_frequency(timer_ticks* pTicks)285{286QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));287}288#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__)289#include <sys/time.h>290inline void query_counter(timer_ticks* pTicks)291{292struct timeval cur_time;293gettimeofday(&cur_time, NULL);294*pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);295}296inline void query_counter_frequency(timer_ticks* pTicks)297{298*pTicks = 1000000;299}300#elif defined(__GNUC__)301#include <sys/timex.h>302inline void query_counter(timer_ticks* pTicks)303{304struct timeval cur_time;305gettimeofday(&cur_time, 
NULL);306*pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);307}308inline void query_counter_frequency(timer_ticks* pTicks)309{310*pTicks = 1000000;311}312#else313#error TODO314#endif315316interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false)317{318if (!g_timer_freq)319init();320}321322void interval_timer::start()323{324query_counter(&m_start_time);325m_started = true;326m_stopped = false;327}328329void interval_timer::stop()330{331assert(m_started);332query_counter(&m_stop_time);333m_stopped = true;334}335336double interval_timer::get_elapsed_secs() const337{338assert(m_started);339if (!m_started)340return 0;341342timer_ticks stop_time = m_stop_time;343if (!m_stopped)344query_counter(&stop_time);345346timer_ticks delta = stop_time - m_start_time;347return delta * g_timer_freq;348}349350void interval_timer::init()351{352if (!g_timer_freq)353{354query_counter_frequency(&g_freq);355g_timer_freq = 1.0f / g_freq;356query_counter(&g_init_ticks);357}358}359360timer_ticks interval_timer::get_ticks()361{362if (!g_timer_freq)363init();364timer_ticks ticks;365query_counter(&ticks);366return ticks - g_init_ticks;367}368369double interval_timer::ticks_to_secs(timer_ticks ticks)370{371if (!g_timer_freq)372init();373return ticks * g_timer_freq;374}375376// Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions.377// However the gamuts/white points of REC709 and sRGB are the same.378float linear_to_srgb(float l)379{380assert(l >= 0.0f && l <= 1.0f);381if (l < .0031308f)382return saturate(l * 12.92f);383else384return saturate(1.055f * powf(l, 1.0f / 2.4f) - .055f);385}386387float srgb_to_linear(float s)388{389assert(s >= 0.0f && s <= 1.0f);390if (s < .04045f)391return saturate(s * (1.0f / 12.92f));392else393return saturate(powf((s + .055f) * (1.0f / 1.055f), 2.4f));394}395396const uint32_t MAX_32BIT_ALLOC_SIZE = 
250000000;397398bool load_tga(const char* pFilename, image& img)399{400int w = 0, h = 0, n_chans = 0;401uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans);402403if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4)))404{405error_printf("Failed loading .TGA image \"%s\"!\n", pFilename);406407if (pImage_data)408free(pImage_data);409410return false;411}412413if (sizeof(void *) == sizeof(uint32_t))414{415if (((uint64_t)w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)416{417error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h);418419if (pImage_data)420free(pImage_data);421422return false;423}424}425426img.resize(w, h);427428const uint8_t *pSrc = pImage_data;429for (int y = 0; y < h; y++)430{431color_rgba *pDst = &img(0, y);432433for (int x = 0; x < w; x++)434{435pDst->r = pSrc[0];436pDst->g = pSrc[1];437pDst->b = pSrc[2];438pDst->a = (n_chans == 3) ? 255 : pSrc[3];439440pSrc += n_chans;441++pDst;442}443}444445free(pImage_data);446447return true;448}449450bool load_qoi(const char* pFilename, image& img)451{452return false;453}454455bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)456{457interval_timer tm;458tm.start();459460if (!buf_size)461return false;462463uint32_t width = 0, height = 0, num_chans = 0;464void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans);465466if (!pImage)467{468error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename);469return false;470}471472img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height);473474//debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs());475476return true;477}478479bool load_png(const char* pFilename, image& img)480{481uint8_vec buffer;482if (!read_file_to_vec(pFilename, buffer))483{484error_printf("load_png: Failed reading file \"%s\"!\n", pFilename);485return false;486}487488return load_png(buffer.data(), buffer.size(), img, 
pFilename);489}490491bool load_jpg(const char *pFilename, image& img)492{493int width = 0, height = 0, actual_comps = 0;494uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);495if (!pImage_data)496return false;497498img.init(pImage_data, width, height, 4);499500free(pImage_data);501502return true;503}504505bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img)506{507if (buf_size > INT_MAX)508{509assert(0);510return false;511}512513int width = 0, height = 0, actual_comps = 0;514uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);515if (!pImage_data)516return false;517518img.init(pImage_data, width, height, 4);519520free(pImage_data);521522return true;523}524525bool load_image(const char* pFilename, image& img)526{527std::string ext(string_get_extension(std::string(pFilename)));528529if (ext.length() == 0)530return false;531532const char *pExt = ext.c_str();533534if (strcasecmp(pExt, "png") == 0)535return load_png(pFilename, img);536if (strcasecmp(pExt, "tga") == 0)537return load_tga(pFilename, img);538if (strcasecmp(pExt, "qoi") == 0)539return load_qoi(pFilename, img);540if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) )541return load_jpg(pFilename, img);542543return false;544}545546static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)547{548img.resize(ldr_img.get_width(), ldr_img.get_height());549550for (uint32_t y = 0; y < ldr_img.get_height(); y++)551{552for (uint32_t x = 0; x < ldr_img.get_width(); x++)553{554const color_rgba& c = ldr_img(x, y);555556vec4F& d = img(x, y);557if (ldr_srgb_to_linear)558{559float r = (float)c[0];560float g = (float)c[1];561float b = 
(float)c[2];562563if (ldr_black_bias > 0.0f)564{565// ASTC HDR is noticeably weaker dealing with blocks containing some pixels with components set to 0.566// Add a very slight bias less than .5 to avoid this difficulity. When the HDR image is mapped to SDR sRGB and rounded back to 8-bits, this bias will still result in zero.567// (FWIW, in reality, a physical monitor would be unlikely to have a perfectly zero black level.)568// This is purely optional and on most images it doesn't matter visually.569if (r == 0.0f)570r = ldr_black_bias;571if (g == 0.0f)572g = ldr_black_bias;573if (b == 0.0f)574b = ldr_black_bias;575}576577// Compute how much linear light would be emitted by a SDR 80-100 nit monitor.578d[0] = srgb_to_linear(r * (1.0f / 255.0f)) * linear_nit_multiplier;579d[1] = srgb_to_linear(g * (1.0f / 255.0f)) * linear_nit_multiplier;580d[2] = srgb_to_linear(b * (1.0f / 255.0f)) * linear_nit_multiplier;581}582else583{584d[0] = c[0] * (1.0f / 255.0f) * linear_nit_multiplier;585d[1] = c[1] * (1.0f / 255.0f) * linear_nit_multiplier;586d[2] = c[2] * (1.0f / 255.0f) * linear_nit_multiplier;587}588d[3] = c[3] * (1.0f / 255.0f);589}590}591}592593bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)594{595if ((!pMem) || (!mem_size))596{597assert(0);598return false;599}600601switch (img_type)602{603case hdr_image_type::cHITRGBAHalfFloat:604{605if (mem_size != width * height * sizeof(basist::half_float) * 4)606{607assert(0);608return false;609}610611if ((!width) || (!height))612{613assert(0);614return false;615}616617const basist::half_float* pSrc_image_h = static_cast<const basist::half_float *>(pMem);618619img.resize(width, height);620for (uint32_t y = 0; y < height; y++)621{622for (uint32_t x = 0; x < width; x++)623{624const basist::half_float* pSrc_pixel = &pSrc_image_h[x * 4];625626vec4F& dst = img(x, y);627dst[0] = 
basist::half_to_float(pSrc_pixel[0]);628dst[1] = basist::half_to_float(pSrc_pixel[1]);629dst[2] = basist::half_to_float(pSrc_pixel[2]);630dst[3] = basist::half_to_float(pSrc_pixel[3]);631}632633pSrc_image_h += (width * 4);634}635636break;637}638case hdr_image_type::cHITRGBAFloat:639{640if (mem_size != width * height * sizeof(float) * 4)641{642assert(0);643return false;644}645646if ((!width) || (!height))647{648assert(0);649return false;650}651652img.resize(width, height);653memcpy(img.get_ptr(), pMem, width * height * sizeof(float) * 4);654655break;656}657case hdr_image_type::cHITJPGImage:658{659image ldr_img;660if (!load_jpg(static_cast<const uint8_t*>(pMem), mem_size, ldr_img))661return false;662663convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);664break;665}666case hdr_image_type::cHITPNGImage:667{668image ldr_img;669if (!load_png(static_cast<const uint8_t *>(pMem), mem_size, ldr_img))670return false;671672convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);673break;674}675case hdr_image_type::cHITEXRImage:676{677if (!read_exr(pMem, mem_size, img))678return false;679680break;681}682case hdr_image_type::cHITHDRImage:683{684uint8_vec buf(mem_size);685memcpy(buf.get_ptr(), pMem, mem_size);686687rgbe_header_info hdr;688if (!read_rgbe(buf, img, hdr))689return false;690691break;692}693default:694assert(0);695return false;696}697698return true;699}700701bool is_image_filename_hdr(const char *pFilename)702{703std::string ext(string_get_extension(std::string(pFilename)));704705if (ext.length() == 0)706return false;707708const char* pExt = ext.c_str();709710return ((strcasecmp(pExt, "hdr") == 0) || (strcasecmp(pExt, "exr") == 0));711}712713// TODO: move parameters to struct, add a HDR clean flag to eliminate NaN's/Inf's714bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)715{716std::string 
ext(string_get_extension(std::string(pFilename)));717718if (ext.length() == 0)719return false;720721const char* pExt = ext.c_str();722723if (strcasecmp(pExt, "hdr") == 0)724{725rgbe_header_info rgbe_info;726if (!read_rgbe(pFilename, img, rgbe_info))727return false;728return true;729}730731if (strcasecmp(pExt, "exr") == 0)732{733int n_chans = 0;734if (!read_exr(pFilename, img, n_chans))735return false;736return true;737}738739// Try loading image as LDR, then optionally convert to linear light.740{741image ldr_img;742if (!load_image(pFilename, ldr_img))743return false;744745convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);746}747748return true;749}750751bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp)752{753if (!img.get_total_pixels())754return false;755756void* pPNG_data = nullptr;757size_t PNG_data_size = 0;758759if (image_save_flags & cImageSaveGrayscale)760{761uint8_vec g_pixels(img.get_total_pixels());762uint8_t* pDst = &g_pixels[0];763764for (uint32_t y = 0; y < img.get_height(); y++)765for (uint32_t x = 0; x < img.get_width(); x++)766*pDst++ = img(x, y)[grayscale_comp];767768pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false);769}770else771{772bool has_alpha = false;773774if ((image_save_flags & cImageSaveIgnoreAlpha) == 0)775has_alpha = img.has_alpha();776777if (!has_alpha)778{779uint8_vec rgb_pixels(img.get_total_pixels() * 3);780uint8_t* pDst = &rgb_pixels[0];781782for (uint32_t y = 0; y < img.get_height(); y++)783{784const color_rgba* pSrc = &img(0, y);785for (uint32_t x = 0; x < img.get_width(); x++)786{787pDst[0] = pSrc->r;788pDst[1] = pSrc->g;789pDst[2] = pSrc->b;790791pSrc++;792pDst += 3;793}794}795796pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, 
false);797}798else799{800pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false);801}802}803804if (!pPNG_data)805return false;806807bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size);808if (!status)809{810error_printf("save_png: Failed writing to filename \"%s\"!\n", pFilename);811}812813free(pPNG_data);814815return status;816}817818bool read_file_to_vec(const char* pFilename, uint8_vec& data)819{820FILE* pFile = nullptr;821#ifdef _WIN32822fopen_s(&pFile, pFilename, "rb");823#else824pFile = fopen(pFilename, "rb");825#endif826if (!pFile)827return false;828829fseek(pFile, 0, SEEK_END);830#ifdef _WIN32831int64_t filesize = _ftelli64(pFile);832#else833int64_t filesize = ftello(pFile);834#endif835if (filesize < 0)836{837fclose(pFile);838return false;839}840fseek(pFile, 0, SEEK_SET);841842if (sizeof(size_t) == sizeof(uint32_t))843{844if (filesize > 0x70000000)845{846// File might be too big to load safely in one alloc847fclose(pFile);848return false;849}850}851852if (!data.try_resize((size_t)filesize))853{854fclose(pFile);855return false;856}857858if (filesize)859{860if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize)861{862fclose(pFile);863return false;864}865}866867fclose(pFile);868return true;869}870871bool read_file_to_data(const char* pFilename, void *pData, size_t len)872{873assert(pData && len);874if ((!pData) || (!len))875return false;876877FILE* pFile = nullptr;878#ifdef _WIN32879fopen_s(&pFile, pFilename, "rb");880#else881pFile = fopen(pFilename, "rb");882#endif883if (!pFile)884return false;885886fseek(pFile, 0, SEEK_END);887#ifdef _WIN32888int64_t filesize = _ftelli64(pFile);889#else890int64_t filesize = ftello(pFile);891#endif892893if ((filesize < 0) || ((size_t)filesize < len))894{895fclose(pFile);896return false;897}898fseek(pFile, 0, SEEK_SET);899900if (fread(pData, 1, (size_t)len, pFile) != (size_t)len)901{902fclose(pFile);903return 
false;904}905906fclose(pFile);907return true;908}909910bool write_data_to_file(const char* pFilename, const void* pData, size_t len)911{912FILE* pFile = nullptr;913#ifdef _WIN32914fopen_s(&pFile, pFilename, "wb");915#else916pFile = fopen(pFilename, "wb");917#endif918if (!pFile)919return false;920921if (len)922{923if (fwrite(pData, 1, len, pFile) != len)924{925fclose(pFile);926return false;927}928}929930return fclose(pFile) != EOF;931}932933bool image_resample(const image &src, image &dst, bool srgb,934const char *pFilter, float filter_scale,935bool wrapping,936uint32_t first_comp, uint32_t num_comps)937{938assert((first_comp + num_comps) <= 4);939940const int cMaxComps = 4;941942const uint32_t src_w = src.get_width(), src_h = src.get_height();943const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();944945if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)946{947printf("Image is too large!\n");948return false;949}950951if (!src_w || !src_h || !dst_w || !dst_h)952return false;953954if ((num_comps < 1) || (num_comps > cMaxComps))955return false;956957if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))958{959printf("Image is too large!\n");960return false;961}962963if ((src_w == dst_w) && (src_h == dst_h))964{965dst = src;966return true;967}968969float srgb_to_linear_table[256];970if (srgb)971{972for (int i = 0; i < 256; ++i)973srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f));974}975976const int LINEAR_TO_SRGB_TABLE_SIZE = 8192;977uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];978979if (srgb)980{981for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)982linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255);983}984985std::vector<float> samples[cMaxComps];986Resampler *resamplers[cMaxComps];987988resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,989wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,990pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);991samples[0].resize(src_w);992993for (uint32_t i = 1; i < num_comps; ++i)994{995resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,996wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,997pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);998samples[i].resize(src_w);999}10001001uint32_t dst_y = 0;10021003for (uint32_t src_y = 0; src_y < src_h; ++src_y)1004{1005const color_rgba *pSrc = &src(0, src_y);10061007// Put source lines into resampler(s)1008for (uint32_t x = 0; x < src_w; ++x)1009{1010for (uint32_t c = 0; c < num_comps; ++c)1011{1012const uint32_t comp_index = first_comp + c;1013const uint32_t v = (*pSrc)[comp_index];10141015if (!srgb || (comp_index == 3))1016samples[c][x] = v * (1.0f / 255.0f);1017else1018samples[c][x] = srgb_to_linear_table[v];1019}10201021pSrc++;1022}10231024for (uint32_t c = 0; c < num_comps; ++c)1025{1026if (!resamplers[c]->put_line(&samples[c][0]))1027{1028for (uint32_t i = 0; i < num_comps; i++)1029delete resamplers[i];1030return false;1031}1032}10331034// Now retrieve any output lines1035for (;;)1036{1037uint32_t c;1038for (c = 0; c < num_comps; ++c)1039{1040const uint32_t comp_index = first_comp + c;10411042const float *pOutput_samples = resamplers[c]->get_line();1043if (!pOutput_samples)1044break;10451046const bool linear_flag = !srgb || (comp_index == 3);10471048color_rgba *pDst = &dst(0, dst_y);10491050for (uint32_t x = 0; x < dst_w; x++)1051{1052// TODO: Add dithering1053if (linear_flag)1054{1055int j = (int)(255.0f * pOutput_samples[x] + .5f);1056(*pDst)[comp_index] = (uint8_t)clamp<int>(j, 0, 255);1057}1058else1059{1060int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f);1061(*pDst)[comp_index] = linear_to_srgb_table[clamp<int>(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 
1)];1062}10631064pDst++;1065}1066}1067if (c < num_comps)1068break;10691070++dst_y;1071}1072}10731074for (uint32_t i = 0; i < num_comps; ++i)1075delete resamplers[i];10761077return true;1078}10791080bool image_resample(const imagef& src, imagef& dst,1081const char* pFilter, float filter_scale,1082bool wrapping,1083uint32_t first_comp, uint32_t num_comps)1084{1085assert((first_comp + num_comps) <= 4);10861087const int cMaxComps = 4;10881089const uint32_t src_w = src.get_width(), src_h = src.get_height();1090const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();10911092if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)1093{1094printf("Image is too large!\n");1095return false;1096}10971098if (!src_w || !src_h || !dst_w || !dst_h)1099return false;11001101if ((num_comps < 1) || (num_comps > cMaxComps))1102return false;11031104if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))1105{1106printf("Image is too large!\n");1107return false;1108}11091110if ((src_w == dst_w) && (src_h == dst_h) && (filter_scale == 1.0f))1111{1112dst = src;1113return true;1114}11151116std::vector<float> samples[cMaxComps];1117Resampler* resamplers[cMaxComps];11181119resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,1120wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping1121pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);1122samples[0].resize(src_w);11231124for (uint32_t i = 1; i < num_comps; ++i)1125{1126resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,1127wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping1128pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);1129samples[i].resize(src_w);1130}11311132uint32_t dst_y = 0;11331134for (uint32_t src_y = 0; src_y < src_h; ++src_y)1135{1136const vec4F* pSrc = &src(0, src_y);11371138// Put source lines into resampler(s)1139for (uint32_t x = 0; x < src_w; ++x)1140{1141for (uint32_t c = 0; c < num_comps; ++c)1142{1143const uint32_t comp_index = first_comp + c;1144const float v = (*pSrc)[comp_index];11451146samples[c][x] = v;1147}11481149pSrc++;1150}11511152for (uint32_t c = 0; c < num_comps; ++c)1153{1154if (!resamplers[c]->put_line(&samples[c][0]))1155{1156for (uint32_t i = 0; i < num_comps; i++)1157delete resamplers[i];1158return false;1159}1160}11611162// Now retrieve any output lines1163for (;;)1164{1165uint32_t c;1166for (c = 0; c < num_comps; ++c)1167{1168const uint32_t comp_index = first_comp + c;11691170const float* pOutput_samples = resamplers[c]->get_line();1171if (!pOutput_samples)1172break;11731174vec4F* pDst = &dst(0, dst_y);11751176for (uint32_t x = 0; x < dst_w; x++)1177{1178(*pDst)[comp_index] = pOutput_samples[x];1179pDst++;1180}1181}1182if (c < num_comps)1183break;11841185++dst_y;1186}1187}11881189for (uint32_t i = 0; i < num_comps; ++i)1190delete resamplers[i];11911192return true;1193}11941195void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms)1196{1197// See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen1198if (!num_syms)1199return;12001201if (1 == num_syms)1202{1203A[0].m_key = 1;1204return;1205}12061207A[0].m_key += A[1].m_key;12081209int s = 2, r = 0, next;1210for (next = 1; next < (num_syms - 1); ++next)1211{1212if ((s >= num_syms) || (A[r].m_key < A[s].m_key))1213{1214A[next].m_key = A[r].m_key;1215A[r].m_key = next;1216++r;1217}1218else1219{1220A[next].m_key = A[s].m_key;1221++s;1222}12231224if ((s >= 
num_syms) || ((r < next) && A[r].m_key < A[s].m_key))1225{1226A[next].m_key = A[next].m_key + A[r].m_key;1227A[r].m_key = next;1228++r;1229}1230else1231{1232A[next].m_key = A[next].m_key + A[s].m_key;1233++s;1234}1235}1236A[num_syms - 2].m_key = 0;12371238for (next = num_syms - 3; next >= 0; --next)1239{1240A[next].m_key = 1 + A[A[next].m_key].m_key;1241}12421243int num_avail = 1, num_used = 0, depth = 0;1244r = num_syms - 2;1245next = num_syms - 1;1246while (num_avail > 0)1247{1248for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r )1249;12501251for ( ; num_avail > num_used; --next, --num_avail)1252A[next].m_key = depth;12531254num_avail = 2 * num_used;1255num_used = 0;1256++depth;1257}1258}12591260void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)1261{1262int i;1263uint32_t total = 0;1264if (code_list_len <= 1)1265return;12661267for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++)1268pNum_codes[max_code_size] += pNum_codes[i];12691270for (i = max_code_size; i > 0; i--)1271total += (((uint32_t)pNum_codes[i]) << (max_code_size - i));12721273while (total != (1UL << max_code_size))1274{1275pNum_codes[max_code_size]--;1276for (i = max_code_size - 1; i > 0; i--)1277{1278if (pNum_codes[i])1279{1280pNum_codes[i]--;1281pNum_codes[i + 1] += 2;1282break;1283}1284}12851286total--;1287}1288}12891290sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1)1291{1292uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2];1293sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;12941295clear_obj(hist);12961297for (i = 0; i < num_syms; i++)1298{1299uint32_t freq = pSyms0[i].m_key;13001301// We scale all input frequencies to 16-bits.1302assert(freq <= UINT16_MAX);13031304hist[freq & 0xFF]++;1305hist[256 + ((freq >> 8) & 0xFF)]++;1306}13071308while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))1309total_passes--;13101311for (pass_shift 
= 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)1312{1313const uint32_t *pHist = &hist[pass << 8];1314uint32_t offsets[256], cur_ofs = 0;1315for (i = 0; i < 256; i++)1316{1317offsets[i] = cur_ofs;1318cur_ofs += pHist[i];1319}13201321for (i = 0; i < num_syms; i++)1322pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];13231324sym_freq *t = pCur_syms;1325pCur_syms = pNew_syms;1326pNew_syms = t;1327}13281329return pCur_syms;1330}13311332bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size)1333{1334if (max_code_size > cHuffmanMaxSupportedCodeSize)1335return false;1336if ((!num_syms) || (num_syms > cHuffmanMaxSyms))1337return false;13381339uint32_t total_used_syms = 0;1340for (uint32_t i = 0; i < num_syms; i++)1341if (pFreq[i])1342total_used_syms++;13431344if (!total_used_syms)1345return false;13461347std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms);1348for (uint32_t i = 0, j = 0; i < num_syms; i++)1349{1350if (pFreq[i])1351{1352sym_freq0[j].m_key = pFreq[i];1353sym_freq0[j++].m_sym_index = static_cast<uint16_t>(i);1354}1355}13561357sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]);13581359canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms);13601361int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1];1362clear_obj(num_codes);13631364for (uint32_t i = 0; i < total_used_syms; i++)1365{1366if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize)1367return false;13681369num_codes[pSym_freq[i].m_key]++;1370}13711372canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size);13731374m_code_sizes.resize(0);1375m_code_sizes.resize(num_syms);13761377m_codes.resize(0);1378m_codes.resize(num_syms);13791380for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++)1381for (uint32_t l = num_codes[i]; l > 0; l--)1382m_code_sizes[pSym_freq[--j].m_sym_index] = 
static_cast<uint8_t>(i);13831384uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1];13851386next_code[1] = 0;1387for (uint32_t j = 0, i = 2; i <= max_code_size; i++)1388next_code[i] = j = ((j + num_codes[i - 1]) << 1);13891390for (uint32_t i = 0; i < num_syms; i++)1391{1392uint32_t rev_code = 0, code, code_size;1393if ((code_size = m_code_sizes[i]) == 0)1394continue;1395if (code_size > cHuffmanMaxSupportedInternalCodeSize)1396return false;1397code = next_code[code_size]++;1398for (uint32_t l = code_size; l > 0; l--, code >>= 1)1399rev_code = (rev_code << 1) | (code & 1);1400m_codes[i] = static_cast<uint16_t>(rev_code);1401}14021403return true;1404}14051406bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size)1407{1408if ((!num_syms) || (num_syms > cHuffmanMaxSyms))1409return false;14101411uint16_vec sym_freq(num_syms);14121413uint32_t max_freq = 0;1414for (uint32_t i = 0; i < num_syms; i++)1415max_freq = maximum(max_freq, pSym_freq[i]);14161417if (max_freq < UINT16_MAX)1418{1419for (uint32_t i = 0; i < num_syms; i++)1420sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]);1421}1422else1423{1424for (uint32_t i = 0; i < num_syms; i++)1425{1426if (pSym_freq[i])1427{1428uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq);1429sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534));1430}1431}1432}14331434return init(num_syms, &sym_freq[0], max_code_size);1435}14361437void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len)1438{1439if (run_size)1440{1441if (run_size < cHuffmanSmallRepeatSizeMin)1442{1443while (run_size--)1444syms.push_back(static_cast<uint16_t>(len));1445}1446else if (run_size <= cHuffmanSmallRepeatSizeMax)1447{1448syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6)));1449}1450else1451{1452assert((run_size >= cHuffmanBigRepeatSizeMin) && 
(run_size <= cHuffmanBigRepeatSizeMax));1453syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6)));1454}1455}14561457run_size = 0;1458}14591460void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size)1461{1462if (run_size)1463{1464if (run_size < cHuffmanSmallZeroRunSizeMin)1465{1466while (run_size--)1467syms.push_back(0);1468}1469else if (run_size <= cHuffmanSmallZeroRunSizeMax)1470{1471syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6)));1472}1473else1474{1475assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax));1476syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6)));1477}1478}14791480run_size = 0;1481}14821483uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab)1484{1485const uint64_t start_bits = m_total_bits;14861487const uint8_vec &code_sizes = tab.get_code_sizes();14881489uint32_t total_used = tab.get_total_used_codes();1490put_bits(total_used, cHuffmanMaxSymsLog2);14911492if (!total_used)1493return 0;14941495uint16_vec syms;1496syms.reserve(total_used + 16);14971498uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0;14991500for (uint32_t i = 0; i <= total_used; ++i)1501{1502const uint32_t code_len = (i == total_used) ? 
0xFF : code_sizes[i];1503assert((code_len == 0xFF) || (code_len <= 16));15041505if (code_len)1506{1507end_zero_run(syms, zero_run_size);15081509if (code_len != prev_code_len)1510{1511end_nonzero_run(syms, nonzero_run_size, prev_code_len);1512if (code_len != 0xFF)1513syms.push_back(static_cast<uint16_t>(code_len));1514}1515else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax)1516end_nonzero_run(syms, nonzero_run_size, prev_code_len);1517}1518else1519{1520end_nonzero_run(syms, nonzero_run_size, prev_code_len);15211522if (++zero_run_size == cHuffmanBigZeroRunSizeMax)1523end_zero_run(syms, zero_run_size);1524}15251526prev_code_len = code_len;1527}15281529histogram h(cHuffmanTotalCodelengthCodes);1530for (uint32_t i = 0; i < syms.size(); i++)1531h.inc(syms[i] & 63);15321533huffman_encoding_table ct;1534if (!ct.init(h, 7))1535return 0;15361537assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes);15381539uint32_t total_codelength_codes;1540for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--)1541if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]])1542break;15431544assert(total_codelength_codes);15451546put_bits(total_codelength_codes, 5);1547for (uint32_t i = 0; i < total_codelength_codes; i++)1548put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3);15491550for (uint32_t i = 0; i < syms.size(); ++i)1551{1552const uint32_t l = syms[i] & 63, e = syms[i] >> 6;15531554put_code(l, ct);15551556if (l == cHuffmanSmallZeroRunCode)1557put_bits(e, cHuffmanSmallZeroRunExtraBits);1558else if (l == cHuffmanBigZeroRunCode)1559put_bits(e, cHuffmanBigZeroRunExtraBits);1560else if (l == cHuffmanSmallRepeatCode)1561put_bits(e, cHuffmanSmallRepeatExtraBits);1562else if (l == cHuffmanBigRepeatCode)1563put_bits(e, cHuffmanBigRepeatExtraBits);1564}15651566return (uint32_t)(m_total_bits - start_bits);1567}15681569bool huffman_test(int 
rand_seed)1570{1571histogram h(19);15721573// Feed in a fibonacci sequence to force large codesizes1574h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3;1575h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21;1576h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144;1577h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987;1578h[16] += 1597; h[17] += 2584; h[18] += 4181;15791580huffman_encoding_table etab;1581etab.init(h, 16);15821583{1584bitwise_coder c;1585c.init(1024);15861587c.emit_huffman_table(etab);1588for (int i = 0; i < 19; i++)1589c.put_code(i, etab);15901591c.flush();15921593basist::bitwise_decoder d;1594d.init(&c.get_bytes()[0], static_cast<uint32_t>(c.get_bytes().size()));15951596basist::huffman_decoding_table dtab;1597bool success = d.read_huffman_table(dtab);1598if (!success)1599{1600assert(0);1601printf("Failure 5\n");1602return false;1603}16041605for (uint32_t i = 0; i < 19; i++)1606{1607uint32_t s = d.decode_huffman(dtab);1608if (s != i)1609{1610assert(0);1611printf("Failure 5\n");1612return false;1613}1614}1615}16161617basisu::rand r;1618r.seed(rand_seed);16191620for (int iter = 0; iter < 500000; iter++)1621{1622printf("%u\n", iter);16231624uint32_t max_sym = r.irand(0, 8193);1625uint32_t num_codes = r.irand(1, 10000);1626uint_vec syms(num_codes);16271628for (uint32_t i = 0; i < num_codes; i++)1629{1630if (r.bit())1631syms[i] = r.irand(0, max_sym);1632else1633{1634int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum<int>(1, max_sym / 2)) + .5f);1635s = basisu::clamp<int>(s, 0, max_sym);16361637syms[i] = s;1638}16391640}16411642histogram h1(max_sym + 1);1643for (uint32_t i = 0; i < num_codes; i++)1644h1[syms[i]]++;16451646huffman_encoding_table etab2;1647if (!etab2.init(h1, 16))1648{1649assert(0);1650printf("Failed 0\n");1651return false;1652}16531654bitwise_coder c;1655c.init(1024);16561657c.emit_huffman_table(etab2);16581659for (uint32_t i = 0; i < num_codes; i++)1660c.put_code(syms[i], etab2);16611662c.flush();16631664basist::bitwise_decoder 
d;1665d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size());16661667basist::huffman_decoding_table dtab;1668bool success = d.read_huffman_table(dtab);1669if (!success)1670{1671assert(0);1672printf("Failed 2\n");1673return false;1674}16751676for (uint32_t i = 0; i < num_codes; i++)1677{1678uint32_t s = d.decode_huffman(dtab);1679if (s != syms[i])1680{1681assert(0);1682printf("Failed 4\n");1683return false;1684}1685}16861687}1688return true;1689}16901691void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1692{1693assert((num_syms > 0) && (num_indices > 0));1694assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f));16951696clear();16971698m_remap_table.resize(num_syms);1699m_entries_picked.reserve(num_syms);1700m_total_count_to_picked.resize(num_syms);17011702if (num_indices <= 1)1703return;17041705prepare_hist(num_syms, num_indices, pIndices);1706find_initial(num_syms);17071708while (m_entries_to_do.size())1709{1710// Find the best entry to move into the picked list.1711uint32_t best_entry;1712double best_count;1713find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight);17141715// We now have chosen an entry to place in the picked list, now determine which side it goes on.1716const uint32_t entry_to_move = m_entries_to_do[best_entry];17171718float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight);17191720// Put entry_to_move either on the "left" or "right" side of the picked entries1721if (side <= 0)1722m_entries_picked.push_back(entry_to_move);1723else1724m_entries_picked.insert(m_entries_picked.begin(), entry_to_move);17251726// Erase best_entry from the todo list1727m_entries_to_do.erase(m_entries_to_do.begin() + best_entry);17281729// We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry1730for 
(uint32_t i = 0; i < m_entries_to_do.size(); i++)1731m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms);1732}17331734for (uint32_t i = 0; i < num_syms; i++)1735m_remap_table[m_entries_picked[i]] = i;1736}17371738void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices)1739{1740m_hist.resize(0);1741m_hist.resize(num_syms * num_syms);17421743for (uint32_t i = 0; i < num_indices; i++)1744{1745const uint32_t idx = pIndices[i];1746inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms);1747inc_hist(idx, (i > 0) ? pIndices[i - 1] : -1, num_syms);1748}1749}17501751void palette_index_reorderer::find_initial(uint32_t num_syms)1752{1753uint32_t max_count = 0, max_index = 0;1754for (uint32_t i = 0; i < num_syms * num_syms; i++)1755if (m_hist[i] > max_count)1756max_count = m_hist[i], max_index = i;17571758uint32_t a = max_index / num_syms, b = max_index % num_syms;17591760const size_t ofs = m_entries_picked.size();17611762m_entries_picked.push_back(a);1763m_entries_picked.push_back(b);17641765for (uint32_t i = 0; i < num_syms; i++)1766if ((i != m_entries_picked[ofs + 1]) && (i != m_entries_picked[ofs]))1767m_entries_to_do.push_back(i);17681769for (uint32_t i = 0; i < m_entries_to_do.size(); i++)1770for (uint32_t j = 0; j < m_entries_picked.size(); j++)1771m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms);1772}17731774void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1775{1776best_entry = 0;1777best_count = 0;17781779for (uint32_t i = 0; i < m_entries_to_do.size(); i++)1780{1781const uint32_t u = m_entries_to_do[i];1782double total_count = m_total_count_to_picked[u];17831784if (pDist_func)1785{1786float w = maximum<float>((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, 
m_entries_picked.back(), pCtx));1787assert((w >= 0.0f) && (w <= 1.0f));1788total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w);1789}17901791if (total_count <= best_count)1792continue;17931794best_entry = i;1795best_count = total_count;1796}1797}17981799float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1800{1801float which_side = 0;18021803int l_count = 0, r_count = 0;1804for (uint32_t j = 0; j < m_entries_picked.size(); j++)1805{1806const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1));1807which_side += static_cast<float>(r * count);1808if (r >= 0)1809l_count += r * count;1810else1811r_count += -r * count;1812}18131814if (pDist_func)1815{1816float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx));1817float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx));1818which_side = w_left * l_count - w_right * r_count;1819}1820return which_side;1821}18221823void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log)1824{1825assert((first_chan < 4U) && (first_chan + total_chans <= 4U));18261827const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1828const uint32_t height = basisu::minimum(a.get_height(), b.get_height());18291830double max_e = -1e+30f;1831double sum = 0.0f, sum_sqr = 0.0f;18321833m_has_neg = false;1834m_any_abnormal = false;1835m_hf_mag_overflow = false;18361837for (uint32_t y = 0; y < height; y++)1838{1839for (uint32_t x = 0; x < width; x++)1840{1841const vec4F& ca = a(x, y), &cb = b(x, y);18421843if (total_chans)1844{1845for (uint32_t c = 0; c < total_chans; c++)1846{1847float fa = ca[first_chan + c], fb = 
cb[first_chan + c];18481849if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))1850m_hf_mag_overflow = true;18511852if ((fa < 0.0f) || (fb < 0.0f))1853m_has_neg = true;18541855if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))1856m_any_abnormal = true;18571858const double delta = fabs(fa - fb);1859max_e = basisu::maximum<double>(max_e, delta);18601861if (log)1862{1863double log2_delta = log2f(basisu::maximum(0.0f, fa) + 1.0f) - log2f(basisu::maximum(0.0f, fb) + 1.0f);18641865sum += fabs(log2_delta);1866sum_sqr += log2_delta * log2_delta;1867}1868else1869{1870sum += fabs(delta);1871sum_sqr += delta * delta;1872}1873}1874}1875else1876{1877for (uint32_t c = 0; c < 3; c++)1878{1879float fa = ca[c], fb = cb[c];18801881if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))1882m_hf_mag_overflow = true;18831884if ((fa < 0.0f) || (fb < 0.0f))1885m_has_neg = true;18861887if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))1888m_any_abnormal = true;1889}18901891double ca_l = get_luminance(ca), cb_l = get_luminance(cb);18921893double delta = fabs(ca_l - cb_l);1894max_e = basisu::maximum(max_e, delta);18951896if (log)1897{1898double log2_delta = log2(basisu::maximum<double>(0.0f, ca_l) + 1.0f) - log2(basisu::maximum<double>(0.0f, cb_l) + 1.0f);18991900sum += fabs(log2_delta);1901sum_sqr += log2_delta * log2_delta;1902}1903else1904{1905sum += delta;1906sum_sqr += delta * delta;1907}1908}1909}1910}19111912m_max = (double)(max_e);19131914double total_values = (double)width * (double)height;1915if (avg_comp_error)1916total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);19171918m_mean = (float)(sum / total_values);1919m_mean_squared = (float)(sum_sqr / total_values);1920m_rms = (float)sqrt(sum_sqr / total_values);19211922const double max_val = 1.0f;1923m_psnr = m_rms ? 
(float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;1924}19251926void image_metrics::calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)1927{1928assert(total_chans);1929assert((first_chan < 4U) && (first_chan + total_chans <= 4U));19301931const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1932const uint32_t height = basisu::minimum(a.get_height(), b.get_height());19331934m_has_neg = false;1935m_hf_mag_overflow = false;1936m_any_abnormal = false;19371938uint_vec hist(65536);19391940for (uint32_t y = 0; y < height; y++)1941{1942for (uint32_t x = 0; x < width; x++)1943{1944const vec4F& ca = a(x, y), &cb = b(x, y);19451946for (uint32_t i = 0; i < 4; i++)1947{1948if ((ca[i] < 0.0f) || (cb[i] < 0.0f))1949m_has_neg = true;19501951if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))1952m_hf_mag_overflow = true;19531954if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))1955m_any_abnormal = true;1956}19571958int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };1959int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };19601961for (uint32_t c = 0; c < total_chans; c++)1962hist[iabs(cah[first_chan + c] - cbh[first_chan + c]) & 65535]++;19631964} // x1965} // y19661967m_max = 0;1968double sum = 0.0f, sum2 = 0.0f;1969for (uint32_t i = 0; i < 65536; i++)1970{1971if (hist[i])1972{1973m_max = basisu::maximum<double>(m_max, (double)i);1974double v = (double)i * (double)hist[i];1975sum += v;1976sum2 += (double)i * v;1977}1978}19791980double total_values = (double)width * (double)height;1981if (avg_comp_error)1982total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);19831984const float max_val = 65535.0f;1985m_mean = (float)clamp<double>(sum / 
total_values, 0.0f, max_val);1986m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);1987m_rms = (float)sqrt(m_mean_squared);1988m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;1989}19901991// Alt. variant, same as calc_half(), for validation.1992void image_metrics::calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)1993{1994assert(total_chans);1995assert((first_chan < 4U) && (first_chan + total_chans <= 4U));19961997const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1998const uint32_t height = basisu::minimum(a.get_height(), b.get_height());19992000m_has_neg = false;2001m_hf_mag_overflow = false;2002m_any_abnormal = false;20032004double sum = 0.0f, sum2 = 0.0f;2005m_max = 0;20062007for (uint32_t y = 0; y < height; y++)2008{2009for (uint32_t x = 0; x < width; x++)2010{2011const vec4F& ca = a(x, y), & cb = b(x, y);20122013for (uint32_t i = 0; i < 4; i++)2014{2015if ((ca[i] < 0.0f) || (cb[i] < 0.0f))2016m_has_neg = true;20172018if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))2019m_hf_mag_overflow = true;20202021if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))2022m_any_abnormal = true;2023}20242025int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };2026int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };20272028for (uint32_t c = 0; c < total_chans; c++)2029{2030int diff = iabs(cah[first_chan + c] - cbh[first_chan + c]);2031if (diff)2032m_max = std::max<double>(m_max, (double)diff);20332034sum += diff;2035sum2 += squarei(cah[first_chan + c] - cbh[first_chan + c]);2036}20372038} // x2039} // y20402041double total_values = (double)width * (double)height;2042if 
(avg_comp_error)2043total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);20442045const float max_val = 65535.0f;2046m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val);2047m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);2048m_rms = (float)sqrt(m_mean_squared);2049m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;2050}20512052void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)2053{2054assert((first_chan < 4U) && (first_chan + total_chans <= 4U));20552056const uint32_t width = basisu::minimum(a.get_width(), b.get_width());2057const uint32_t height = basisu::minimum(a.get_height(), b.get_height());20582059double hist[256];2060clear_obj(hist);20612062m_has_neg = false;2063m_any_abnormal = false;2064m_hf_mag_overflow = false;20652066for (uint32_t y = 0; y < height; y++)2067{2068for (uint32_t x = 0; x < width; x++)2069{2070const color_rgba &ca = a(x, y), &cb = b(x, y);20712072if (total_chans)2073{2074for (uint32_t c = 0; c < total_chans; c++)2075hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;2076}2077else2078{2079if (use_601_luma)2080hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++;2081else2082hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++;2083}2084}2085}20862087m_max = 0;2088double sum = 0.0f, sum2 = 0.0f;2089for (uint32_t i = 0; i < 256; i++)2090{2091if (hist[i])2092{2093m_max = basisu::maximum<double>(m_max, (double)i);2094double v = i * hist[i];2095sum += v;2096sum2 += i * v;2097}2098}20992100double total_values = (double)width * (double)height;2101if (avg_comp_error)2102total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);21032104m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);2105m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f);2106m_rms = (float)sqrt(m_mean_squared);2107m_psnr = m_rms ? 
// Computes a 32-bit hash of the len bytes starting at pBuf.
//   pBuf - input bytes; no alignment is required.
//   len  - number of bytes to hash.
// Returns 0 when pBuf is null or len is 0. The input is consumed four bytes at a time as two
// host-byte-order 16-bit words, so results for inputs of two or more bytes depend on host
// endianness.
uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
{
	if (!pBuf || !len)
		return 0;

	// 16-bit loads go through memcpy: pBuf is a plain byte pointer, so dereferencing it as a
	// uint16_t* is undefined behavior (possible misaligned access, and an aliasing violation).
	const auto load_u16 = [](const uint8_t *p) -> uint32_t
	{
		uint16_t v;
		memcpy(&v, p, sizeof(v));
		return v;
	};

	// Tail bytes are mixed in as sign-extended values. Converting to uint32_t before any shift
	// keeps the bit pattern while avoiding a left shift of a negative signed value.
	const auto load_s8 = [](const uint8_t *p) -> uint32_t
	{
		return static_cast<uint32_t>(static_cast<int32_t>(*reinterpret_cast<const signed char *>(p)));
	};

	uint32_t h = static_cast<uint32_t>(len);

	const uint32_t bytes_left = static_cast<uint32_t>(len & 3);
	len >>= 2;

	// Main loop: one 32-bit chunk (two 16-bit words) per iteration.
	while (len--)
	{
		h += load_u16(pBuf);

		const uint32_t t = (load_u16(pBuf + sizeof(uint16_t)) << 11) ^ h;
		h = (h << 16) ^ t;

		pBuf += sizeof(uint32_t);

		h += h >> 11;
	}

	// Mix in the 1-3 trailing bytes, if any.
	switch (bytes_left)
	{
	case 1:
		h += load_s8(pBuf);
		h ^= h << 10;
		h += h >> 1;
		break;
	case 2:
		h += load_u16(pBuf);
		h ^= h << 11;
		h += h >> 17;
		break;
	case 3:
		h += load_u16(pBuf);
		h ^= h << 16;
		h ^= load_s8(pBuf + sizeof(uint16_t)) << 18;
		h += h >> 11;
		break;
	default:
		break;
	}

	// Final mixing of the accumulated state.
	h ^= h << 3;
	h += h >> 5;
	h ^= h << 4;
	h += h >> 17;
	h ^= h << 25;
	h += h >> 6;

	return h;
}
num_threads) :2215m_num_active_jobs(0)2216{2217m_kill_flag.store(false);2218m_num_active_workers.store(0);22192220assert(num_threads >= 1U);22212222debug_printf("job_pool::job_pool: %u total threads\n", num_threads);22232224if (num_threads > 1)2225{2226m_threads.resize(num_threads - 1);22272228for (int i = 0; i < ((int)num_threads - 1); i++)2229m_threads[i] = std::thread([this, i] { job_thread(i); });2230}2231}22322233job_pool::~job_pool()2234{2235debug_printf("job_pool::~job_pool\n");22362237// Notify all workers that they need to die right now.2238m_kill_flag.store(true);22392240m_has_work.notify_all();22412242#ifdef __EMSCRIPTEN__2243for ( ; ; )2244{2245if (m_num_active_workers.load() <= 0)2246break;2247std::this_thread::sleep_for(std::chrono::milliseconds(50));2248}22492250// At this point all worker threads should be exiting or exited.2251// We could call detach(), but this seems to just call join() anyway.2252#endif22532254// Wait for all worker threads to exit.2255for (uint32_t i = 0; i < m_threads.size(); i++)2256m_threads[i].join();2257}22582259void job_pool::add_job(const std::function<void()>& job)2260{2261std::unique_lock<std::mutex> lock(m_mutex);22622263m_queue.emplace_back(job);22642265const size_t queue_size = m_queue.size();22662267lock.unlock();22682269if (queue_size > 1)2270m_has_work.notify_one();2271}22722273void job_pool::add_job(std::function<void()>&& job)2274{2275std::unique_lock<std::mutex> lock(m_mutex);22762277m_queue.emplace_back(std::move(job));22782279const size_t queue_size = m_queue.size();22802281lock.unlock();22822283if (queue_size > 1)2284{2285m_has_work.notify_one();2286}2287}22882289void job_pool::wait_for_all()2290{2291std::unique_lock<std::mutex> lock(m_mutex);22922293// Drain the job queue on the calling thread.2294while (!m_queue.empty())2295{2296std::function<void()> job(m_queue.back());2297m_queue.pop_back();22982299lock.unlock();23002301job();23022303lock.lock();2304}23052306// The queue is empty, now wait for all active 
jobs to finish up.2307#ifndef __EMSCRIPTEN__2308m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } );2309#else2310// Avoid infinite blocking2311for (; ; )2312{2313if (m_no_more_jobs.wait_for(lock, std::chrono::milliseconds(50), [this] { return !m_num_active_jobs; }))2314{2315break;2316}2317}2318#endif2319}23202321void job_pool::job_thread(uint32_t index)2322{2323BASISU_NOTE_UNUSED(index);2324//debug_printf("job_pool::job_thread: starting %u\n", index);23252326m_num_active_workers.fetch_add(1);23272328while (true)2329{2330std::unique_lock<std::mutex> lock(m_mutex);23312332// Wait for any jobs to be issued.2333m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } );23342335// Check to see if we're supposed to exit.2336if (m_kill_flag)2337break;23382339// Get the job and execute it.2340std::function<void()> job(m_queue.back());2341m_queue.pop_back();23422343++m_num_active_jobs;23442345lock.unlock();23462347job();23482349lock.lock();23502351--m_num_active_jobs;23522353// Now check if there are no more jobs remaining.2354const bool all_done = m_queue.empty() && !m_num_active_jobs;23552356lock.unlock();23572358if (all_done)2359m_no_more_jobs.notify_all();2360}23612362m_num_active_workers.fetch_add(-1);23632364//debug_printf("job_pool::job_thread: exiting\n");2365}23662367// .TGA image loading2368#pragma pack(push)2369#pragma pack(1)2370struct tga_header2371{2372uint8_t m_id_len;2373uint8_t m_cmap;2374uint8_t m_type;2375packed_uint<2> m_cmap_first;2376packed_uint<2> m_cmap_len;2377uint8_t m_cmap_bpp;2378packed_uint<2> m_x_org;2379packed_uint<2> m_y_org;2380packed_uint<2> m_width;2381packed_uint<2> m_height;2382uint8_t m_depth;2383uint8_t m_desc;2384};2385#pragma pack(pop)23862387const uint32_t MAX_TGA_IMAGE_SIZE = 16384;23882389enum tga_image_type2390{2391cITPalettized = 1,2392cITRGB = 2,2393cITGrayscale = 32394};23952396uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans)2397{2398width = 
0;2399height = 0;2400n_chans = 0;24012402if (buf_size <= sizeof(tga_header))2403return nullptr;24042405const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf);24062407if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE))2408return nullptr;24092410if (hdr.m_desc >> 6)2411return nullptr;24122413// Simple validation2414if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1))2415return nullptr;24162417if (hdr.m_cmap)2418{2419if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32))2420return nullptr;24212422// Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either.2423if (hdr.m_cmap_first != 0)2424return nullptr;2425}24262427const bool x_flipped = (hdr.m_desc & 0x10) != 0;2428const bool y_flipped = (hdr.m_desc & 0x20) == 0;24292430bool rle_flag = false;2431int file_image_type = hdr.m_type;2432if (file_image_type > 8)2433{2434file_image_type -= 8;2435rle_flag = true;2436}24372438const tga_image_type image_type = static_cast<tga_image_type>(file_image_type);24392440switch (file_image_type)2441{2442case cITRGB:2443if (hdr.m_depth == 8)2444return nullptr;2445break;2446case cITPalettized:2447if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0))2448return nullptr;2449break;2450case cITGrayscale:2451if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0))2452return nullptr;2453if ((hdr.m_depth != 8) && (hdr.m_depth != 16))2454return nullptr;2455break;2456default:2457return nullptr;2458}24592460uint32_t tga_bytes_per_pixel = 0;24612462switch (hdr.m_depth)2463{2464case 32:2465tga_bytes_per_pixel = 4;2466n_chans = 4;2467break;2468case 24:2469tga_bytes_per_pixel = 3;2470n_chans = 3;2471break;2472case 16:2473case 15:2474tga_bytes_per_pixel = 2;2475// For compatibility with stb_image_write.h2476n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 
4 : 3;2477break;2478case 8:2479tga_bytes_per_pixel = 1;2480// For palettized RGBA support, which both FreeImage and stb_image support.2481n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3;2482break;2483default:2484return nullptr;2485}24862487//const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel;24882489const uint8_t *pSrc = pBuf + sizeof(tga_header);2490uint32_t bytes_remaining = buf_size - sizeof(tga_header);24912492if (hdr.m_id_len)2493{2494if (bytes_remaining < hdr.m_id_len)2495return nullptr;2496pSrc += hdr.m_id_len;2497bytes_remaining += hdr.m_id_len;2498}24992500color_rgba pal[256];2501for (uint32_t i = 0; i < 256; i++)2502pal[i].set(0, 0, 0, 255);25032504if ((hdr.m_cmap) && (hdr.m_cmap_len))2505{2506if (image_type == cITPalettized)2507{2508// Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years).2509if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) )2510return nullptr;25112512if (hdr.m_cmap_bpp == 32)2513{2514const uint32_t pal_size = hdr.m_cmap_len * 4;2515if (bytes_remaining < pal_size)2516return nullptr;25172518for (uint32_t i = 0; i < hdr.m_cmap_len; i++)2519{2520pal[i].r = pSrc[i * 4 + 2];2521pal[i].g = pSrc[i * 4 + 1];2522pal[i].b = pSrc[i * 4 + 0];2523pal[i].a = pSrc[i * 4 + 3];2524}25252526bytes_remaining -= pal_size;2527pSrc += pal_size;2528}2529else if (hdr.m_cmap_bpp == 24)2530{2531const uint32_t pal_size = hdr.m_cmap_len * 3;2532if (bytes_remaining < pal_size)2533return nullptr;25342535for (uint32_t i = 0; i < hdr.m_cmap_len; i++)2536{2537pal[i].r = pSrc[i * 3 + 2];2538pal[i].g = pSrc[i * 3 + 1];2539pal[i].b = pSrc[i * 3 + 0];2540pal[i].a = 255;2541}25422543bytes_remaining -= pal_size;2544pSrc += pal_size;2545}2546else2547{2548const uint32_t pal_size = hdr.m_cmap_len * 2;2549if (bytes_remaining < pal_size)2550return nullptr;25512552for (uint32_t i = 0; i < hdr.m_cmap_len; 
i++)2553{2554const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8);25552556pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31;2557pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31;2558pal[i].b = ((v & 31) * 255 + 15) / 31;2559pal[i].a = 255;2560}25612562bytes_remaining -= pal_size;2563pSrc += pal_size;2564}2565}2566else2567{2568const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len;2569if (bytes_remaining < bytes_to_skip)2570return nullptr;2571pSrc += bytes_to_skip;2572bytes_remaining += bytes_to_skip;2573}2574}25752576width = hdr.m_width;2577height = hdr.m_height;25782579const uint32_t source_pitch = width * tga_bytes_per_pixel;2580const uint32_t dest_pitch = width * n_chans;25812582uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height);2583if (!pImage)2584return nullptr;25852586std::vector<uint8_t> input_line_buf;2587if (rle_flag)2588input_line_buf.resize(source_pitch);25892590int run_type = 0, run_remaining = 0;2591uint8_t run_pixel[4];2592memset(run_pixel, 0, sizeof(run_pixel));25932594for (int y = 0; y < height; y++)2595{2596const uint8_t *pLine_data;25972598if (rle_flag)2599{2600int pixels_remaining = width;2601uint8_t *pDst = &input_line_buf[0];26022603do2604{2605if (!run_remaining)2606{2607if (bytes_remaining < 1)2608{2609free(pImage);2610return nullptr;2611}26122613int v = *pSrc++;2614bytes_remaining--;26152616run_type = v & 0x80;2617run_remaining = (v & 0x7F) + 1;26182619if (run_type)2620{2621if (bytes_remaining < tga_bytes_per_pixel)2622{2623free(pImage);2624return nullptr;2625}26262627memcpy(run_pixel, pSrc, tga_bytes_per_pixel);2628pSrc += tga_bytes_per_pixel;2629bytes_remaining -= tga_bytes_per_pixel;2630}2631}26322633const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining);2634pixels_remaining -= n;2635run_remaining -= n;26362637if (run_type)2638{2639for (uint32_t i = 0; i < n; i++)2640for (uint32_t j = 0; j < tga_bytes_per_pixel; j++)2641*pDst++ = run_pixel[j];2642}2643else2644{2645const uint32_t bytes_wanted = n * 
tga_bytes_per_pixel;26462647if (bytes_remaining < bytes_wanted)2648{2649free(pImage);2650return nullptr;2651}26522653memcpy(pDst, pSrc, bytes_wanted);2654pDst += bytes_wanted;26552656pSrc += bytes_wanted;2657bytes_remaining -= bytes_wanted;2658}26592660} while (pixels_remaining);26612662assert((pDst - &input_line_buf[0]) == (int)(width * tga_bytes_per_pixel));26632664pLine_data = &input_line_buf[0];2665}2666else2667{2668if (bytes_remaining < source_pitch)2669{2670free(pImage);2671return nullptr;2672}26732674pLine_data = pSrc;2675bytes_remaining -= source_pitch;2676pSrc += source_pitch;2677}26782679// Convert to 24bpp RGB or 32bpp RGBA.2680uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0);2681const int dst_stride = x_flipped ? -((int)n_chans) : n_chans;26822683switch (hdr.m_depth)2684{2685case 32:2686assert(tga_bytes_per_pixel == 4 && n_chans == 4);2687for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride)2688{2689pDst[0] = pLine_data[2];2690pDst[1] = pLine_data[1];2691pDst[2] = pLine_data[0];2692pDst[3] = pLine_data[3];2693}2694break;2695case 24:2696assert(tga_bytes_per_pixel == 3 && n_chans == 3);2697for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride)2698{2699pDst[0] = pLine_data[2];2700pDst[1] = pLine_data[1];2701pDst[2] = pLine_data[0];2702}2703break;2704case 16:2705case 15:2706if (image_type == cITRGB)2707{2708assert(tga_bytes_per_pixel == 2 && n_chans == 3);2709for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)2710{2711const uint32_t v = pLine_data[0] | (pLine_data[1] << 8);2712pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31;2713pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31;2714pDst[2] = ((v & 31) * 255 + 15) / 31;2715}2716}2717else2718{2719assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4);2720for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)2721{2722pDst[0] = pLine_data[0];2723pDst[1] = 
pLine_data[0];2724pDst[2] = pLine_data[0];2725pDst[3] = pLine_data[1];2726}2727}2728break;2729case 8:2730assert(tga_bytes_per_pixel == 1);2731if (image_type == cITPalettized)2732{2733if (hdr.m_cmap_bpp == 32)2734{2735assert(n_chans == 4);2736for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2737{2738const uint32_t c = *pLine_data;2739pDst[0] = pal[c].r;2740pDst[1] = pal[c].g;2741pDst[2] = pal[c].b;2742pDst[3] = pal[c].a;2743}2744}2745else2746{2747assert(n_chans == 3);2748for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2749{2750const uint32_t c = *pLine_data;2751pDst[0] = pal[c].r;2752pDst[1] = pal[c].g;2753pDst[2] = pal[c].b;2754}2755}2756}2757else2758{2759assert(n_chans == 3);2760for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2761{2762const uint8_t c = *pLine_data;2763pDst[0] = c;2764pDst[1] = c;2765pDst[2] = c;2766}2767}2768break;2769default:2770assert(0);2771break;2772}2773} // y27742775return pImage;2776}27772778uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans)2779{2780width = height = n_chans = 0;27812782uint8_vec filedata;2783if (!read_file_to_vec(pFilename, filedata))2784return nullptr;27852786if (!filedata.size() || (filedata.size() > UINT32_MAX))2787return nullptr;27882789return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans);2790}27912792static inline void hdr_convert(const color_rgba& rgbe, vec4F& c)2793{2794if (rgbe[3] != 0)2795{2796float scale = ldexp(1.0f, rgbe[3] - 128 - 8);2797c.set((float)rgbe[0] * scale, (float)rgbe[1] * scale, (float)rgbe[2] * scale, 1.0f);2798}2799else2800{2801c.set(0.0f, 0.0f, 0.0f, 1.0f);2802}2803}28042805bool string_begins_with(const std::string& str, const char* pPhrase)2806{2807const size_t str_len = str.size();28082809const size_t phrase_len = strlen(pPhrase);2810assert(phrase_len);28112812if (str_len >= phrase_len)2813{2814#ifdef _MSC_VER2815if (_strnicmp(pPhrase, str.c_str(), phrase_len) == 0)2816#else2817if 
(strncasecmp(pPhrase, str.c_str(), phrase_len) == 0)2818#endif2819return true;2820}28212822return false;2823}28242825// Radiance RGBE (.HDR) image reading.2826// This code tries to preserve the original logic in Radiance's ray/src/common/color.c code:2827// https://www.radiance-online.org/cgi-bin/viewcvs.cgi/ray/src/common/color.c?revision=2.26&view=markup&sortby=log2828// Also see: https://flipcode.com/archives/HDR_Image_Reader.shtml.2829// https://github.com/LuminanceHDR/LuminanceHDR/blob/master/src/Libpfs/io/rgbereader.cpp.2830// https://radsite.lbl.gov/radiance/refer/filefmts.pdf2831// Buggy readers:2832// stb_image.h: appears to be a clone of rgbe.c, but with goto's (doesn't support old format files, doesn't support mixture of RLE/non-RLE scanlines)2833// http://www.graphics.cornell.edu/~bjw/rgbe.html - rgbe.c/h2834// http://www.graphics.cornell.edu/online/formats/rgbe/ - rgbe.c/.h - buggy2835bool read_rgbe(const uint8_vec &filedata, imagef& img, rgbe_header_info& hdr_info)2836{2837hdr_info.clear();28382839const uint32_t MAX_SUPPORTED_DIM = 65536;28402841if (filedata.size() < 4)2842return false;28432844// stb_image.h checks for the string "#?RADIANCE" or "#?RGBE" in the header.2845// The original Radiance header code doesn't care about the specific string.2846// opencv's reader only checks for "#?", so that's what we're going to do.2847if ((filedata[0] != '#') || (filedata[1] != '?'))2848return false;28492850//uint32_t width = 0, height = 0;2851bool is_rgbe = false;2852size_t cur_ofs = 0;28532854// Parse the lines until we encounter a blank line.2855std::string cur_line;2856for (; ; )2857{2858if (cur_ofs >= filedata.size())2859return false;28602861const uint32_t HEADER_TOO_BIG_SIZE = 4096;2862if (cur_ofs >= HEADER_TOO_BIG_SIZE)2863{2864// Header seems too large - something is likely wrong. 
Return failure.2865return false;2866}28672868uint8_t c = filedata[cur_ofs++];28692870if (c == '\n')2871{2872if (!cur_line.size())2873break;28742875if ((cur_line[0] == '#') && (!string_begins_with(cur_line, "#?")) && (!hdr_info.m_program.size()))2876{2877cur_line.erase(0, 1);2878while (cur_line.size() && (cur_line[0] == ' '))2879cur_line.erase(0, 1);28802881hdr_info.m_program = cur_line;2882}2883else if (string_begins_with(cur_line, "EXPOSURE=") && (cur_line.size() > 9))2884{2885hdr_info.m_exposure = atof(cur_line.c_str() + 9);2886hdr_info.m_has_exposure = true;2887}2888else if (string_begins_with(cur_line, "GAMMA=") && (cur_line.size() > 6))2889{2890hdr_info.m_exposure = atof(cur_line.c_str() + 6);2891hdr_info.m_has_gamma = true;2892}2893else if (cur_line == "FORMAT=32-bit_rle_rgbe")2894{2895is_rgbe = true;2896}28972898cur_line.resize(0);2899}2900else2901cur_line.push_back((char)c);2902}29032904if (!is_rgbe)2905return false;29062907// Assume and require the final line to have the image's dimensions. 
We're not supporting flipping.2908for (; ; )2909{2910if (cur_ofs >= filedata.size())2911return false;2912uint8_t c = filedata[cur_ofs++];2913if (c == '\n')2914break;2915cur_line.push_back((char)c);2916}29172918int comp[2] = { 1, 0 }; // y, x (major, minor)2919int dir[2] = { -1, 1 }; // -1, 1, (major, minor), for y -1=up2920uint32_t major_dim = 0, minor_dim = 0;29212922// Parse the dimension string, normally it'll be "-Y # +X #" (major, minor), rarely it differs2923for (uint32_t d = 0; d < 2; d++) // 0=major, 1=minor2924{2925const bool is_neg_x = (strncmp(&cur_line[0], "-X ", 3) == 0);2926const bool is_pos_x = (strncmp(&cur_line[0], "+X ", 3) == 0);2927const bool is_x = is_neg_x || is_pos_x;29282929const bool is_neg_y = (strncmp(&cur_line[0], "-Y ", 3) == 0);2930const bool is_pos_y = (strncmp(&cur_line[0], "+Y ", 3) == 0);2931const bool is_y = is_neg_y || is_pos_y;29322933if (cur_line.size() < 3)2934return false;29352936if (!is_x && !is_y)2937return false;29382939comp[d] = is_x ? 0 : 1;2940dir[d] = (is_neg_x || is_neg_y) ? -1 : 1;29412942uint32_t& dim = d ? 
minor_dim : major_dim;29432944cur_line.erase(0, 3);29452946while (cur_line.size())2947{2948char c = cur_line[0];2949if (c != ' ')2950break;2951cur_line.erase(0, 1);2952}29532954bool has_digits = false;2955while (cur_line.size())2956{2957char c = cur_line[0];2958cur_line.erase(0, 1);29592960if (c == ' ')2961break;29622963if ((c < '0') || (c > '9'))2964return false;29652966const uint32_t prev_dim = dim;2967dim = dim * 10 + (c - '0');2968if (dim < prev_dim)2969return false;29702971has_digits = true;2972}2973if (!has_digits)2974return false;29752976if ((dim < 1) || (dim > MAX_SUPPORTED_DIM))2977return false;2978}29792980// temp image: width=minor, height=major2981img.resize(minor_dim, major_dim);29822983std::vector<color_rgba> temp_scanline(minor_dim);29842985// Read the scanlines.2986for (uint32_t y = 0; y < major_dim; y++)2987{2988vec4F* pDst = &img(0, y);29892990if ((filedata.size() - cur_ofs) < 4)2991return false;29922993// Determine if the line uses the new or old format. See the logic in color.c.2994bool old_decrunch = false;2995if ((minor_dim < 8) || (minor_dim > 0x7FFF))2996{2997// Line is too short or long; must be old format.2998old_decrunch = true;2999}3000else if (filedata[cur_ofs] != 2)3001{3002// R is not 2, must be old format3003old_decrunch = true;3004}3005else3006{3007// c[0]/red is 2.Check GB and E for validity.3008color_rgba c;3009memcpy(&c, &filedata[cur_ofs], 4);30103011if ((c[1] != 2) || (c[2] & 0x80))3012{3013// G isn't 2, or the high bit of B is set which is impossible (image's > 0x7FFF pixels can't get here). Use old format.3014old_decrunch = true;3015}3016else3017{3018// Check B and E. If this isn't the minor_dim in network order, something is wrong. 
The pixel would also be denormalized, and invalid.3019uint32_t w = (c[2] << 8) | c[3];3020if (w != minor_dim)3021return false;30223023cur_ofs += 4;3024}3025}30263027if (old_decrunch)3028{3029uint32_t rshift = 0, x = 0;30303031while (x < minor_dim)3032{3033if ((filedata.size() - cur_ofs) < 4)3034return false;30353036color_rgba c;3037memcpy(&c, &filedata[cur_ofs], 4);3038cur_ofs += 4;30393040if ((c[0] == 1) && (c[1] == 1) && (c[2] == 1))3041{3042// We'll allow RLE matches to cross scanlines, but not on the very first pixel.3043if ((!x) && (!y))3044return false;30453046const uint32_t run_len = c[3] << rshift;3047const vec4F run_color(pDst[-1]);30483049if ((x + run_len) > minor_dim)3050return false;30513052for (uint32_t i = 0; i < run_len; i++)3053*pDst++ = run_color;30543055rshift += 8;3056x += run_len;3057}3058else3059{3060rshift = 0;30613062hdr_convert(c, *pDst);3063pDst++;3064x++;3065}3066}3067continue;3068}30693070// New format3071for (uint32_t s = 0; s < 4; s++)3072{3073uint32_t x_ofs = 0;3074while (x_ofs < minor_dim)3075{3076uint32_t num_remaining = minor_dim - x_ofs;30773078if (cur_ofs >= filedata.size())3079return false;30803081uint8_t count = filedata[cur_ofs++];3082if (count > 128)3083{3084count -= 128;3085if (count > num_remaining)3086return false;30873088if (cur_ofs >= filedata.size())3089return false;3090const uint8_t val = filedata[cur_ofs++];30913092for (uint32_t i = 0; i < count; i++)3093temp_scanline[x_ofs + i][s] = val;30943095x_ofs += count;3096}3097else3098{3099if ((!count) || (count > num_remaining))3100return false;31013102for (uint32_t i = 0; i < count; i++)3103{3104if (cur_ofs >= filedata.size())3105return false;3106const uint8_t val = filedata[cur_ofs++];31073108temp_scanline[x_ofs + i][s] = val;3109}31103111x_ofs += count;3112}3113} // while (x_ofs < minor_dim)3114} // c31153116// Convert all the RGBE pixels to float now3117for (uint32_t x = 0; x < minor_dim; x++, pDst++)3118hdr_convert(temp_scanline[x], *pDst);31193120assert((pDst - &img(0, 
y)) == (int)minor_dim);31213122} // y31233124// at here:3125// img(width,height)=image pixels as read from file, x=minor axis, y=major axis3126// width=minor axis dimension3127// height=major axis dimension3128// in file, pixels are emitted in minor order, them major (so major=scanlines in the file)31293130imagef final_img;3131if (comp[0] == 0) // if major axis is X3132final_img.resize(major_dim, minor_dim);3133else // major axis is Y, minor is X3134final_img.resize(minor_dim, major_dim);31353136// TODO: optimize the identity case3137for (uint32_t major_iter = 0; major_iter < major_dim; major_iter++)3138{3139for (uint32_t minor_iter = 0; minor_iter < minor_dim; minor_iter++)3140{3141const vec4F& p = img(minor_iter, major_iter);31423143uint32_t dst_x = 0, dst_y = 0;31443145// is the minor dim output x?3146if (comp[1] == 0)3147{3148// minor axis is x, major is y31493150// is minor axis (which is output x) flipped?3151if (dir[1] < 0)3152dst_x = minor_dim - 1 - minor_iter;3153else3154dst_x = minor_iter;31553156// is major axis (which is output y) flipped? 
-1=down in raster order, 1=up3157if (dir[0] < 0)3158dst_y = major_iter;3159else3160dst_y = major_dim - 1 - major_iter;3161}3162else3163{3164// minor axis is output y, major is output x31653166// is minor axis (which is output y) flipped?3167if (dir[1] < 0)3168dst_y = minor_iter;3169else3170dst_y = minor_dim - 1 - minor_iter;31713172// is major axis (which is output x) flipped?3173if (dir[0] < 0)3174dst_x = major_dim - 1 - major_iter;3175else3176dst_x = major_iter;3177}31783179final_img(dst_x, dst_y) = p;3180}3181}31823183final_img.swap(img);31843185return true;3186}31873188bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)3189{3190uint8_vec filedata;3191if (!read_file_to_vec(pFilename, filedata))3192return false;3193return read_rgbe(filedata, img, hdr_info);3194}31953196static uint8_vec& append_string(uint8_vec& buf, const char* pStr)3197{3198const size_t str_len = strlen(pStr);3199if (!str_len)3200return buf;32013202const size_t ofs = buf.size();3203buf.resize(ofs + str_len);3204memcpy(&buf[ofs], pStr, str_len);32053206return buf;3207}32083209static uint8_vec& append_string(uint8_vec& buf, const std::string& str)3210{3211if (!str.size())3212return buf;3213return append_string(buf, str.c_str());3214}32153216static inline void float2rgbe(color_rgba &rgbe, const vec4F &c)3217{3218const float red = c[0], green = c[1], blue = c[2];3219assert(red >= 0.0f && green >= 0.0f && blue >= 0.0f);32203221const float max_v = basisu::maximumf(basisu::maximumf(red, green), blue);32223223if (max_v < 1e-32f)3224rgbe.clear();3225else3226{3227int e;3228const float scale = frexp(max_v, &e) * 256.0f / max_v;3229rgbe[0] = (uint8_t)(clamp<int>((int)(red * scale), 0, 255));3230rgbe[1] = (uint8_t)(clamp<int>((int)(green * scale), 0, 255));3231rgbe[2] = (uint8_t)(clamp<int>((int)(blue * scale), 0, 255));3232rgbe[3] = (uint8_t)(e + 128);3233}3234}32353236const bool RGBE_FORCE_RAW = false;3237const bool RGBE_FORCE_OLD_CRUNCH = false; // note must readers 
(particularly stb_image.h's) don't properly support this, when they should32383239bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info)3240{3241if (!img.get_width() || !img.get_height())3242return false;32433244const uint32_t width = img.get_width(), height = img.get_height();32453246file_data.resize(0);3247file_data.reserve(1024 + img.get_width() * img.get_height() * 4);32483249append_string(file_data, "#?RADIANCE\n");32503251if (hdr_info.m_has_exposure)3252append_string(file_data, string_format("EXPOSURE=%g\n", hdr_info.m_exposure));32533254if (hdr_info.m_has_gamma)3255append_string(file_data, string_format("GAMMA=%g\n", hdr_info.m_gamma));32563257append_string(file_data, "FORMAT=32-bit_rle_rgbe\n\n");3258append_string(file_data, string_format("-Y %u +X %u\n", height, width));32593260if (((width < 8) || (width > 0x7FFF)) || (RGBE_FORCE_RAW))3261{3262for (uint32_t y = 0; y < height; y++)3263{3264for (uint32_t x = 0; x < width; x++)3265{3266color_rgba rgbe;3267float2rgbe(rgbe, img(x, y));3268append_vector(file_data, (const uint8_t *)&rgbe, sizeof(rgbe));3269}3270}3271}3272else if (RGBE_FORCE_OLD_CRUNCH)3273{3274for (uint32_t y = 0; y < height; y++)3275{3276int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1;3277uint32_t cur_run_len = 0;32783279for (uint32_t x = 0; x < width; x++)3280{3281color_rgba rgbe;3282float2rgbe(rgbe, img(x, y));32833284if ((rgbe[0] == prev_r) && (rgbe[1] == prev_g) && (rgbe[2] == prev_b) && (rgbe[3] == prev_e))3285{3286if (++cur_run_len == 255)3287{3288// this ensures rshift stays 0, it's lame but this path is only for testing readers3289color_rgba f(1, 1, 1, cur_run_len - 1);3290append_vector(file_data, (const uint8_t*)&f, sizeof(f));3291append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));3292cur_run_len = 0;3293}3294}3295else3296{3297if (cur_run_len > 0)3298{3299color_rgba f(1, 1, 1, cur_run_len);3300append_vector(file_data, (const uint8_t*)&f, sizeof(f));33013302cur_run_len = 
0;3303}33043305append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));33063307prev_r = rgbe[0];3308prev_g = rgbe[1];3309prev_b = rgbe[2];3310prev_e = rgbe[3];3311}3312} // x33133314if (cur_run_len > 0)3315{3316color_rgba f(1, 1, 1, cur_run_len);3317append_vector(file_data, (const uint8_t*)&f, sizeof(f));3318}3319} // y3320}3321else3322{3323uint8_vec temp[4];3324for (uint32_t c = 0; c < 4; c++)3325temp[c].resize(width);33263327for (uint32_t y = 0; y < height; y++)3328{3329color_rgba rgbe(2, 2, width >> 8, width & 0xFF);3330append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));33313332for (uint32_t x = 0; x < width; x++)3333{3334float2rgbe(rgbe, img(x, y));33353336for (uint32_t c = 0; c < 4; c++)3337temp[c][x] = rgbe[c];3338}33393340for (uint32_t c = 0; c < 4; c++)3341{3342int raw_ofs = -1;33433344uint32_t x = 0;3345while (x < width)3346{3347const uint32_t num_bytes_remaining = width - x;3348const uint32_t max_run_len = basisu::minimum<uint32_t>(num_bytes_remaining, 127);3349const uint8_t cur_byte = temp[c][x];33503351uint32_t run_len = 1;3352while (run_len < max_run_len)3353{3354if (temp[c][x + run_len] != cur_byte)3355break;3356run_len++;3357}33583359const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 
0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw3360const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE)33613362if ((run_len >= 3) && (cost_to_take_run < cost_to_keep_raw))3363{3364file_data.push_back((uint8_t)(128 + run_len));3365file_data.push_back(cur_byte);33663367x += run_len;3368raw_ofs = -1;3369}3370else3371{3372if (raw_ofs < 0)3373{3374raw_ofs = (int)file_data.size();3375file_data.push_back(0);3376}33773378if (++file_data[raw_ofs] == 128)3379raw_ofs = -1;33803381file_data.push_back(cur_byte);33823383x++;3384}3385} // x33863387} // c3388} // y3389}33903391return true;3392}33933394bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)3395{3396uint8_vec file_data;3397if (!write_rgbe(file_data, img, hdr_info))3398return false;3399return write_vec_to_file(pFilename, file_data);3400}34013402bool read_exr(const char* pFilename, imagef& img, int& n_chans)3403{3404n_chans = 0;34053406int width = 0, height = 0;3407float* out_rgba = nullptr;3408const char* err = nullptr;34093410int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);3411n_chans = 4;3412if (status != 0)3413{3414error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? 
err : "?");3415FreeEXRErrorMessage(err);3416free(out_rgba);3417return false;3418}34193420const uint32_t MAX_SUPPORTED_DIM = 65536;3421if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))3422{3423error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename);3424free(out_rgba);3425return false;3426}34273428img.resize(width, height);34293430if (n_chans == 1)3431{3432const float* pSrc = out_rgba;3433vec4F* pDst = img.get_ptr();34343435for (int y = 0; y < height; y++)3436{3437for (int x = 0; x < width; x++)3438{3439(*pDst)[0] = pSrc[0];3440(*pDst)[1] = pSrc[1];3441(*pDst)[2] = pSrc[2];3442(*pDst)[3] = 1.0f;34433444pSrc += 4;3445++pDst;3446}3447}3448}3449else3450{3451memcpy(img.get_ptr(), out_rgba, sizeof(float) * 4 * img.get_total_pixels());3452}34533454free(out_rgba);3455return true;3456}34573458bool read_exr(const void* pMem, size_t mem_size, imagef& img)3459{3460float* out_rgba = nullptr;3461int width = 0, height = 0;3462const char* pErr = nullptr;3463int res = LoadEXRFromMemory(&out_rgba, &width, &height, (const uint8_t*)pMem, mem_size, &pErr);3464if (res < 0)3465{3466error_printf("Failed loading .EXR image from memory! (TinyEXR error: %s)\n", pErr ? 
pErr : "?");3467FreeEXRErrorMessage(pErr);3468free(out_rgba);3469return false;3470}34713472img.resize(width, height);3473memcpy(img.get_ptr(), out_rgba, width * height * sizeof(float) * 4);3474free(out_rgba);34753476return true;3477}34783479bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags)3480{3481assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4));34823483const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0,3484store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0,3485no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0;34863487const uint32_t width = img.get_width(), height = img.get_height();3488assert(width && height);34893490if (!width || !height)3491return false;34923493float_vec layers[4];3494float* image_ptrs[4];3495for (uint32_t c = 0; c < n_chans; c++)3496{3497layers[c].resize(width * height);3498image_ptrs[c] = layers[c].get_ptr();3499}35003501// ABGR3502int chan_order[4] = { 3, 2, 1, 0 };35033504if (n_chans == 1)3505{3506// Y3507chan_order[0] = 0;3508}3509else if (n_chans == 3)3510{3511// BGR3512chan_order[0] = 2;3513chan_order[1] = 1;3514chan_order[2] = 0;3515}3516else if (n_chans != 4)3517{3518assert(0);3519return false;3520}35213522for (uint32_t y = 0; y < height; y++)3523{3524for (uint32_t x = 0; x < width; x++)3525{3526const vec4F& p = img(x, y);35273528for (uint32_t c = 0; c < n_chans; c++)3529layers[c][x + y * width] = p[chan_order[c]];3530} // x3531} // y35323533EXRHeader header;3534InitEXRHeader(&header);35353536EXRImage image;3537InitEXRImage(&image);35383539image.num_channels = n_chans;3540image.images = (unsigned char**)image_ptrs;3541image.width = width;3542image.height = height;35433544header.num_channels = n_chans;35453546header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo));35473548// Must be (A)BGR order, since most of EXR viewers expect this channel order.3549for (uint32_t i = 0; i < n_chans; i++)3550{3551char c = 'Y';3552if (n_chans == 3)3553c = 
"BGR"[i];3554else if (n_chans == 4)3555c = "ABGR"[i];35563557header.channels[i].name[0] = c;3558header.channels[i].name[1] = '\0';35593560header.channels[i].p_linear = linear_hint;3561}35623563header.pixel_types = (int*)calloc(header.num_channels, sizeof(int));3564header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int));35653566if (!no_compression)3567header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;35683569for (int i = 0; i < header.num_channels; i++)3570{3571// pixel type of input image3572header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;35733574// pixel type of output image to be stored in .EXR3575header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF;3576}35773578const char* pErr_msg = nullptr;35793580int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg);3581if (ret != TINYEXR_SUCCESS)3582{3583error_printf("Save EXR err: %s\n", pErr_msg);3584FreeEXRErrorMessage(pErr_msg);3585}35863587free(header.channels);3588free(header.pixel_types);3589free(header.requested_pixel_types);35903591return (ret == TINYEXR_SUCCESS);3592}35933594void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...)3595{3596char buf[2048];35973598va_list args;3599va_start(args, pFmt);3600#ifdef _WIN323601vsprintf_s(buf, sizeof(buf), pFmt, args);3602#else3603vsnprintf(buf, sizeof(buf), pFmt, args);3604#endif3605va_end(args);36063607const char* p = buf;36083609const uint32_t orig_x_ofs = x_ofs;36103611while (*p)3612{3613uint8_t c = *p++;3614if ((c < 32) || (c > 127))3615c = '.';36163617const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0];36183619for (uint32_t y = 0; y < 8; y++)3620{3621uint32_t row_bits = pGlpyh[y];3622for (uint32_t x = 0; x < 8; x++)3623{3624const uint32_t q = row_bits & (1 << x);36253626const color_rgba* pColor = q ? 
&fg : pBG;3627if (!pColor)3628continue;36293630if (alpha_only)3631fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);3632else3633fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);3634}3635}36363637x_ofs += 8 * scale_x;3638if ((x_ofs + 8 * scale_x) > m_width)3639{3640x_ofs = orig_x_ofs;3641y_ofs += 8 * scale_y;3642}3643}3644}36453646// Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged.3647// Only used for debugging/development.3648void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure, bool add_noise, bool per_component, bool luma_scaling)3649{3650uint32_t width = hdr_img.get_width(), height = hdr_img.get_height();36513652ldr_img.resize(width, height);36533654rand r;3655r.seed(128);36563657for (uint32_t y = 0; y < height; y++)3658{3659for (uint32_t x = 0; x < width; x++)3660{3661vec4F c(hdr_img(x, y));36623663if (per_component)3664{3665for (uint32_t t = 0; t < 3; t++)3666{3667if (c[t] <= 0.0f)3668{3669c[t] = 0.0f;3670}3671else3672{3673c[t] *= exposure;3674c[t] = c[t] / (1.0f + c[t]);3675}3676}3677}3678else3679{3680c[0] *= exposure;3681c[1] *= exposure;3682c[2] *= exposure;36833684const float L = 0.2126f * c[0] + 0.7152f * c[1] + 0.0722f * c[2];36853686float Lmapped = 0.0f;3687if (L > 0.0f)3688{3689//Lmapped = L / (1.0f + L);3690//Lmapped /= L;36913692Lmapped = 1.0f / (1.0f + L);3693}36943695c[0] = c[0] * Lmapped;3696c[1] = c[1] * Lmapped;3697c[2] = c[2] * Lmapped;36983699if (luma_scaling)3700{3701// Keeps the ratio of r/g/b intact3702float m = maximum(c[0], c[1], c[2]);3703if (m > 1.0f)3704{3705c /= m;3706}3707}3708}37093710c.clamp(0.0f, 1.0f);37113712c[3] = c[3] * 255.0f;37133714color_rgba& o = ldr_img(x, y);37153716if (add_noise)3717{3718c[0] = linear_to_srgb(c[0]) * 255.0f;3719c[1] = linear_to_srgb(c[1]) * 255.0f;3720c[2] = linear_to_srgb(c[2]) * 255.0f;37213722const float NOISE_AMP = .5f;3723c[0] += 
r.frand(-NOISE_AMP, NOISE_AMP);3724c[1] += r.frand(-NOISE_AMP, NOISE_AMP);3725c[2] += r.frand(-NOISE_AMP, NOISE_AMP);37263727c.clamp(0.0f, 255.0f);37283729o[0] = (uint8_t)fast_roundf_int(c[0]);3730o[1] = (uint8_t)fast_roundf_int(c[1]);3731o[2] = (uint8_t)fast_roundf_int(c[2]);3732o[3] = (uint8_t)fast_roundf_int(c[3]);3733}3734else3735{3736o[0] = g_fast_linear_to_srgb.convert(c[0]);3737o[1] = g_fast_linear_to_srgb.convert(c[1]);3738o[2] = g_fast_linear_to_srgb.convert(c[2]);3739o[3] = (uint8_t)fast_roundf_int(c[3]);3740}3741}3742}3743}37443745bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img)3746{3747const uint32_t width = hdr_test_img.get_width();3748const uint32_t height = hdr_test_img.get_height();37493750uint16_vec orig_half_img(width * 3 * height);3751uint16_vec half_img(width * 3 * height);37523753int max_shift = 32;37543755for (uint32_t y = 0; y < height; y++)3756{3757for (uint32_t x = 0; x < width; x++)3758{3759const vec4F& p = hdr_test_img(x, y);37603761for (uint32_t i = 0; i < 3; i++)3762{3763if (p[i] < 0.0f)3764return false;3765if (p[i] > basist::MAX_HALF_FLOAT)3766return false;37673768uint32_t h = basist::float_to_half(p[i]);3769//uint32_t orig_h = h;37703771orig_half_img[(x + y * width) * 3 + i] = (uint16_t)h;37723773// Rotate sign bit into LSB3774//h = rot_left16((uint16_t)h, 1);3775//assert(rot_right16((uint16_t)h, 1) == orig_h);3776h <<= 1;37773778half_img[(x + y * width) * 3 + i] = (uint16_t)h;37793780// Determine # of leading zero bits, ignoring the sign bit3781if (h)3782{3783int lz = clz(h) - 16;3784assert(lz >= 0 && lz <= 16);37853786assert((h << lz) <= 0xFFFF);37873788max_shift = basisu::minimum<int>(max_shift, lz);3789}3790} // i3791} // x3792} // y37933794//printf("tonemap_image_compressive: Max leading zeros: %i\n", max_shift);37953796uint32_t high_hist[256];3797clear_obj(high_hist);37983799for (uint32_t y = 0; y < height; y++)3800{3801for (uint32_t x = 0; x < width; x++)3802{3803for (uint32_t i = 0; i < 3; 
i++)3804{3805uint16_t& hf = half_img[(x + y * width) * 3 + i];38063807assert(((uint32_t)hf << max_shift) <= 65535);38083809hf <<= max_shift;38103811uint32_t h = (uint8_t)(hf >> 8);3812high_hist[h]++;3813}3814} // x3815} // y38163817uint32_t total_vals_used = 0;3818int remap_old_to_new[256];3819for (uint32_t i = 0; i < 256; i++)3820remap_old_to_new[i] = -1;38213822for (uint32_t i = 0; i < 256; i++)3823{3824if (high_hist[i] != 0)3825{3826remap_old_to_new[i] = total_vals_used;3827total_vals_used++;3828}3829}38303831assert(total_vals_used >= 1);38323833//printf("tonemap_image_compressive: Total used high byte values: %u, unused: %u\n", total_vals_used, 256 - total_vals_used);38343835bool val_used[256];3836clear_obj(val_used);38373838int remap_new_to_old[256];3839for (uint32_t i = 0; i < 256; i++)3840remap_new_to_old[i] = -1;3841BASISU_NOTE_UNUSED(remap_new_to_old);38423843int prev_c = -1;3844BASISU_NOTE_UNUSED(prev_c);3845for (uint32_t i = 0; i < 256; i++)3846{3847if (remap_old_to_new[i] >= 0)3848{3849int c;3850if (total_vals_used <= 1)3851c = remap_old_to_new[i];3852else3853{3854c = (remap_old_to_new[i] * 255 + ((total_vals_used - 1) / 2)) / (total_vals_used - 1);38553856assert(c > prev_c);3857}38583859assert(!val_used[c]);38603861remap_new_to_old[c] = i;38623863remap_old_to_new[i] = c;3864prev_c = c;38653866//printf("%u ", c);38673868val_used[c] = true;3869}3870} // i3871//printf("\n");38723873dst_img.resize(width, height);38743875for (uint32_t y = 0; y < height; y++)3876{3877for (uint32_t x = 0; x < width; x++)3878{3879for (uint32_t c = 0; c < 3; c++)3880{3881uint16_t& v16 = half_img[(x + y * width) * 3 + c];38823883uint32_t hb = v16 >> 8;3884//uint32_t lb = v16 & 0xFF;38853886assert(remap_old_to_new[hb] != -1);3887assert(remap_old_to_new[hb] <= 255);3888assert(remap_new_to_old[remap_old_to_new[hb]] == (int)hb);38893890hb = remap_old_to_new[hb];38913892//v16 = (uint16_t)((hb << 8) | lb);38933894dst_img(x, y)[c] = (uint8_t)hb;3895}3896} // x3897} // y38983899return 
true;3900}39013902bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img)3903{3904const uint32_t width = hdr_test_img.get_width();3905const uint32_t height = hdr_test_img.get_height();39063907dst_img.resize(width, height);3908dst_img.set_all(color_rgba(0, 0, 0, 255));39093910basisu::vector<basist::half_float> half_img(width * 3 * height);39113912uint32_t low_h = UINT32_MAX, high_h = 0;39133914for (uint32_t y = 0; y < height; y++)3915{3916for (uint32_t x = 0; x < width; x++)3917{3918const vec4F& p = hdr_test_img(x, y);39193920for (uint32_t i = 0; i < 3; i++)3921{3922float f = p[i];39233924if (std::isnan(f) || std::isinf(f))3925f = 0.0f;3926else if (f < 0.0f)3927f = 0.0f;3928else if (f > basist::MAX_HALF_FLOAT)3929f = basist::MAX_HALF_FLOAT;39303931uint32_t h = basist::float_to_half(f);39323933low_h = minimum(low_h, h);3934high_h = maximum(high_h, h);39353936half_img[(x + y * width) * 3 + i] = (basist::half_float)h;39373938} // i3939} // x3940} // y39413942if (low_h == high_h)3943return false;39443945for (uint32_t y = 0; y < height; y++)3946{3947for (uint32_t x = 0; x < width; x++)3948{3949for (uint32_t i = 0; i < 3; i++)3950{3951basist::half_float h = half_img[(x + y * width) * 3 + i];39523953float f = (float)(h - low_h) / (float)(high_h - low_h);39543955int iv = basisu::clamp<int>((int)std::round(f * 255.0f), 0, 255);39563957dst_img(x, y)[i] = (uint8_t)iv;39583959} // i3960} // x3961} // y39623963return true;3964}39653966} // namespace basisu396739683969