Path: blob/master/thirdparty/basis_universal/encoder/basisu_enc.cpp
20920 views
// basisu_enc.cpp1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14#include "basisu_enc.h"15#include "basisu_resampler.h"16#include "basisu_resampler_filters.h"17#include "basisu_etc.h"18#include "../transcoder/basisu_transcoder.h"19#include "basisu_bc7enc.h"20#include "jpgd.h"21#include "pvpngreader.h"22#include "basisu_opencl.h"23#include "basisu_uastc_hdr_4x4_enc.h"24#include "basisu_astc_hdr_6x6_enc.h"2526#include <vector>2728#ifndef TINYEXR_USE_ZFP29#define TINYEXR_USE_ZFP (1)30#endif31#include <tinyexr.h>3233#ifndef MINIZ_HEADER_FILE_ONLY34#define MINIZ_HEADER_FILE_ONLY35#endif36#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES37#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES38#endif39#include "basisu_miniz.h"4041#if defined(_WIN32)42// For QueryPerformanceCounter/QueryPerformanceFrequency43#define WIN32_LEAN_AND_MEAN44#include <windows.h>45#endif4647namespace basisu48{49uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;50double interval_timer::g_timer_freq;5152#if BASISU_SUPPORT_SSE53bool g_cpu_supports_sse41;54#endif5556fast_linear_to_srgb g_fast_linear_to_srgb;5758uint8_t g_hamming_dist[256] =59{600, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,611, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,621, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,632, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,641, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,652, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,662, 3, 3, 4, 3, 4, 
4, 5, 3, 4, 4, 5, 4, 5, 5, 6,673, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,681, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,692, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,702, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,713, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,722, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,733, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,743, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,754, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 876};7778// This is a Public Domain 8x8 font from here:79// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h80const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =81{82{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( )83{ 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!)84{ 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (")85{ 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#)86{ 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($)87{ 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%)88{ 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&)89{ 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (')90{ 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (()91{ 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ())92{ 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*)93{ 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+)94{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,)95{ 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-)96{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.)97{ 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/)98{ 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0)99{ 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1)100{ 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2)101{ 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3)102{ 0x38, 0x3C, 0x36, 0x33, 
0x7F, 0x30, 0x78, 0x00}, // U+0034 (4)103{ 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5)104{ 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6)105{ 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7)106{ 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8)107{ 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9)108{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:)109{ 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;)110{ 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<)111{ 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=)112{ 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>)113{ 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?)114{ 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@)115{ 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A)116{ 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B)117{ 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C)118{ 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D)119{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E)120{ 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F)121{ 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G)122{ 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H)123{ 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I)124{ 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J)125{ 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K)126{ 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L)127{ 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M)128{ 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N)129{ 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O)130{ 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P)131{ 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q)132{ 0x3F, 0x66, 
0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R)133{ 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S)134{ 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T)135{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U)136{ 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V)137{ 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W)138{ 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X)139{ 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y)140{ 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z)141{ 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([)142{ 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\)143{ 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // U+005D (])144{ 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^)145{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_)146{ 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`)147{ 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a)148{ 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b)149{ 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c)150{ 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d)151{ 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e)152{ 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f)153{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g)154{ 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h)155{ 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i)156{ 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j)157{ 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k)158{ 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l)159{ 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m)160{ 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n)161{ 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o)162{ 
0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p)
{ 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q)
{ 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r)
{ 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s)
{ 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t)
{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u)
{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v)
{ 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w)
{ 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x)
{ 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y)
{ 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z)
{ 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00}, // U+007B ({)
{ 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|)
{ 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (})
{ 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~)
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F
};

// Set once basisu_encoder_init() has completed; cleared by basisu_encoder_deinit().
bool g_library_initialized;
// Held for the whole of basisu_encoder_init() so concurrent callers initialize only once.
std::mutex g_encoder_init_mutex;

// Encoder library initialization (just call once at startup)
//
// Runs every one-time global initializer the encoder relies on. Calls after the first
// successful one return immediately. The whole body runs under g_encoder_init_mutex.
//
// use_opencl: when false the OpenCL module is not initialized at all.
// opencl_force_serialization: forwarded unchanged to opencl_init().
// Returns true (both on first initialization and when already initialized).
bool basisu_encoder_init(bool use_opencl, bool opencl_force_serialization)
{
	std::lock_guard<std::mutex> lock(g_encoder_init_mutex);

	// Already initialized by an earlier call: nothing to do.
	if (g_library_initialized)
		return true;

	detect_sse41();

	basist::basisu_transcoder_init();
	pack_etc1_solid_color_init();
	//uastc_init();
	bc7enc_compress_block_init(); // must be after uastc_init()

	// Don't bother initializing the OpenCL module at all if it's been completely disabled.
	if (use_opencl)
	{
		opencl_init(opencl_force_serialization);
	}

	interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports

	astc_hdr_enc_init();
	basist::bc6h_enc_init();
	astc_6x6_hdr::global_init();

	g_library_initialized = true;
	return true;
}

void
basisu_encoder_deinit()215{216opencl_deinit();217218g_library_initialized = false;219}220221void error_vprintf(const char* pFmt, va_list args)222{223const uint32_t BUF_SIZE = 256;224char buf[BUF_SIZE];225226va_list args_copy;227va_copy(args_copy, args);228int total_chars = vsnprintf(buf, sizeof(buf), pFmt, args_copy);229va_end(args_copy);230231if (total_chars < 0)232{233assert(0);234return;235}236237if (total_chars >= (int)BUF_SIZE)238{239basisu::vector<char> var_buf(total_chars + 1);240241va_copy(args_copy, args);242int total_chars_retry = vsnprintf(var_buf.data(), var_buf.size(), pFmt, args_copy);243va_end(args_copy);244245if (total_chars_retry < 0)246{247assert(0);248return;249}250251fprintf(stderr, "ERROR: %s", var_buf.data());252}253else254{255fprintf(stderr, "ERROR: %s", buf);256}257}258259void error_printf(const char *pFmt, ...)260{261va_list args;262va_start(args, pFmt);263error_vprintf(pFmt, args);264va_end(args);265}266267#if defined(_WIN32)268void platform_sleep(uint32_t ms)269{270Sleep(ms);271}272#else273void platform_sleep(uint32_t ms)274{275// TODO276}277#endif278279#if defined(_WIN32)280inline void query_counter(timer_ticks* pTicks)281{282QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks));283}284inline void query_counter_frequency(timer_ticks* pTicks)285{286QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));287}288#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__)289#include <sys/time.h>290inline void query_counter(timer_ticks* pTicks)291{292struct timeval cur_time;293gettimeofday(&cur_time, NULL);294*pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);295}296inline void query_counter_frequency(timer_ticks* pTicks)297{298*pTicks = 1000000;299}300#elif defined(__GNUC__)301#include <sys/timex.h>302inline void query_counter(timer_ticks* pTicks)303{304struct timeval cur_time;305gettimeofday(&cur_time, 
NULL);306*pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec);307}308inline void query_counter_frequency(timer_ticks* pTicks)309{310*pTicks = 1000000;311}312#else313#error TODO314#endif315316interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false)317{318if (!g_timer_freq)319init();320}321322void interval_timer::start()323{324query_counter(&m_start_time);325m_started = true;326m_stopped = false;327}328329void interval_timer::stop()330{331assert(m_started);332query_counter(&m_stop_time);333m_stopped = true;334}335336double interval_timer::get_elapsed_secs() const337{338assert(m_started);339if (!m_started)340return 0;341342timer_ticks stop_time = m_stop_time;343if (!m_stopped)344query_counter(&stop_time);345346timer_ticks delta = stop_time - m_start_time;347return delta * g_timer_freq;348}349350void interval_timer::init()351{352if (!g_timer_freq)353{354query_counter_frequency(&g_freq);355g_timer_freq = 1.0f / g_freq;356query_counter(&g_init_ticks);357}358}359360timer_ticks interval_timer::get_ticks()361{362if (!g_timer_freq)363init();364timer_ticks ticks;365query_counter(&ticks);366return ticks - g_init_ticks;367}368369double interval_timer::ticks_to_secs(timer_ticks ticks)370{371if (!g_timer_freq)372init();373return ticks * g_timer_freq;374}375376// Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions.377// However the gamuts/white points of REC709 and sRGB are the same.378float linear_to_srgb(float l)379{380assert(l >= 0.0f && l <= 1.0f);381if (l < .0031308f)382return saturate(l * 12.92f);383else384return saturate(1.055f * powf(l, 1.0f / 2.4f) - .055f);385}386387float srgb_to_linear(float s)388{389assert(s >= 0.0f && s <= 1.0f);390if (s < .04045f)391return saturate(s * (1.0f / 12.92f));392else393return saturate(powf((s + .055f) * (1.0f / 1.055f), 2.4f));394}395396const uint32_t MAX_32BIT_ALLOC_SIZE = 
250000000;397398bool load_tga(const char* pFilename, image& img)399{400int w = 0, h = 0, n_chans = 0;401uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans);402403if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4)))404{405error_printf("Failed loading .TGA image \"%s\"!\n", pFilename);406407if (pImage_data)408free(pImage_data);409410return false;411}412413if (sizeof(void *) == sizeof(uint32_t))414{415if (((uint64_t)w * h * n_chans) > MAX_32BIT_ALLOC_SIZE)416{417error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h);418419if (pImage_data)420free(pImage_data);421422return false;423}424}425426img.resize(w, h);427428const uint8_t *pSrc = pImage_data;429for (int y = 0; y < h; y++)430{431color_rgba *pDst = &img(0, y);432433for (int x = 0; x < w; x++)434{435pDst->r = pSrc[0];436pDst->g = pSrc[1];437pDst->b = pSrc[2];438pDst->a = (n_chans == 3) ? 255 : pSrc[3];439440pSrc += n_chans;441++pDst;442}443}444445free(pImage_data);446447return true;448}449450bool load_qoi(const char* pFilename, image& img)451{452return false;453}454455bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)456{457interval_timer tm;458tm.start();459460if (!buf_size)461return false;462463uint32_t width = 0, height = 0, num_chans = 0;464void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans);465466if (!pImage)467{468error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename);469return false;470}471472img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height);473474//debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs());475476return true;477}478479bool load_png(const char* pFilename, image& img)480{481uint8_vec buffer;482if (!read_file_to_vec(pFilename, buffer))483{484error_printf("load_png: Failed reading file \"%s\"!\n", pFilename);485return false;486}487488return load_png(buffer.data(), buffer.size(), img, 
pFilename);489}490491bool load_jpg(const char *pFilename, image& img)492{493int width = 0, height = 0, actual_comps = 0;494uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);495if (!pImage_data)496return false;497498img.init(pImage_data, width, height, 4);499500free(pImage_data);501502return true;503}504505bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img)506{507if (buf_size > INT_MAX)508{509assert(0);510return false;511}512513int width = 0, height = 0, actual_comps = 0;514uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);515if (!pImage_data)516return false;517518img.init(pImage_data, width, height, 4);519520free(pImage_data);521522return true;523}524525bool load_image(const char* pFilename, image& img)526{527std::string ext(string_get_extension(std::string(pFilename)));528529if (ext.length() == 0)530return false;531532const char *pExt = ext.c_str();533534if (strcasecmp(pExt, "png") == 0)535return load_png(pFilename, img);536if (strcasecmp(pExt, "tga") == 0)537return load_tga(pFilename, img);538if (strcasecmp(pExt, "qoi") == 0)539return load_qoi(pFilename, img);540if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) )541return load_jpg(pFilename, img);542543return false;544}545546static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)547{548img.resize(ldr_img.get_width(), ldr_img.get_height());549550for (uint32_t y = 0; y < ldr_img.get_height(); y++)551{552for (uint32_t x = 0; x < ldr_img.get_width(); x++)553{554const color_rgba& c = ldr_img(x, y);555556vec4F& d = img(x, y);557if (ldr_srgb_to_linear)558{559float r = (float)c[0];560float g = (float)c[1];561float b = 
(float)c[2];562563if (ldr_black_bias > 0.0f)564{565// ASTC HDR is noticeably weaker dealing with blocks containing some pixels with components set to 0.566// Add a very slight bias less than .5 to avoid this difficulity. When the HDR image is mapped to SDR sRGB and rounded back to 8-bits, this bias will still result in zero.567// (FWIW, in reality, a physical monitor would be unlikely to have a perfectly zero black level.)568// This is purely optional and on most images it doesn't matter visually.569if (r == 0.0f)570r = ldr_black_bias;571if (g == 0.0f)572g = ldr_black_bias;573if (b == 0.0f)574b = ldr_black_bias;575}576577// Compute how much linear light would be emitted by a SDR 80-100 nit monitor.578d[0] = srgb_to_linear(r * (1.0f / 255.0f)) * linear_nit_multiplier;579d[1] = srgb_to_linear(g * (1.0f / 255.0f)) * linear_nit_multiplier;580d[2] = srgb_to_linear(b * (1.0f / 255.0f)) * linear_nit_multiplier;581}582else583{584d[0] = c[0] * (1.0f / 255.0f) * linear_nit_multiplier;585d[1] = c[1] * (1.0f / 255.0f) * linear_nit_multiplier;586d[2] = c[2] * (1.0f / 255.0f) * linear_nit_multiplier;587}588d[3] = c[3] * (1.0f / 255.0f);589}590}591}592593bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)594{595if ((!pMem) || (!mem_size))596{597assert(0);598return false;599}600601switch (img_type)602{603case hdr_image_type::cHITRGBAHalfFloat:604{605if (mem_size != width * height * sizeof(basist::half_float) * 4)606{607assert(0);608return false;609}610611if ((!width) || (!height))612{613assert(0);614return false;615}616617const basist::half_float* pSrc_image_h = static_cast<const basist::half_float *>(pMem);618619img.resize(width, height);620for (uint32_t y = 0; y < height; y++)621{622for (uint32_t x = 0; x < width; x++)623{624const basist::half_float* pSrc_pixel = &pSrc_image_h[x * 4];625626vec4F& dst = img(x, y);627dst[0] = 
basist::half_to_float(pSrc_pixel[0]);628dst[1] = basist::half_to_float(pSrc_pixel[1]);629dst[2] = basist::half_to_float(pSrc_pixel[2]);630dst[3] = basist::half_to_float(pSrc_pixel[3]);631}632633pSrc_image_h += (width * 4);634}635636break;637}638case hdr_image_type::cHITRGBAFloat:639{640if (mem_size != width * height * sizeof(float) * 4)641{642assert(0);643return false;644}645646if ((!width) || (!height))647{648assert(0);649return false;650}651652img.resize(width, height);653memcpy((void *)img.get_ptr(), pMem, width * height * sizeof(float) * 4);654655break;656}657case hdr_image_type::cHITJPGImage:658{659image ldr_img;660if (!load_jpg(static_cast<const uint8_t*>(pMem), mem_size, ldr_img))661return false;662663convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);664break;665}666case hdr_image_type::cHITPNGImage:667{668image ldr_img;669if (!load_png(static_cast<const uint8_t *>(pMem), mem_size, ldr_img))670return false;671672convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);673break;674}675case hdr_image_type::cHITEXRImage:676{677if (!read_exr(pMem, mem_size, img))678return false;679680break;681}682case hdr_image_type::cHITHDRImage:683{684uint8_vec buf(mem_size);685memcpy(buf.get_ptr(), pMem, mem_size);686687rgbe_header_info hdr;688if (!read_rgbe(buf, img, hdr))689return false;690691break;692}693default:694assert(0);695return false;696}697698return true;699}700701bool is_image_filename_hdr(const char *pFilename)702{703std::string ext(string_get_extension(std::string(pFilename)));704705if (ext.length() == 0)706return false;707708const char* pExt = ext.c_str();709710return ((strcasecmp(pExt, "hdr") == 0) || (strcasecmp(pExt, "exr") == 0));711}712713// TODO: move parameters to struct, add a HDR clean flag to eliminate NaN's/Inf's714bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)715{716std::string 
ext(string_get_extension(std::string(pFilename)));717718if (ext.length() == 0)719return false;720721const char* pExt = ext.c_str();722723if (strcasecmp(pExt, "hdr") == 0)724{725rgbe_header_info rgbe_info;726if (!read_rgbe(pFilename, img, rgbe_info))727return false;728return true;729}730731if (strcasecmp(pExt, "exr") == 0)732{733int n_chans = 0;734if (!read_exr(pFilename, img, n_chans))735return false;736return true;737}738739// Try loading image as LDR, then optionally convert to linear light.740{741image ldr_img;742if (!load_image(pFilename, ldr_img))743return false;744745convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);746}747748return true;749}750751bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp)752{753if (!img.get_total_pixels())754return false;755756void* pPNG_data = nullptr;757size_t PNG_data_size = 0;758759if (image_save_flags & cImageSaveGrayscale)760{761uint8_vec g_pixels(img.get_total_pixels());762uint8_t* pDst = &g_pixels[0];763764for (uint32_t y = 0; y < img.get_height(); y++)765for (uint32_t x = 0; x < img.get_width(); x++)766*pDst++ = img(x, y)[grayscale_comp];767768pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false);769}770else771{772bool has_alpha = false;773774if ((image_save_flags & cImageSaveIgnoreAlpha) == 0)775has_alpha = img.has_alpha();776777if (!has_alpha)778{779uint8_vec rgb_pixels(img.get_total_pixels() * 3);780uint8_t* pDst = &rgb_pixels[0];781782for (uint32_t y = 0; y < img.get_height(); y++)783{784const color_rgba* pSrc = &img(0, y);785for (uint32_t x = 0; x < img.get_width(); x++)786{787pDst[0] = pSrc->r;788pDst[1] = pSrc->g;789pDst[2] = pSrc->b;790791pSrc++;792pDst += 3;793}794}795796pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, 
false);797}798else799{800pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false);801}802}803804if (!pPNG_data)805return false;806807bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size);808if (!status)809{810error_printf("save_png: Failed writing to filename \"%s\"!\n", pFilename);811}812813free(pPNG_data);814815return status;816}817818bool read_file_to_vec(const char* pFilename, uint8_vec& data)819{820FILE* pFile = nullptr;821#ifdef _WIN32822fopen_s(&pFile, pFilename, "rb");823#else824pFile = fopen(pFilename, "rb");825#endif826if (!pFile)827return false;828829fseek(pFile, 0, SEEK_END);830#ifdef _WIN32831int64_t filesize = _ftelli64(pFile);832#else833int64_t filesize = ftello(pFile);834#endif835if (filesize < 0)836{837fclose(pFile);838return false;839}840fseek(pFile, 0, SEEK_SET);841842if (sizeof(size_t) == sizeof(uint32_t))843{844if (filesize > 0x70000000)845{846// File might be too big to load safely in one alloc847fclose(pFile);848return false;849}850}851852if (!data.try_resize((size_t)filesize))853{854fclose(pFile);855return false;856}857858if (filesize)859{860if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize)861{862fclose(pFile);863return false;864}865}866867fclose(pFile);868return true;869}870871bool read_file_to_data(const char* pFilename, void *pData, size_t len)872{873assert(pData && len);874if ((!pData) || (!len))875return false;876877FILE* pFile = nullptr;878#ifdef _WIN32879fopen_s(&pFile, pFilename, "rb");880#else881pFile = fopen(pFilename, "rb");882#endif883if (!pFile)884return false;885886fseek(pFile, 0, SEEK_END);887#ifdef _WIN32888int64_t filesize = _ftelli64(pFile);889#else890int64_t filesize = ftello(pFile);891#endif892893if ((filesize < 0) || ((size_t)filesize < len))894{895fclose(pFile);896return false;897}898fseek(pFile, 0, SEEK_SET);899900if (fread(pData, 1, (size_t)len, pFile) != (size_t)len)901{902fclose(pFile);903return 
false;904}905906fclose(pFile);907return true;908}909910bool write_data_to_file(const char* pFilename, const void* pData, size_t len)911{912FILE* pFile = nullptr;913#ifdef _WIN32914fopen_s(&pFile, pFilename, "wb");915#else916pFile = fopen(pFilename, "wb");917#endif918if (!pFile)919return false;920921if (len)922{923if (fwrite(pData, 1, len, pFile) != len)924{925fclose(pFile);926return false;927}928}929930return fclose(pFile) != EOF;931}932933bool image_resample(const image &src, image &dst, bool srgb,934const char *pFilter, float filter_scale,935bool wrapping,936uint32_t first_comp, uint32_t num_comps)937{938assert((first_comp + num_comps) <= 4);939940const int cMaxComps = 4;941942const uint32_t src_w = src.get_width(), src_h = src.get_height();943const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();944945if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)946{947printf("Image is too large!\n");948return false;949}950951if (!src_w || !src_h || !dst_w || !dst_h)952return false;953954if ((num_comps < 1) || (num_comps > cMaxComps))955return false;956957if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))958{959printf("Image is too large!\n");960return false;961}962963if ((src_w == dst_w) && (src_h == dst_h))964{965dst = src;966return true;967}968969float srgb_to_linear_table[256];970if (srgb)971{972for (int i = 0; i < 256; ++i)973srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f));974}975976const int LINEAR_TO_SRGB_TABLE_SIZE = 8192;977uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];978979if (srgb)980{981for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)982linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255);983}984985std::vector<float> samples[cMaxComps];986Resampler *resamplers[cMaxComps];987988resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,989wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,990pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);991samples[0].resize(src_w);992993for (uint32_t i = 1; i < num_comps; ++i)994{995resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,996wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,997pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);998samples[i].resize(src_w);999}10001001uint32_t dst_y = 0;10021003for (uint32_t src_y = 0; src_y < src_h; ++src_y)1004{1005const color_rgba *pSrc = &src(0, src_y);10061007// Put source lines into resampler(s)1008for (uint32_t x = 0; x < src_w; ++x)1009{1010for (uint32_t c = 0; c < num_comps; ++c)1011{1012const uint32_t comp_index = first_comp + c;1013const uint32_t v = (*pSrc)[comp_index];10141015if (!srgb || (comp_index == 3))1016samples[c][x] = v * (1.0f / 255.0f);1017else1018samples[c][x] = srgb_to_linear_table[v];1019}10201021pSrc++;1022}10231024for (uint32_t c = 0; c < num_comps; ++c)1025{1026if (!resamplers[c]->put_line(&samples[c][0]))1027{1028for (uint32_t i = 0; i < num_comps; i++)1029delete resamplers[i];1030return false;1031}1032}10331034// Now retrieve any output lines1035for (;;)1036{1037uint32_t c;1038for (c = 0; c < num_comps; ++c)1039{1040const uint32_t comp_index = first_comp + c;10411042const float *pOutput_samples = resamplers[c]->get_line();1043if (!pOutput_samples)1044break;10451046const bool linear_flag = !srgb || (comp_index == 3);10471048color_rgba *pDst = &dst(0, dst_y);10491050for (uint32_t x = 0; x < dst_w; x++)1051{1052// TODO: Add dithering1053if (linear_flag)1054{1055int j = (int)(255.0f * pOutput_samples[x] + .5f);1056(*pDst)[comp_index] = (uint8_t)clamp<int>(j, 0, 255);1057}1058else1059{1060int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f);1061(*pDst)[comp_index] = linear_to_srgb_table[clamp<int>(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 
1)];1062}10631064pDst++;1065}1066}1067if (c < num_comps)1068break;10691070++dst_y;1071}1072}10731074for (uint32_t i = 0; i < num_comps; ++i)1075delete resamplers[i];10761077return true;1078}10791080bool image_resample(const imagef& src, imagef& dst,1081const char* pFilter, float filter_scale,1082bool wrapping,1083uint32_t first_comp, uint32_t num_comps)1084{1085assert((first_comp + num_comps) <= 4);10861087const int cMaxComps = 4;10881089const uint32_t src_w = src.get_width(), src_h = src.get_height();1090const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();10911092if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)1093{1094printf("Image is too large!\n");1095return false;1096}10971098if (!src_w || !src_h || !dst_w || !dst_h)1099return false;11001101if ((num_comps < 1) || (num_comps > cMaxComps))1102return false;11031104if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))1105{1106printf("Image is too large!\n");1107return false;1108}11091110if ((src_w == dst_w) && (src_h == dst_h) && (filter_scale == 1.0f))1111{1112dst = src;1113return true;1114}11151116std::vector<float> samples[cMaxComps];1117Resampler* resamplers[cMaxComps];11181119resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,1120wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping1121pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);1122samples[0].resize(src_w);11231124for (uint32_t i = 1; i < num_comps; ++i)1125{1126resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,1127wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 1.0f, 0.0f, // no clamping1128pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);1129samples[i].resize(src_w);1130}11311132uint32_t dst_y = 0;11331134for (uint32_t src_y = 0; src_y < src_h; ++src_y)1135{1136const vec4F* pSrc = &src(0, src_y);11371138// Put source lines into resampler(s)1139for (uint32_t x = 0; x < src_w; ++x)1140{1141for (uint32_t c = 0; c < num_comps; ++c)1142{1143const uint32_t comp_index = first_comp + c;1144const float v = (*pSrc)[comp_index];11451146samples[c][x] = v;1147}11481149pSrc++;1150}11511152for (uint32_t c = 0; c < num_comps; ++c)1153{1154if (!resamplers[c]->put_line(&samples[c][0]))1155{1156for (uint32_t i = 0; i < num_comps; i++)1157delete resamplers[i];1158return false;1159}1160}11611162// Now retrieve any output lines1163for (;;)1164{1165uint32_t c;1166for (c = 0; c < num_comps; ++c)1167{1168const uint32_t comp_index = first_comp + c;11691170const float* pOutput_samples = resamplers[c]->get_line();1171if (!pOutput_samples)1172break;11731174vec4F* pDst = &dst(0, dst_y);11751176for (uint32_t x = 0; x < dst_w; x++)1177{1178(*pDst)[comp_index] = pOutput_samples[x];1179pDst++;1180}1181}1182if (c < num_comps)1183break;11841185++dst_y;1186}1187}11881189for (uint32_t i = 0; i < num_comps; ++i)1190delete resamplers[i];11911192return true;1193}11941195void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms)1196{1197// See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen1198if (!num_syms)1199return;12001201if (1 == num_syms)1202{1203A[0].m_key = 1;1204return;1205}12061207A[0].m_key += A[1].m_key;12081209int s = 2, r = 0, next;1210for (next = 1; next < (num_syms - 1); ++next)1211{1212if ((s >= num_syms) || (A[r].m_key < A[s].m_key))1213{1214A[next].m_key = A[r].m_key;1215A[r].m_key = next;1216++r;1217}1218else1219{1220A[next].m_key = A[s].m_key;1221++s;1222}12231224if ((s >= 
num_syms) || ((r < next) && A[r].m_key < A[s].m_key))1225{1226A[next].m_key = A[next].m_key + A[r].m_key;1227A[r].m_key = next;1228++r;1229}1230else1231{1232A[next].m_key = A[next].m_key + A[s].m_key;1233++s;1234}1235}1236A[num_syms - 2].m_key = 0;12371238for (next = num_syms - 3; next >= 0; --next)1239{1240A[next].m_key = 1 + A[A[next].m_key].m_key;1241}12421243int num_avail = 1, num_used = 0, depth = 0;1244r = num_syms - 2;1245next = num_syms - 1;1246while (num_avail > 0)1247{1248for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r )1249;12501251for ( ; num_avail > num_used; --next, --num_avail)1252A[next].m_key = depth;12531254num_avail = 2 * num_used;1255num_used = 0;1256++depth;1257}1258}12591260void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)1261{1262int i;1263uint32_t total = 0;1264if (code_list_len <= 1)1265return;12661267for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++)1268pNum_codes[max_code_size] += pNum_codes[i];12691270for (i = max_code_size; i > 0; i--)1271total += (((uint32_t)pNum_codes[i]) << (max_code_size - i));12721273while (total != (1UL << max_code_size))1274{1275pNum_codes[max_code_size]--;1276for (i = max_code_size - 1; i > 0; i--)1277{1278if (pNum_codes[i])1279{1280pNum_codes[i]--;1281pNum_codes[i + 1] += 2;1282break;1283}1284}12851286total--;1287}1288}12891290sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1)1291{1292uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2];1293sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;12941295clear_obj(hist);12961297for (i = 0; i < num_syms; i++)1298{1299uint32_t freq = pSyms0[i].m_key;13001301// We scale all input frequencies to 16-bits.1302assert(freq <= UINT16_MAX);13031304hist[freq & 0xFF]++;1305hist[256 + ((freq >> 8) & 0xFF)]++;1306}13071308while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))1309total_passes--;13101311for (pass_shift 
= 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)1312{1313const uint32_t *pHist = &hist[pass << 8];1314uint32_t offsets[256], cur_ofs = 0;1315for (i = 0; i < 256; i++)1316{1317offsets[i] = cur_ofs;1318cur_ofs += pHist[i];1319}13201321for (i = 0; i < num_syms; i++)1322pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];13231324sym_freq *t = pCur_syms;1325pCur_syms = pNew_syms;1326pNew_syms = t;1327}13281329return pCur_syms;1330}13311332bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size)1333{1334if (max_code_size > cHuffmanMaxSupportedCodeSize)1335return false;1336if ((!num_syms) || (num_syms > cHuffmanMaxSyms))1337return false;13381339uint32_t total_used_syms = 0;1340for (uint32_t i = 0; i < num_syms; i++)1341if (pFreq[i])1342total_used_syms++;13431344if (!total_used_syms)1345return false;13461347std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms);1348for (uint32_t i = 0, j = 0; i < num_syms; i++)1349{1350if (pFreq[i])1351{1352sym_freq0[j].m_key = pFreq[i];1353sym_freq0[j++].m_sym_index = static_cast<uint16_t>(i);1354}1355}13561357sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]);13581359canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms);13601361int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1];1362clear_obj(num_codes);13631364for (uint32_t i = 0; i < total_used_syms; i++)1365{1366if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize)1367return false;13681369num_codes[pSym_freq[i].m_key]++;1370}13711372canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size);13731374m_code_sizes.resize(0);1375m_code_sizes.resize(num_syms);13761377m_codes.resize(0);1378m_codes.resize(num_syms);13791380for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++)1381for (uint32_t l = num_codes[i]; l > 0; l--)1382m_code_sizes[pSym_freq[--j].m_sym_index] = 
static_cast<uint8_t>(i);13831384uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1];13851386next_code[1] = 0;1387for (uint32_t j = 0, i = 2; i <= max_code_size; i++)1388next_code[i] = j = ((j + num_codes[i - 1]) << 1);13891390for (uint32_t i = 0; i < num_syms; i++)1391{1392uint32_t rev_code = 0, code, code_size;1393if ((code_size = m_code_sizes[i]) == 0)1394continue;1395if (code_size > cHuffmanMaxSupportedInternalCodeSize)1396return false;1397code = next_code[code_size]++;1398for (uint32_t l = code_size; l > 0; l--, code >>= 1)1399rev_code = (rev_code << 1) | (code & 1);1400m_codes[i] = static_cast<uint16_t>(rev_code);1401}14021403return true;1404}14051406bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size)1407{1408if ((!num_syms) || (num_syms > cHuffmanMaxSyms))1409return false;14101411uint16_vec sym_freq(num_syms);14121413uint32_t max_freq = 0;1414for (uint32_t i = 0; i < num_syms; i++)1415max_freq = maximum(max_freq, pSym_freq[i]);14161417if (max_freq < UINT16_MAX)1418{1419for (uint32_t i = 0; i < num_syms; i++)1420sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]);1421}1422else1423{1424for (uint32_t i = 0; i < num_syms; i++)1425{1426if (pSym_freq[i])1427{1428uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq);1429sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534));1430}1431}1432}14331434return init(num_syms, &sym_freq[0], max_code_size);1435}14361437void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len)1438{1439if (run_size)1440{1441if (run_size < cHuffmanSmallRepeatSizeMin)1442{1443while (run_size--)1444syms.push_back(static_cast<uint16_t>(len));1445}1446else if (run_size <= cHuffmanSmallRepeatSizeMax)1447{1448syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6)));1449}1450else1451{1452assert((run_size >= cHuffmanBigRepeatSizeMin) && 
(run_size <= cHuffmanBigRepeatSizeMax));1453syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6)));1454}1455}14561457run_size = 0;1458}14591460void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size)1461{1462if (run_size)1463{1464if (run_size < cHuffmanSmallZeroRunSizeMin)1465{1466while (run_size--)1467syms.push_back(0);1468}1469else if (run_size <= cHuffmanSmallZeroRunSizeMax)1470{1471syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6)));1472}1473else1474{1475assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax));1476syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6)));1477}1478}14791480run_size = 0;1481}14821483uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab)1484{1485const uint64_t start_bits = m_total_bits;14861487const uint8_vec &code_sizes = tab.get_code_sizes();14881489uint32_t total_used = tab.get_total_used_codes();1490put_bits(total_used, cHuffmanMaxSymsLog2);14911492if (!total_used)1493return 0;14941495uint16_vec syms;1496syms.reserve(total_used + 16);14971498uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0;14991500for (uint32_t i = 0; i <= total_used; ++i)1501{1502const uint32_t code_len = (i == total_used) ? 
0xFF : code_sizes[i];1503assert((code_len == 0xFF) || (code_len <= 16));15041505if (code_len)1506{1507end_zero_run(syms, zero_run_size);15081509if (code_len != prev_code_len)1510{1511end_nonzero_run(syms, nonzero_run_size, prev_code_len);1512if (code_len != 0xFF)1513syms.push_back(static_cast<uint16_t>(code_len));1514}1515else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax)1516end_nonzero_run(syms, nonzero_run_size, prev_code_len);1517}1518else1519{1520end_nonzero_run(syms, nonzero_run_size, prev_code_len);15211522if (++zero_run_size == cHuffmanBigZeroRunSizeMax)1523end_zero_run(syms, zero_run_size);1524}15251526prev_code_len = code_len;1527}15281529histogram h(cHuffmanTotalCodelengthCodes);1530for (uint32_t i = 0; i < syms.size(); i++)1531h.inc(syms[i] & 63);15321533huffman_encoding_table ct;1534if (!ct.init(h, 7))1535return 0;15361537assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes);15381539uint32_t total_codelength_codes;1540for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--)1541if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]])1542break;15431544assert(total_codelength_codes);15451546put_bits(total_codelength_codes, 5);1547for (uint32_t i = 0; i < total_codelength_codes; i++)1548put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3);15491550for (uint32_t i = 0; i < syms.size(); ++i)1551{1552const uint32_t l = syms[i] & 63, e = syms[i] >> 6;15531554put_code(l, ct);15551556if (l == cHuffmanSmallZeroRunCode)1557put_bits(e, cHuffmanSmallZeroRunExtraBits);1558else if (l == cHuffmanBigZeroRunCode)1559put_bits(e, cHuffmanBigZeroRunExtraBits);1560else if (l == cHuffmanSmallRepeatCode)1561put_bits(e, cHuffmanSmallRepeatExtraBits);1562else if (l == cHuffmanBigRepeatCode)1563put_bits(e, cHuffmanBigRepeatExtraBits);1564}15651566return (uint32_t)(m_total_bits - start_bits);1567}15681569bool huffman_test(int 
rand_seed)1570{1571histogram h(19);15721573// Feed in a fibonacci sequence to force large codesizes1574h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3;1575h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21;1576h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144;1577h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987;1578h[16] += 1597; h[17] += 2584; h[18] += 4181;15791580huffman_encoding_table etab;1581etab.init(h, 16);15821583{1584bitwise_coder c;1585c.init(1024);15861587c.emit_huffman_table(etab);1588for (int i = 0; i < 19; i++)1589c.put_code(i, etab);15901591c.flush();15921593basist::bitwise_decoder d;1594d.init(&c.get_bytes()[0], static_cast<uint32_t>(c.get_bytes().size()));15951596basist::huffman_decoding_table dtab;1597bool success = d.read_huffman_table(dtab);1598if (!success)1599{1600assert(0);1601printf("Failure 5\n");1602return false;1603}16041605for (uint32_t i = 0; i < 19; i++)1606{1607uint32_t s = d.decode_huffman(dtab);1608if (s != i)1609{1610assert(0);1611printf("Failure 5\n");1612return false;1613}1614}1615}16161617basisu::rand r;1618r.seed(rand_seed);16191620for (int iter = 0; iter < 500000; iter++)1621{1622printf("%u\n", iter);16231624uint32_t max_sym = r.irand(0, 8193);1625uint32_t num_codes = r.irand(1, 10000);1626uint_vec syms(num_codes);16271628for (uint32_t i = 0; i < num_codes; i++)1629{1630if (r.bit())1631syms[i] = r.irand(0, max_sym);1632else1633{1634int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum<int>(1, max_sym / 2)) + .5f);1635s = basisu::clamp<int>(s, 0, max_sym);16361637syms[i] = s;1638}16391640}16411642histogram h1(max_sym + 1);1643for (uint32_t i = 0; i < num_codes; i++)1644h1[syms[i]]++;16451646huffman_encoding_table etab2;1647if (!etab2.init(h1, 16))1648{1649assert(0);1650printf("Failed 0\n");1651return false;1652}16531654bitwise_coder c;1655c.init(1024);16561657c.emit_huffman_table(etab2);16581659for (uint32_t i = 0; i < num_codes; i++)1660c.put_code(syms[i], etab2);16611662c.flush();16631664basist::bitwise_decoder 
d;1665d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size());16661667basist::huffman_decoding_table dtab;1668bool success = d.read_huffman_table(dtab);1669if (!success)1670{1671assert(0);1672printf("Failed 2\n");1673return false;1674}16751676for (uint32_t i = 0; i < num_codes; i++)1677{1678uint32_t s = d.decode_huffman(dtab);1679if (s != syms[i])1680{1681assert(0);1682printf("Failed 4\n");1683return false;1684}1685}16861687}1688return true;1689}16901691void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1692{1693assert((num_syms > 0) && (num_indices > 0));1694assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f));16951696clear();16971698m_remap_table.resize(num_syms);1699m_entries_picked.reserve(num_syms);1700m_total_count_to_picked.resize(num_syms);17011702if (num_indices <= 1)1703return;17041705prepare_hist(num_syms, num_indices, pIndices);1706find_initial(num_syms);17071708while (m_entries_to_do.size())1709{1710// Find the best entry to move into the picked list.1711uint32_t best_entry;1712double best_count;1713find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight);17141715// We now have chosen an entry to place in the picked list, now determine which side it goes on.1716const uint32_t entry_to_move = m_entries_to_do[best_entry];17171718float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight);17191720// Put entry_to_move either on the "left" or "right" side of the picked entries1721if (side <= 0)1722m_entries_picked.push_back(entry_to_move);1723else1724m_entries_picked.insert(m_entries_picked.begin(), entry_to_move);17251726// Erase best_entry from the todo list1727m_entries_to_do.erase(m_entries_to_do.begin() + best_entry);17281729// We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry1730for 
(uint32_t i = 0; i < m_entries_to_do.size(); i++)1731m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms);1732}17331734for (uint32_t i = 0; i < num_syms; i++)1735m_remap_table[m_entries_picked[i]] = i;1736}17371738void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices)1739{1740m_hist.resize(0);1741m_hist.resize(num_syms * num_syms);17421743for (uint32_t i = 0; i < num_indices; i++)1744{1745const uint32_t idx = pIndices[i];1746inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms);1747inc_hist(idx, (i > 0) ? pIndices[i - 1] : -1, num_syms);1748}1749}17501751void palette_index_reorderer::find_initial(uint32_t num_syms)1752{1753uint32_t max_count = 0, max_index = 0;1754for (uint32_t i = 0; i < num_syms * num_syms; i++)1755if (m_hist[i] > max_count)1756max_count = m_hist[i], max_index = i;17571758uint32_t a = max_index / num_syms, b = max_index % num_syms;17591760const size_t ofs = m_entries_picked.size();17611762m_entries_picked.push_back(a);1763m_entries_picked.push_back(b);17641765for (uint32_t i = 0; i < num_syms; i++)1766if ((i != m_entries_picked[ofs + 1]) && (i != m_entries_picked[ofs]))1767m_entries_to_do.push_back(i);17681769for (uint32_t i = 0; i < m_entries_to_do.size(); i++)1770for (uint32_t j = 0; j < m_entries_picked.size(); j++)1771m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms);1772}17731774void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1775{1776best_entry = 0;1777best_count = 0;17781779for (uint32_t i = 0; i < m_entries_to_do.size(); i++)1780{1781const uint32_t u = m_entries_to_do[i];1782double total_count = m_total_count_to_picked[u];17831784if (pDist_func)1785{1786float w = maximum<float>((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, 
m_entries_picked.back(), pCtx));1787assert((w >= 0.0f) && (w <= 1.0f));1788total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w);1789}17901791if (total_count <= best_count)1792continue;17931794best_entry = i;1795best_count = total_count;1796}1797}17981799float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)1800{1801float which_side = 0;18021803int l_count = 0, r_count = 0;1804for (uint32_t j = 0; j < m_entries_picked.size(); j++)1805{1806const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1));1807which_side += static_cast<float>(r * count);1808if (r >= 0)1809l_count += r * count;1810else1811r_count += -r * count;1812}18131814if (pDist_func)1815{1816float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx));1817float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx));1818which_side = w_left * l_count - w_right * r_count;1819}1820return which_side;1821}18221823void image_metrics::calc(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool log)1824{1825assert((first_chan < 4U) && (first_chan + total_chans <= 4U));18261827const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1828const uint32_t height = basisu::minimum(a.get_height(), b.get_height());18291830double max_e = -1e+30f;1831double sum = 0.0f, sum_sqr = 0.0f;18321833m_has_neg = false;1834m_any_abnormal = false;1835m_hf_mag_overflow = false;18361837for (uint32_t y = 0; y < height; y++)1838{1839for (uint32_t x = 0; x < width; x++)1840{1841const vec4F& ca = a(x, y), &cb = b(x, y);18421843if (total_chans)1844{1845for (uint32_t c = 0; c < total_chans; c++)1846{1847float fa = ca[first_chan + c], fb = 
cb[first_chan + c];18481849if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))1850m_hf_mag_overflow = true;18511852if ((fa < 0.0f) || (fb < 0.0f))1853m_has_neg = true;18541855if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))1856m_any_abnormal = true;18571858const double delta = fabs(fa - fb);1859max_e = basisu::maximum<double>(max_e, delta);18601861if (log)1862{1863double log2_delta = log2f(basisu::maximum(0.0f, fa) + 1.0f) - log2f(basisu::maximum(0.0f, fb) + 1.0f);18641865sum += fabs(log2_delta);1866sum_sqr += log2_delta * log2_delta;1867}1868else1869{1870sum += fabs(delta);1871sum_sqr += delta * delta;1872}1873}1874}1875else1876{1877for (uint32_t c = 0; c < 3; c++)1878{1879float fa = ca[c], fb = cb[c];18801881if ((fabs(fa) > basist::MAX_HALF_FLOAT) || (fabs(fb) > basist::MAX_HALF_FLOAT))1882m_hf_mag_overflow = true;18831884if ((fa < 0.0f) || (fb < 0.0f))1885m_has_neg = true;18861887if (std::isinf(fa) || std::isinf(fb) || std::isnan(fa) || std::isnan(fb))1888m_any_abnormal = true;1889}18901891double ca_l = get_luminance(ca), cb_l = get_luminance(cb);18921893double delta = fabs(ca_l - cb_l);1894max_e = basisu::maximum(max_e, delta);18951896if (log)1897{1898double log2_delta = log2(basisu::maximum<double>(0.0f, ca_l) + 1.0f) - log2(basisu::maximum<double>(0.0f, cb_l) + 1.0f);18991900sum += fabs(log2_delta);1901sum_sqr += log2_delta * log2_delta;1902}1903else1904{1905sum += delta;1906sum_sqr += delta * delta;1907}1908}1909}1910}19111912m_max = (double)(max_e);19131914double total_values = (double)width * (double)height;1915if (avg_comp_error)1916total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);19171918m_mean = (float)(sum / total_values);1919m_mean_squared = (float)(sum_sqr / total_values);1920m_rms = (float)sqrt(sum_sqr / total_values);19211922const double max_val = 1.0f;1923m_psnr = m_rms ? 
(float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;1924}19251926void image_metrics::calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)1927{1928assert(total_chans);1929assert((first_chan < 4U) && (first_chan + total_chans <= 4U));19301931const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1932const uint32_t height = basisu::minimum(a.get_height(), b.get_height());19331934m_has_neg = false;1935m_hf_mag_overflow = false;1936m_any_abnormal = false;19371938uint_vec hist(65536);19391940for (uint32_t y = 0; y < height; y++)1941{1942for (uint32_t x = 0; x < width; x++)1943{1944const vec4F& ca = a(x, y), &cb = b(x, y);19451946for (uint32_t i = 0; i < 4; i++)1947{1948if ((ca[i] < 0.0f) || (cb[i] < 0.0f))1949m_has_neg = true;19501951if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))1952m_hf_mag_overflow = true;19531954if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))1955m_any_abnormal = true;1956}19571958int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };1959int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };19601961for (uint32_t c = 0; c < total_chans; c++)1962hist[iabs(cah[first_chan + c] - cbh[first_chan + c]) & 65535]++;19631964} // x1965} // y19661967m_max = 0;1968double sum = 0.0f, sum2 = 0.0f;1969for (uint32_t i = 0; i < 65536; i++)1970{1971if (hist[i])1972{1973m_max = basisu::maximum<double>(m_max, (double)i);1974double v = (double)i * (double)hist[i];1975sum += v;1976sum2 += (double)i * v;1977}1978}19791980double total_values = (double)width * (double)height;1981if (avg_comp_error)1982total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);19831984const float max_val = 65535.0f;1985m_mean = (float)clamp<double>(sum / 
total_values, 0.0f, max_val);1986m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);1987m_rms = (float)sqrt(m_mean_squared);1988m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;1989}19901991// Alt. variant, same as calc_half(), for validation.1992void image_metrics::calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)1993{1994assert(total_chans);1995assert((first_chan < 4U) && (first_chan + total_chans <= 4U));19961997const uint32_t width = basisu::minimum(a.get_width(), b.get_width());1998const uint32_t height = basisu::minimum(a.get_height(), b.get_height());19992000m_has_neg = false;2001m_hf_mag_overflow = false;2002m_any_abnormal = false;20032004double sum = 0.0f, sum2 = 0.0f;2005m_max = 0;20062007for (uint32_t y = 0; y < height; y++)2008{2009for (uint32_t x = 0; x < width; x++)2010{2011const vec4F& ca = a(x, y), & cb = b(x, y);20122013for (uint32_t i = 0; i < 4; i++)2014{2015if ((ca[i] < 0.0f) || (cb[i] < 0.0f))2016m_has_neg = true;20172018if ((fabs(ca[i]) > basist::MAX_HALF_FLOAT) || (fabs(cb[i]) > basist::MAX_HALF_FLOAT))2019m_hf_mag_overflow = true;20202021if (std::isnan(ca[i]) || std::isnan(cb[i]) || std::isinf(ca[i]) || std::isinf(cb[i]))2022m_any_abnormal = true;2023}20242025int cah[4] = { basist::float_to_half(ca[0]), basist::float_to_half(ca[1]), basist::float_to_half(ca[2]), basist::float_to_half(ca[3]) };2026int cbh[4] = { basist::float_to_half(cb[0]), basist::float_to_half(cb[1]), basist::float_to_half(cb[2]), basist::float_to_half(cb[3]) };20272028for (uint32_t c = 0; c < total_chans; c++)2029{2030int diff = iabs(cah[first_chan + c] - cbh[first_chan + c]);2031if (diff)2032m_max = std::max<double>(m_max, (double)diff);20332034sum += diff;2035sum2 += squarei(cah[first_chan + c] - cbh[first_chan + c]);2036}20372038} // x2039} // y20402041double total_values = (double)width * (double)height;2042if 
(avg_comp_error)2043total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);20442045const float max_val = 65535.0f;2046m_mean = (float)clamp<double>(sum / total_values, 0.0f, max_val);2047m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, max_val * max_val);2048m_rms = (float)sqrt(m_mean_squared);2049m_psnr = m_rms ? (float)clamp<double>(log10(max_val / m_rms) * 20.0f, 0.0f, 1000.0f) : 1000.0f;2050}20512052void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma)2053{2054assert((first_chan < 4U) && (first_chan + total_chans <= 4U));20552056const uint32_t width = basisu::minimum(a.get_width(), b.get_width());2057const uint32_t height = basisu::minimum(a.get_height(), b.get_height());20582059double hist[256];2060clear_obj(hist);20612062m_has_neg = false;2063m_any_abnormal = false;2064m_hf_mag_overflow = false;20652066for (uint32_t y = 0; y < height; y++)2067{2068for (uint32_t x = 0; x < width; x++)2069{2070const color_rgba &ca = a(x, y), &cb = b(x, y);20712072if (total_chans)2073{2074for (uint32_t c = 0; c < total_chans; c++)2075hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;2076}2077else2078{2079if (use_601_luma)2080hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++;2081else2082hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++;2083}2084}2085}20862087m_max = 0;2088double sum = 0.0f, sum2 = 0.0f;2089for (uint32_t i = 0; i < 256; i++)2090{2091if (hist[i])2092{2093m_max = basisu::maximum<double>(m_max, (double)i);2094double v = i * hist[i];2095sum += v;2096sum2 += i * v;2097}2098}20992100double total_values = (double)width * (double)height;2101if (avg_comp_error)2102total_values *= (double)clamp<uint32_t>(total_chans, 1, 4);21032104m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0);2105m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f);2106m_rms = (float)sqrt(m_mean_squared);2107m_psnr = m_rms ? 
// Hashes len bytes at pBuf to a 32-bit value (Hsieh-style mixing of 16-bit words).
//
// pBuf  Bytes to hash; may be null and may have any alignment.
// len   Number of bytes.
//
// Returns 0 when pBuf is null or len is 0.
//
// 16-bit words are assembled from individual bytes in little-endian order rather than read
// through a uint16_t pointer, so the function has no alignment requirement. On little-endian
// targets the result is identical to the pointer-cast version; on big-endian targets the
// values differ from what a native 16-bit load produced.
uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
{
	if (!pBuf || !len)
		return 0;

	// Little-endian 16-bit fetch that is valid at any address.
	const auto read16 = [](const uint8_t *p) -> uint32_t
	{
		return static_cast<uint32_t>(p[0]) | (static_cast<uint32_t>(p[1]) << 8);
	};

	// Sign-extends one byte to 32 bits. Trailing bytes are mixed in as signed chars; doing
	// the extension up front keeps the later shift on an unsigned value (left-shifting a
	// negative int is undefined before C++20).
	const auto sext8 = [](uint8_t v) -> uint32_t
	{
		return static_cast<uint32_t>(static_cast<int32_t>(static_cast<signed char>(v)));
	};

	uint32_t h = static_cast<uint32_t>(len);

	const uint32_t bytes_left = static_cast<uint32_t>(len & 3);
	len >>= 2;

	// Main loop: consume four bytes (two 16-bit words) per iteration.
	while (len--)
	{
		h += read16(pBuf);

		const uint32_t t = (read16(pBuf + sizeof(uint16_t)) << 11) ^ h;
		h = (h << 16) ^ t;

		pBuf += sizeof(uint32_t);

		h += h >> 11;
	}

	// Mix in the 1-3 trailing bytes.
	switch (bytes_left)
	{
	case 1:
		h += sext8(pBuf[0]);
		h ^= h << 10;
		h += h >> 1;
		break;
	case 2:
		h += read16(pBuf);
		h ^= h << 11;
		h += h >> 17;
		break;
	case 3:
		h += read16(pBuf);
		h ^= h << 16;
		h ^= sext8(pBuf[sizeof(uint16_t)]) << 18;
		h += h >> 11;
		break;
	default:
		break;
	}

	// Final avalanche.
	h ^= h << 3;
	h += h >> 5;
	h ^= h << 4;
	h += h >> 17;
	h ^= h << 25;
	h += h >> 6;

	return h;
}
num_threads) :2215m_num_active_jobs(0)2216{2217m_kill_flag.store(false);2218m_num_active_workers.store(0);22192220assert(num_threads >= 1U);22212222debug_printf("job_pool::job_pool: %u total threads\n", num_threads);22232224if (num_threads > 1)2225{2226m_threads.resize(num_threads - 1);22272228for (int i = 0; i < ((int)num_threads - 1); i++)2229m_threads[i] = std::thread([this, i] { job_thread(i); });2230}2231}22322233job_pool::~job_pool()2234{2235debug_printf("job_pool::~job_pool\n");22362237// Notify all workers that they need to die right now.2238{2239std::lock_guard<std::mutex> lk(m_mutex);2240m_kill_flag.store(true);2241}22422243m_has_work.notify_all();22442245#ifdef __EMSCRIPTEN__2246for ( ; ; )2247{2248if (m_num_active_workers.load() <= 0)2249break;2250std::this_thread::sleep_for(std::chrono::milliseconds(50));2251}22522253// At this point all worker threads should be exiting or exited.2254// We could call detach(), but this seems to just call join() anyway.2255#endif22562257// Wait for all worker threads to exit.2258for (uint32_t i = 0; i < m_threads.size(); i++)2259m_threads[i].join();2260}22612262void job_pool::add_job(const std::function<void()>& job)2263{2264std::unique_lock<std::mutex> lock(m_mutex);22652266m_queue.emplace_back(job);22672268const size_t queue_size = m_queue.size();22692270lock.unlock();22712272if (queue_size > 1)2273m_has_work.notify_one();2274}22752276void job_pool::add_job(std::function<void()>&& job)2277{2278std::unique_lock<std::mutex> lock(m_mutex);22792280m_queue.emplace_back(std::move(job));22812282const size_t queue_size = m_queue.size();22832284lock.unlock();22852286if (queue_size > 1)2287{2288m_has_work.notify_one();2289}2290}22912292void job_pool::wait_for_all()2293{2294std::unique_lock<std::mutex> lock(m_mutex);22952296// Drain the job queue on the calling thread.2297while (!m_queue.empty())2298{2299std::function<void()> 
job(m_queue.back());2300m_queue.pop_back();23012302lock.unlock();23032304job();23052306lock.lock();2307}23082309// The queue is empty, now wait for all active jobs to finish up.2310#ifndef __EMSCRIPTEN__2311m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } );2312#else2313// Avoid infinite blocking2314for (; ; )2315{2316if (m_no_more_jobs.wait_for(lock, std::chrono::milliseconds(50), [this] { return !m_num_active_jobs; }))2317{2318break;2319}2320}2321#endif2322}23232324void job_pool::job_thread(uint32_t index)2325{2326BASISU_NOTE_UNUSED(index);2327//debug_printf("job_pool::job_thread: starting %u\n", index);23282329m_num_active_workers.fetch_add(1);23302331while (!m_kill_flag)2332{2333std::unique_lock<std::mutex> lock(m_mutex);23342335// Wait for any jobs to be issued.2336#if 02337m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } );2338#else2339// For more safety vs. buggy RTL's. Worse case we stall for a second vs. locking up forever if something goes wrong.2340m_has_work.wait_for(lock, std::chrono::milliseconds(1000), [this] {2341return m_kill_flag || !m_queue.empty();2342});2343#endif23442345// Check to see if we're supposed to exit.2346if (m_kill_flag)2347break;23482349if (m_queue.empty())2350continue;23512352// Get the job and execute it.2353std::function<void()> job(m_queue.back());2354m_queue.pop_back();23552356++m_num_active_jobs;23572358lock.unlock();23592360job();23612362lock.lock();23632364--m_num_active_jobs;23652366// Now check if there are no more jobs remaining.2367const bool all_done = m_queue.empty() && !m_num_active_jobs;23682369lock.unlock();23702371if (all_done)2372m_no_more_jobs.notify_all();2373}23742375m_num_active_workers.fetch_add(-1);23762377//debug_printf("job_pool::job_thread: exiting\n");2378}23792380// .TGA image loading2381#pragma pack(push)2382#pragma pack(1)2383struct tga_header2384{2385uint8_t m_id_len;2386uint8_t m_cmap;2387uint8_t m_type;2388packed_uint<2> m_cmap_first;2389packed_uint<2> 
m_cmap_len;2390uint8_t m_cmap_bpp;2391packed_uint<2> m_x_org;2392packed_uint<2> m_y_org;2393packed_uint<2> m_width;2394packed_uint<2> m_height;2395uint8_t m_depth;2396uint8_t m_desc;2397};2398#pragma pack(pop)23992400const uint32_t MAX_TGA_IMAGE_SIZE = 16384;24012402enum tga_image_type2403{2404cITPalettized = 1,2405cITRGB = 2,2406cITGrayscale = 32407};24082409uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans)2410{2411width = 0;2412height = 0;2413n_chans = 0;24142415if (buf_size <= sizeof(tga_header))2416return nullptr;24172418const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf);24192420if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE))2421return nullptr;24222423if (hdr.m_desc >> 6)2424return nullptr;24252426// Simple validation2427if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1))2428return nullptr;24292430if (hdr.m_cmap)2431{2432if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32))2433return nullptr;24342435// Nobody implements CMapFirst correctly, so we're not supporting it. 
Never seen it used, either.2436if (hdr.m_cmap_first != 0)2437return nullptr;2438}24392440const bool x_flipped = (hdr.m_desc & 0x10) != 0;2441const bool y_flipped = (hdr.m_desc & 0x20) == 0;24422443bool rle_flag = false;2444int file_image_type = hdr.m_type;2445if (file_image_type > 8)2446{2447file_image_type -= 8;2448rle_flag = true;2449}24502451const tga_image_type image_type = static_cast<tga_image_type>(file_image_type);24522453switch (file_image_type)2454{2455case cITRGB:2456if (hdr.m_depth == 8)2457return nullptr;2458break;2459case cITPalettized:2460if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0))2461return nullptr;2462break;2463case cITGrayscale:2464if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0))2465return nullptr;2466if ((hdr.m_depth != 8) && (hdr.m_depth != 16))2467return nullptr;2468break;2469default:2470return nullptr;2471}24722473uint32_t tga_bytes_per_pixel = 0;24742475switch (hdr.m_depth)2476{2477case 32:2478tga_bytes_per_pixel = 4;2479n_chans = 4;2480break;2481case 24:2482tga_bytes_per_pixel = 3;2483n_chans = 3;2484break;2485case 16:2486case 15:2487tga_bytes_per_pixel = 2;2488// For compatibility with stb_image_write.h2489n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3;2490break;2491case 8:2492tga_bytes_per_pixel = 1;2493// For palettized RGBA support, which both FreeImage and stb_image support.2494n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 
4 : 3;2495break;2496default:2497return nullptr;2498}24992500//const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel;25012502const uint8_t *pSrc = pBuf + sizeof(tga_header);2503uint32_t bytes_remaining = buf_size - sizeof(tga_header);25042505if (hdr.m_id_len)2506{2507if (bytes_remaining < hdr.m_id_len)2508return nullptr;2509pSrc += hdr.m_id_len;2510bytes_remaining += hdr.m_id_len;2511}25122513color_rgba pal[256];2514for (uint32_t i = 0; i < 256; i++)2515pal[i].set(0, 0, 0, 255);25162517if ((hdr.m_cmap) && (hdr.m_cmap_len))2518{2519if (image_type == cITPalettized)2520{2521// Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years).2522if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) )2523return nullptr;25242525if (hdr.m_cmap_bpp == 32)2526{2527const uint32_t pal_size = hdr.m_cmap_len * 4;2528if (bytes_remaining < pal_size)2529return nullptr;25302531for (uint32_t i = 0; i < hdr.m_cmap_len; i++)2532{2533pal[i].r = pSrc[i * 4 + 2];2534pal[i].g = pSrc[i * 4 + 1];2535pal[i].b = pSrc[i * 4 + 0];2536pal[i].a = pSrc[i * 4 + 3];2537}25382539bytes_remaining -= pal_size;2540pSrc += pal_size;2541}2542else if (hdr.m_cmap_bpp == 24)2543{2544const uint32_t pal_size = hdr.m_cmap_len * 3;2545if (bytes_remaining < pal_size)2546return nullptr;25472548for (uint32_t i = 0; i < hdr.m_cmap_len; i++)2549{2550pal[i].r = pSrc[i * 3 + 2];2551pal[i].g = pSrc[i * 3 + 1];2552pal[i].b = pSrc[i * 3 + 0];2553pal[i].a = 255;2554}25552556bytes_remaining -= pal_size;2557pSrc += pal_size;2558}2559else2560{2561const uint32_t pal_size = hdr.m_cmap_len * 2;2562if (bytes_remaining < pal_size)2563return nullptr;25642565for (uint32_t i = 0; i < hdr.m_cmap_len; i++)2566{2567const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8);25682569pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31;2570pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31;2571pal[i].b = ((v & 31) * 255 + 15) / 
31;2572pal[i].a = 255;2573}25742575bytes_remaining -= pal_size;2576pSrc += pal_size;2577}2578}2579else2580{2581const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len;2582if (bytes_remaining < bytes_to_skip)2583return nullptr;2584pSrc += bytes_to_skip;2585bytes_remaining += bytes_to_skip;2586}2587}25882589width = hdr.m_width;2590height = hdr.m_height;25912592const uint32_t source_pitch = width * tga_bytes_per_pixel;2593const uint32_t dest_pitch = width * n_chans;25942595uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height);2596if (!pImage)2597return nullptr;25982599std::vector<uint8_t> input_line_buf;2600if (rle_flag)2601input_line_buf.resize(source_pitch);26022603int run_type = 0, run_remaining = 0;2604uint8_t run_pixel[4];2605memset(run_pixel, 0, sizeof(run_pixel));26062607for (int y = 0; y < height; y++)2608{2609const uint8_t *pLine_data;26102611if (rle_flag)2612{2613int pixels_remaining = width;2614uint8_t *pDst = &input_line_buf[0];26152616do2617{2618if (!run_remaining)2619{2620if (bytes_remaining < 1)2621{2622free(pImage);2623return nullptr;2624}26252626int v = *pSrc++;2627bytes_remaining--;26282629run_type = v & 0x80;2630run_remaining = (v & 0x7F) + 1;26312632if (run_type)2633{2634if (bytes_remaining < tga_bytes_per_pixel)2635{2636free(pImage);2637return nullptr;2638}26392640memcpy(run_pixel, pSrc, tga_bytes_per_pixel);2641pSrc += tga_bytes_per_pixel;2642bytes_remaining -= tga_bytes_per_pixel;2643}2644}26452646const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining);2647pixels_remaining -= n;2648run_remaining -= n;26492650if (run_type)2651{2652for (uint32_t i = 0; i < n; i++)2653for (uint32_t j = 0; j < tga_bytes_per_pixel; j++)2654*pDst++ = run_pixel[j];2655}2656else2657{2658const uint32_t bytes_wanted = n * tga_bytes_per_pixel;26592660if (bytes_remaining < bytes_wanted)2661{2662free(pImage);2663return nullptr;2664}26652666memcpy(pDst, pSrc, bytes_wanted);2667pDst += bytes_wanted;26682669pSrc += 
bytes_wanted;2670bytes_remaining -= bytes_wanted;2671}26722673} while (pixels_remaining);26742675assert((pDst - &input_line_buf[0]) == (int)(width * tga_bytes_per_pixel));26762677pLine_data = &input_line_buf[0];2678}2679else2680{2681if (bytes_remaining < source_pitch)2682{2683free(pImage);2684return nullptr;2685}26862687pLine_data = pSrc;2688bytes_remaining -= source_pitch;2689pSrc += source_pitch;2690}26912692// Convert to 24bpp RGB or 32bpp RGBA.2693uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0);2694const int dst_stride = x_flipped ? -((int)n_chans) : n_chans;26952696switch (hdr.m_depth)2697{2698case 32:2699assert(tga_bytes_per_pixel == 4 && n_chans == 4);2700for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride)2701{2702pDst[0] = pLine_data[2];2703pDst[1] = pLine_data[1];2704pDst[2] = pLine_data[0];2705pDst[3] = pLine_data[3];2706}2707break;2708case 24:2709assert(tga_bytes_per_pixel == 3 && n_chans == 3);2710for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride)2711{2712pDst[0] = pLine_data[2];2713pDst[1] = pLine_data[1];2714pDst[2] = pLine_data[0];2715}2716break;2717case 16:2718case 15:2719if (image_type == cITRGB)2720{2721assert(tga_bytes_per_pixel == 2 && n_chans == 3);2722for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)2723{2724const uint32_t v = pLine_data[0] | (pLine_data[1] << 8);2725pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31;2726pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31;2727pDst[2] = ((v & 31) * 255 + 15) / 31;2728}2729}2730else2731{2732assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4);2733for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride)2734{2735pDst[0] = pLine_data[0];2736pDst[1] = pLine_data[0];2737pDst[2] = pLine_data[0];2738pDst[3] = pLine_data[1];2739}2740}2741break;2742case 8:2743assert(tga_bytes_per_pixel == 1);2744if (image_type == cITPalettized)2745{2746if (hdr.m_cmap_bpp == 
32)2747{2748assert(n_chans == 4);2749for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2750{2751const uint32_t c = *pLine_data;2752pDst[0] = pal[c].r;2753pDst[1] = pal[c].g;2754pDst[2] = pal[c].b;2755pDst[3] = pal[c].a;2756}2757}2758else2759{2760assert(n_chans == 3);2761for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2762{2763const uint32_t c = *pLine_data;2764pDst[0] = pal[c].r;2765pDst[1] = pal[c].g;2766pDst[2] = pal[c].b;2767}2768}2769}2770else2771{2772assert(n_chans == 3);2773for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride)2774{2775const uint8_t c = *pLine_data;2776pDst[0] = c;2777pDst[1] = c;2778pDst[2] = c;2779}2780}2781break;2782default:2783assert(0);2784break;2785}2786} // y27872788return pImage;2789}27902791uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans)2792{2793width = height = n_chans = 0;27942795uint8_vec filedata;2796if (!read_file_to_vec(pFilename, filedata))2797return nullptr;27982799if (!filedata.size() || (filedata.size() > UINT32_MAX))2800return nullptr;28012802return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans);2803}28042805static inline void hdr_convert(const color_rgba& rgbe, vec4F& c)2806{2807if (rgbe[3] != 0)2808{2809float scale = ldexp(1.0f, rgbe[3] - 128 - 8);2810c.set((float)rgbe[0] * scale, (float)rgbe[1] * scale, (float)rgbe[2] * scale, 1.0f);2811}2812else2813{2814c.set(0.0f, 0.0f, 0.0f, 1.0f);2815}2816}28172818bool string_begins_with(const std::string& str, const char* pPhrase)2819{2820const size_t str_len = str.size();28212822const size_t phrase_len = strlen(pPhrase);2823assert(phrase_len);28242825if (str_len >= phrase_len)2826{2827#ifdef _MSC_VER2828if (_strnicmp(pPhrase, str.c_str(), phrase_len) == 0)2829#else2830if (strncasecmp(pPhrase, str.c_str(), phrase_len) == 0)2831#endif2832return true;2833}28342835return false;2836}28372838// Radiance RGBE (.HDR) image reading.2839// This code tries to preserve the original logic in 
Radiance's ray/src/common/color.c code:2840// https://www.radiance-online.org/cgi-bin/viewcvs.cgi/ray/src/common/color.c?revision=2.26&view=markup&sortby=log2841// Also see: https://flipcode.com/archives/HDR_Image_Reader.shtml.2842// https://github.com/LuminanceHDR/LuminanceHDR/blob/master/src/Libpfs/io/rgbereader.cpp.2843// https://radsite.lbl.gov/radiance/refer/filefmts.pdf2844// Buggy readers:2845// stb_image.h: appears to be a clone of rgbe.c, but with goto's (doesn't support old format files, doesn't support mixture of RLE/non-RLE scanlines)2846// http://www.graphics.cornell.edu/~bjw/rgbe.html - rgbe.c/h2847// http://www.graphics.cornell.edu/online/formats/rgbe/ - rgbe.c/.h - buggy2848bool read_rgbe(const uint8_vec &filedata, imagef& img, rgbe_header_info& hdr_info)2849{2850hdr_info.clear();28512852const uint32_t MAX_SUPPORTED_DIM = 65536;28532854if (filedata.size() < 4)2855return false;28562857// stb_image.h checks for the string "#?RADIANCE" or "#?RGBE" in the header.2858// The original Radiance header code doesn't care about the specific string.2859// opencv's reader only checks for "#?", so that's what we're going to do.2860if ((filedata[0] != '#') || (filedata[1] != '?'))2861return false;28622863//uint32_t width = 0, height = 0;2864bool is_rgbe = false;2865size_t cur_ofs = 0;28662867// Parse the lines until we encounter a blank line.2868std::string cur_line;2869for (; ; )2870{2871if (cur_ofs >= filedata.size())2872return false;28732874const uint32_t HEADER_TOO_BIG_SIZE = 4096;2875if (cur_ofs >= HEADER_TOO_BIG_SIZE)2876{2877// Header seems too large - something is likely wrong. 
Return failure.2878return false;2879}28802881uint8_t c = filedata[cur_ofs++];28822883if (c == '\n')2884{2885if (!cur_line.size())2886break;28872888if ((cur_line[0] == '#') && (!string_begins_with(cur_line, "#?")) && (!hdr_info.m_program.size()))2889{2890cur_line.erase(0, 1);2891while (cur_line.size() && (cur_line[0] == ' '))2892cur_line.erase(0, 1);28932894hdr_info.m_program = cur_line;2895}2896else if (string_begins_with(cur_line, "EXPOSURE=") && (cur_line.size() > 9))2897{2898hdr_info.m_exposure = atof(cur_line.c_str() + 9);2899hdr_info.m_has_exposure = true;2900}2901else if (string_begins_with(cur_line, "GAMMA=") && (cur_line.size() > 6))2902{2903hdr_info.m_exposure = atof(cur_line.c_str() + 6);2904hdr_info.m_has_gamma = true;2905}2906else if (cur_line == "FORMAT=32-bit_rle_rgbe")2907{2908is_rgbe = true;2909}29102911cur_line.resize(0);2912}2913else2914cur_line.push_back((char)c);2915}29162917if (!is_rgbe)2918return false;29192920// Assume and require the final line to have the image's dimensions. 
We're not supporting flipping.2921for (; ; )2922{2923if (cur_ofs >= filedata.size())2924return false;2925uint8_t c = filedata[cur_ofs++];2926if (c == '\n')2927break;2928cur_line.push_back((char)c);2929}29302931int comp[2] = { 1, 0 }; // y, x (major, minor)2932int dir[2] = { -1, 1 }; // -1, 1, (major, minor), for y -1=up2933uint32_t major_dim = 0, minor_dim = 0;29342935// Parse the dimension string, normally it'll be "-Y # +X #" (major, minor), rarely it differs2936for (uint32_t d = 0; d < 2; d++) // 0=major, 1=minor2937{2938const bool is_neg_x = (strncmp(&cur_line[0], "-X ", 3) == 0);2939const bool is_pos_x = (strncmp(&cur_line[0], "+X ", 3) == 0);2940const bool is_x = is_neg_x || is_pos_x;29412942const bool is_neg_y = (strncmp(&cur_line[0], "-Y ", 3) == 0);2943const bool is_pos_y = (strncmp(&cur_line[0], "+Y ", 3) == 0);2944const bool is_y = is_neg_y || is_pos_y;29452946if (cur_line.size() < 3)2947return false;29482949if (!is_x && !is_y)2950return false;29512952comp[d] = is_x ? 0 : 1;2953dir[d] = (is_neg_x || is_neg_y) ? -1 : 1;29542955uint32_t& dim = d ? 
minor_dim : major_dim;29562957cur_line.erase(0, 3);29582959while (cur_line.size())2960{2961char c = cur_line[0];2962if (c != ' ')2963break;2964cur_line.erase(0, 1);2965}29662967bool has_digits = false;2968while (cur_line.size())2969{2970char c = cur_line[0];2971cur_line.erase(0, 1);29722973if (c == ' ')2974break;29752976if ((c < '0') || (c > '9'))2977return false;29782979const uint32_t prev_dim = dim;2980dim = dim * 10 + (c - '0');2981if (dim < prev_dim)2982return false;29832984has_digits = true;2985}2986if (!has_digits)2987return false;29882989if ((dim < 1) || (dim > MAX_SUPPORTED_DIM))2990return false;2991}29922993// temp image: width=minor, height=major2994img.resize(minor_dim, major_dim);29952996std::vector<color_rgba> temp_scanline(minor_dim);29972998// Read the scanlines.2999for (uint32_t y = 0; y < major_dim; y++)3000{3001vec4F* pDst = &img(0, y);30023003if ((filedata.size() - cur_ofs) < 4)3004return false;30053006// Determine if the line uses the new or old format. See the logic in color.c.3007bool old_decrunch = false;3008if ((minor_dim < 8) || (minor_dim > 0x7FFF))3009{3010// Line is too short or long; must be old format.3011old_decrunch = true;3012}3013else if (filedata[cur_ofs] != 2)3014{3015// R is not 2, must be old format3016old_decrunch = true;3017}3018else3019{3020// c[0]/red is 2.Check GB and E for validity.3021color_rgba c;3022memcpy(&c, &filedata[cur_ofs], 4);30233024if ((c[1] != 2) || (c[2] & 0x80))3025{3026// G isn't 2, or the high bit of B is set which is impossible (image's > 0x7FFF pixels can't get here). Use old format.3027old_decrunch = true;3028}3029else3030{3031// Check B and E. If this isn't the minor_dim in network order, something is wrong. 
The pixel would also be denormalized, and invalid.3032uint32_t w = (c[2] << 8) | c[3];3033if (w != minor_dim)3034return false;30353036cur_ofs += 4;3037}3038}30393040if (old_decrunch)3041{3042uint32_t rshift = 0, x = 0;30433044while (x < minor_dim)3045{3046if ((filedata.size() - cur_ofs) < 4)3047return false;30483049color_rgba c;3050memcpy(&c, &filedata[cur_ofs], 4);3051cur_ofs += 4;30523053if ((c[0] == 1) && (c[1] == 1) && (c[2] == 1))3054{3055// We'll allow RLE matches to cross scanlines, but not on the very first pixel.3056if ((!x) && (!y))3057return false;30583059const uint32_t run_len = c[3] << rshift;3060const vec4F run_color(pDst[-1]);30613062if ((x + run_len) > minor_dim)3063return false;30643065for (uint32_t i = 0; i < run_len; i++)3066*pDst++ = run_color;30673068rshift += 8;3069x += run_len;3070}3071else3072{3073rshift = 0;30743075hdr_convert(c, *pDst);3076pDst++;3077x++;3078}3079}3080continue;3081}30823083// New format3084for (uint32_t s = 0; s < 4; s++)3085{3086uint32_t x_ofs = 0;3087while (x_ofs < minor_dim)3088{3089uint32_t num_remaining = minor_dim - x_ofs;30903091if (cur_ofs >= filedata.size())3092return false;30933094uint8_t count = filedata[cur_ofs++];3095if (count > 128)3096{3097count -= 128;3098if (count > num_remaining)3099return false;31003101if (cur_ofs >= filedata.size())3102return false;3103const uint8_t val = filedata[cur_ofs++];31043105for (uint32_t i = 0; i < count; i++)3106temp_scanline[x_ofs + i][s] = val;31073108x_ofs += count;3109}3110else3111{3112if ((!count) || (count > num_remaining))3113return false;31143115for (uint32_t i = 0; i < count; i++)3116{3117if (cur_ofs >= filedata.size())3118return false;3119const uint8_t val = filedata[cur_ofs++];31203121temp_scanline[x_ofs + i][s] = val;3122}31233124x_ofs += count;3125}3126} // while (x_ofs < minor_dim)3127} // c31283129// Convert all the RGBE pixels to float now3130for (uint32_t x = 0; x < minor_dim; x++, pDst++)3131hdr_convert(temp_scanline[x], *pDst);31323133assert((pDst - &img(0, 
y)) == (int)minor_dim);31343135} // y31363137// at here:3138// img(width,height)=image pixels as read from file, x=minor axis, y=major axis3139// width=minor axis dimension3140// height=major axis dimension3141// in file, pixels are emitted in minor order, them major (so major=scanlines in the file)31423143imagef final_img;3144if (comp[0] == 0) // if major axis is X3145final_img.resize(major_dim, minor_dim);3146else // major axis is Y, minor is X3147final_img.resize(minor_dim, major_dim);31483149// TODO: optimize the identity case3150for (uint32_t major_iter = 0; major_iter < major_dim; major_iter++)3151{3152for (uint32_t minor_iter = 0; minor_iter < minor_dim; minor_iter++)3153{3154const vec4F& p = img(minor_iter, major_iter);31553156uint32_t dst_x = 0, dst_y = 0;31573158// is the minor dim output x?3159if (comp[1] == 0)3160{3161// minor axis is x, major is y31623163// is minor axis (which is output x) flipped?3164if (dir[1] < 0)3165dst_x = minor_dim - 1 - minor_iter;3166else3167dst_x = minor_iter;31683169// is major axis (which is output y) flipped? 
-1=down in raster order, 1=up3170if (dir[0] < 0)3171dst_y = major_iter;3172else3173dst_y = major_dim - 1 - major_iter;3174}3175else3176{3177// minor axis is output y, major is output x31783179// is minor axis (which is output y) flipped?3180if (dir[1] < 0)3181dst_y = minor_iter;3182else3183dst_y = minor_dim - 1 - minor_iter;31843185// is major axis (which is output x) flipped?3186if (dir[0] < 0)3187dst_x = major_dim - 1 - major_iter;3188else3189dst_x = major_iter;3190}31913192final_img(dst_x, dst_y) = p;3193}3194}31953196final_img.swap(img);31973198return true;3199}32003201bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)3202{3203uint8_vec filedata;3204if (!read_file_to_vec(pFilename, filedata))3205return false;3206return read_rgbe(filedata, img, hdr_info);3207}32083209static uint8_vec& append_string(uint8_vec& buf, const char* pStr)3210{3211const size_t str_len = strlen(pStr);3212if (!str_len)3213return buf;32143215const size_t ofs = buf.size();3216buf.resize(ofs + str_len);3217memcpy(&buf[ofs], pStr, str_len);32183219return buf;3220}32213222static uint8_vec& append_string(uint8_vec& buf, const std::string& str)3223{3224if (!str.size())3225return buf;3226return append_string(buf, str.c_str());3227}32283229static inline void float2rgbe(color_rgba &rgbe, const vec4F &c)3230{3231const float red = c[0], green = c[1], blue = c[2];3232assert(red >= 0.0f && green >= 0.0f && blue >= 0.0f);32333234const float max_v = basisu::maximumf(basisu::maximumf(red, green), blue);32353236if (max_v < 1e-32f)3237rgbe.clear();3238else3239{3240int e;3241const float scale = frexp(max_v, &e) * 256.0f / max_v;3242rgbe[0] = (uint8_t)(clamp<int>((int)(red * scale), 0, 255));3243rgbe[1] = (uint8_t)(clamp<int>((int)(green * scale), 0, 255));3244rgbe[2] = (uint8_t)(clamp<int>((int)(blue * scale), 0, 255));3245rgbe[3] = (uint8_t)(e + 128);3246}3247}32483249const bool RGBE_FORCE_RAW = false;3250const bool RGBE_FORCE_OLD_CRUNCH = false; // note must readers 
(particularly stb_image.h's) don't properly support this, when they should32513252bool write_rgbe(uint8_vec &file_data, imagef& img, rgbe_header_info& hdr_info)3253{3254if (!img.get_width() || !img.get_height())3255return false;32563257const uint32_t width = img.get_width(), height = img.get_height();32583259file_data.resize(0);3260file_data.reserve(1024 + img.get_width() * img.get_height() * 4);32613262append_string(file_data, "#?RADIANCE\n");32633264if (hdr_info.m_has_exposure)3265append_string(file_data, string_format("EXPOSURE=%g\n", hdr_info.m_exposure));32663267if (hdr_info.m_has_gamma)3268append_string(file_data, string_format("GAMMA=%g\n", hdr_info.m_gamma));32693270append_string(file_data, "FORMAT=32-bit_rle_rgbe\n\n");3271append_string(file_data, string_format("-Y %u +X %u\n", height, width));32723273if (((width < 8) || (width > 0x7FFF)) || (RGBE_FORCE_RAW))3274{3275for (uint32_t y = 0; y < height; y++)3276{3277for (uint32_t x = 0; x < width; x++)3278{3279color_rgba rgbe;3280float2rgbe(rgbe, img(x, y));3281append_vector(file_data, (const uint8_t *)&rgbe, sizeof(rgbe));3282}3283}3284}3285else if (RGBE_FORCE_OLD_CRUNCH)3286{3287for (uint32_t y = 0; y < height; y++)3288{3289int prev_r = -1, prev_g = -1, prev_b = -1, prev_e = -1;3290uint32_t cur_run_len = 0;32913292for (uint32_t x = 0; x < width; x++)3293{3294color_rgba rgbe;3295float2rgbe(rgbe, img(x, y));32963297if ((rgbe[0] == prev_r) && (rgbe[1] == prev_g) && (rgbe[2] == prev_b) && (rgbe[3] == prev_e))3298{3299if (++cur_run_len == 255)3300{3301// this ensures rshift stays 0, it's lame but this path is only for testing readers3302color_rgba f(1, 1, 1, cur_run_len - 1);3303append_vector(file_data, (const uint8_t*)&f, sizeof(f));3304append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));3305cur_run_len = 0;3306}3307}3308else3309{3310if (cur_run_len > 0)3311{3312color_rgba f(1, 1, 1, cur_run_len);3313append_vector(file_data, (const uint8_t*)&f, sizeof(f));33143315cur_run_len = 
0;3316}33173318append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));33193320prev_r = rgbe[0];3321prev_g = rgbe[1];3322prev_b = rgbe[2];3323prev_e = rgbe[3];3324}3325} // x33263327if (cur_run_len > 0)3328{3329color_rgba f(1, 1, 1, cur_run_len);3330append_vector(file_data, (const uint8_t*)&f, sizeof(f));3331}3332} // y3333}3334else3335{3336uint8_vec temp[4];3337for (uint32_t c = 0; c < 4; c++)3338temp[c].resize(width);33393340for (uint32_t y = 0; y < height; y++)3341{3342color_rgba rgbe(2, 2, width >> 8, width & 0xFF);3343append_vector(file_data, (const uint8_t*)&rgbe, sizeof(rgbe));33443345for (uint32_t x = 0; x < width; x++)3346{3347float2rgbe(rgbe, img(x, y));33483349for (uint32_t c = 0; c < 4; c++)3350temp[c][x] = rgbe[c];3351}33523353for (uint32_t c = 0; c < 4; c++)3354{3355int raw_ofs = -1;33563357uint32_t x = 0;3358while (x < width)3359{3360const uint32_t num_bytes_remaining = width - x;3361const uint32_t max_run_len = basisu::minimum<uint32_t>(num_bytes_remaining, 127);3362const uint8_t cur_byte = temp[c][x];33633364uint32_t run_len = 1;3365while (run_len < max_run_len)3366{3367if (temp[c][x + run_len] != cur_byte)3368break;3369run_len++;3370}33713372const uint32_t cost_to_keep_raw = ((raw_ofs != -1) ? 
0 : 1) + run_len; // 0 or 1 bytes to start a raw run, then the repeated bytes issued as raw3373const uint32_t cost_to_take_run = 2 + 1; // 2 bytes to issue the RLE, then 1 bytes to start whatever follows it (raw or RLE)33743375if ((run_len >= 3) && (cost_to_take_run < cost_to_keep_raw))3376{3377file_data.push_back((uint8_t)(128 + run_len));3378file_data.push_back(cur_byte);33793380x += run_len;3381raw_ofs = -1;3382}3383else3384{3385if (raw_ofs < 0)3386{3387raw_ofs = (int)file_data.size();3388file_data.push_back(0);3389}33903391if (++file_data[raw_ofs] == 128)3392raw_ofs = -1;33933394file_data.push_back(cur_byte);33953396x++;3397}3398} // x33993400} // c3401} // y3402}34033404return true;3405}34063407bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info)3408{3409uint8_vec file_data;3410if (!write_rgbe(file_data, img, hdr_info))3411return false;3412return write_vec_to_file(pFilename, file_data);3413}34143415bool read_exr(const char* pFilename, imagef& img, int& n_chans)3416{3417n_chans = 0;34183419int width = 0, height = 0;3420float* out_rgba = nullptr;3421const char* err = nullptr;34223423int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);3424n_chans = 4;3425if (status != 0)3426{3427error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? 
err : "?");3428FreeEXRErrorMessage(err);3429free(out_rgba);3430return false;3431}34323433const uint32_t MAX_SUPPORTED_DIM = 65536;3434if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))3435{3436error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename);3437free(out_rgba);3438return false;3439}34403441img.resize(width, height);34423443if (n_chans == 1)3444{3445const float* pSrc = out_rgba;3446vec4F* pDst = img.get_ptr();34473448for (int y = 0; y < height; y++)3449{3450for (int x = 0; x < width; x++)3451{3452(*pDst)[0] = pSrc[0];3453(*pDst)[1] = pSrc[1];3454(*pDst)[2] = pSrc[2];3455(*pDst)[3] = 1.0f;34563457pSrc += 4;3458++pDst;3459}3460}3461}3462else3463{3464memcpy((void *)img.get_ptr(), out_rgba, static_cast<size_t>(sizeof(float) * 4 * img.get_total_pixels()));3465}34663467free(out_rgba);3468return true;3469}34703471bool read_exr(const void* pMem, size_t mem_size, imagef& img)3472{3473float* out_rgba = nullptr;3474int width = 0, height = 0;3475const char* pErr = nullptr;3476int res = LoadEXRFromMemory(&out_rgba, &width, &height, (const uint8_t*)pMem, mem_size, &pErr);3477if (res < 0)3478{3479error_printf("Failed loading .EXR image from memory! (TinyEXR error: %s)\n", pErr ? 
pErr : "?");3480FreeEXRErrorMessage(pErr);3481free(out_rgba);3482return false;3483}34843485img.resize(width, height);3486memcpy((void *)img.get_ptr(), out_rgba, width * height * sizeof(float) * 4);3487free(out_rgba);34883489return true;3490}34913492bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags)3493{3494assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4));34953496const bool linear_hint = (flags & WRITE_EXR_LINEAR_HINT) != 0,3497store_float = (flags & WRITE_EXR_STORE_FLOATS) != 0,3498no_compression = (flags & WRITE_EXR_NO_COMPRESSION) != 0;34993500const uint32_t width = img.get_width(), height = img.get_height();3501assert(width && height);35023503if (!width || !height)3504return false;35053506float_vec layers[4];3507float* image_ptrs[4];3508for (uint32_t c = 0; c < n_chans; c++)3509{3510layers[c].resize(width * height);3511image_ptrs[c] = layers[c].get_ptr();3512}35133514// ABGR3515int chan_order[4] = { 3, 2, 1, 0 };35163517if (n_chans == 1)3518{3519// Y3520chan_order[0] = 0;3521}3522else if (n_chans == 3)3523{3524// BGR3525chan_order[0] = 2;3526chan_order[1] = 1;3527chan_order[2] = 0;3528}3529else if (n_chans != 4)3530{3531assert(0);3532return false;3533}35343535for (uint32_t y = 0; y < height; y++)3536{3537for (uint32_t x = 0; x < width; x++)3538{3539const vec4F& p = img(x, y);35403541for (uint32_t c = 0; c < n_chans; c++)3542layers[c][x + y * width] = p[chan_order[c]];3543} // x3544} // y35453546EXRHeader header;3547InitEXRHeader(&header);35483549EXRImage image;3550InitEXRImage(&image);35513552image.num_channels = n_chans;3553image.images = (unsigned char**)image_ptrs;3554image.width = width;3555image.height = height;35563557header.num_channels = n_chans;35583559header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo));35603561// Must be (A)BGR order, since most of EXR viewers expect this channel order.3562for (uint32_t i = 0; i < n_chans; i++)3563{3564char c = 'Y';3565if (n_chans == 
3)3566c = "BGR"[i];3567else if (n_chans == 4)3568c = "ABGR"[i];35693570header.channels[i].name[0] = c;3571header.channels[i].name[1] = '\0';35723573header.channels[i].p_linear = linear_hint;3574}35753576header.pixel_types = (int*)calloc(header.num_channels, sizeof(int));3577header.requested_pixel_types = (int*)calloc(header.num_channels, sizeof(int));35783579if (!no_compression)3580header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;35813582for (int i = 0; i < header.num_channels; i++)3583{3584// pixel type of input image3585header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;35863587// pixel type of output image to be stored in .EXR3588header.requested_pixel_types[i] = store_float ? TINYEXR_PIXELTYPE_FLOAT : TINYEXR_PIXELTYPE_HALF;3589}35903591const char* pErr_msg = nullptr;35923593int ret = SaveEXRImageToFile(&image, &header, pFilename, &pErr_msg);3594if (ret != TINYEXR_SUCCESS)3595{3596error_printf("Save EXR err: %s\n", pErr_msg);3597FreeEXRErrorMessage(pErr_msg);3598}35993600free(header.channels);3601free(header.pixel_types);3602free(header.requested_pixel_types);36033604return (ret == TINYEXR_SUCCESS);3605}36063607void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...)3608{3609char buf[2048];36103611va_list args;3612va_start(args, pFmt);3613#ifdef _WIN323614vsprintf_s(buf, sizeof(buf), pFmt, args);3615#else3616vsnprintf(buf, sizeof(buf), pFmt, args);3617#endif3618va_end(args);36193620const char* p = buf;36213622const uint32_t orig_x_ofs = x_ofs;36233624while (*p)3625{3626uint8_t c = *p++;3627if ((c < 32) || (c > 127))3628c = '.';36293630const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0];36313632for (uint32_t y = 0; y < 8; y++)3633{3634uint32_t row_bits = pGlpyh[y];3635for (uint32_t x = 0; x < 8; x++)3636{3637const uint32_t q = row_bits & (1 << x);36383639const color_rgba* pColor = q ? 
&fg : pBG;3640if (!pColor)3641continue;36423643if (alpha_only)3644fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);3645else3646fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor);3647}3648}36493650x_ofs += 8 * scale_x;3651if ((x_ofs + 8 * scale_x) > m_width)3652{3653x_ofs = orig_x_ofs;3654y_ofs += 8 * scale_y;3655}3656}3657}36583659// Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged.3660// Only used for debugging/development.3661void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure, bool add_noise, bool per_component, bool luma_scaling)3662{3663uint32_t width = hdr_img.get_width(), height = hdr_img.get_height();36643665ldr_img.resize(width, height);36663667rand r;3668r.seed(128);36693670for (uint32_t y = 0; y < height; y++)3671{3672for (uint32_t x = 0; x < width; x++)3673{3674vec4F c(hdr_img(x, y));36753676if (per_component)3677{3678for (uint32_t t = 0; t < 3; t++)3679{3680if (c[t] <= 0.0f)3681{3682c[t] = 0.0f;3683}3684else3685{3686c[t] *= exposure;3687c[t] = c[t] / (1.0f + c[t]);3688}3689}3690}3691else3692{3693c[0] *= exposure;3694c[1] *= exposure;3695c[2] *= exposure;36963697const float L = 0.2126f * c[0] + 0.7152f * c[1] + 0.0722f * c[2];36983699float Lmapped = 0.0f;3700if (L > 0.0f)3701{3702//Lmapped = L / (1.0f + L);3703//Lmapped /= L;37043705Lmapped = 1.0f / (1.0f + L);3706}37073708c[0] = c[0] * Lmapped;3709c[1] = c[1] * Lmapped;3710c[2] = c[2] * Lmapped;37113712if (luma_scaling)3713{3714// Keeps the ratio of r/g/b intact3715float m = maximum(c[0], c[1], c[2]);3716if (m > 1.0f)3717{3718c /= m;3719}3720}3721}37223723c.clamp(0.0f, 1.0f);37243725c[3] = c[3] * 255.0f;37263727color_rgba& o = ldr_img(x, y);37283729if (add_noise)3730{3731c[0] = linear_to_srgb(c[0]) * 255.0f;3732c[1] = linear_to_srgb(c[1]) * 255.0f;3733c[2] = linear_to_srgb(c[2]) * 255.0f;37343735const float NOISE_AMP = .5f;3736c[0] += 
r.frand(-NOISE_AMP, NOISE_AMP);3737c[1] += r.frand(-NOISE_AMP, NOISE_AMP);3738c[2] += r.frand(-NOISE_AMP, NOISE_AMP);37393740c.clamp(0.0f, 255.0f);37413742o[0] = (uint8_t)fast_roundf_int(c[0]);3743o[1] = (uint8_t)fast_roundf_int(c[1]);3744o[2] = (uint8_t)fast_roundf_int(c[2]);3745o[3] = (uint8_t)fast_roundf_int(c[3]);3746}3747else3748{3749o[0] = g_fast_linear_to_srgb.convert(c[0]);3750o[1] = g_fast_linear_to_srgb.convert(c[1]);3751o[2] = g_fast_linear_to_srgb.convert(c[2]);3752o[3] = (uint8_t)fast_roundf_int(c[3]);3753}3754}3755}3756}37573758bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img)3759{3760const uint32_t width = hdr_test_img.get_width();3761const uint32_t height = hdr_test_img.get_height();37623763uint16_vec orig_half_img(width * 3 * height);3764uint16_vec half_img(width * 3 * height);37653766int max_shift = 32;37673768for (uint32_t y = 0; y < height; y++)3769{3770for (uint32_t x = 0; x < width; x++)3771{3772const vec4F& p = hdr_test_img(x, y);37733774for (uint32_t i = 0; i < 3; i++)3775{3776if (p[i] < 0.0f)3777return false;3778if (p[i] > basist::MAX_HALF_FLOAT)3779return false;37803781uint32_t h = basist::float_to_half(p[i]);3782//uint32_t orig_h = h;37833784orig_half_img[(x + y * width) * 3 + i] = (uint16_t)h;37853786// Rotate sign bit into LSB3787//h = rot_left16((uint16_t)h, 1);3788//assert(rot_right16((uint16_t)h, 1) == orig_h);3789h <<= 1;37903791half_img[(x + y * width) * 3 + i] = (uint16_t)h;37923793// Determine # of leading zero bits, ignoring the sign bit3794if (h)3795{3796int lz = clz(h) - 16;3797assert(lz >= 0 && lz <= 16);37983799assert((h << lz) <= 0xFFFF);38003801max_shift = basisu::minimum<int>(max_shift, lz);3802}3803} // i3804} // x3805} // y38063807//printf("tonemap_image_compressive: Max leading zeros: %i\n", max_shift);38083809uint32_t high_hist[256];3810clear_obj(high_hist);38113812for (uint32_t y = 0; y < height; y++)3813{3814for (uint32_t x = 0; x < width; x++)3815{3816for (uint32_t i = 0; i < 3; 
i++)3817{3818uint16_t& hf = half_img[(x + y * width) * 3 + i];38193820assert(((uint32_t)hf << max_shift) <= 65535);38213822hf <<= max_shift;38233824uint32_t h = (uint8_t)(hf >> 8);3825high_hist[h]++;3826}3827} // x3828} // y38293830uint32_t total_vals_used = 0;3831int remap_old_to_new[256];3832for (uint32_t i = 0; i < 256; i++)3833remap_old_to_new[i] = -1;38343835for (uint32_t i = 0; i < 256; i++)3836{3837if (high_hist[i] != 0)3838{3839remap_old_to_new[i] = total_vals_used;3840total_vals_used++;3841}3842}38433844assert(total_vals_used >= 1);38453846//printf("tonemap_image_compressive: Total used high byte values: %u, unused: %u\n", total_vals_used, 256 - total_vals_used);38473848bool val_used[256];3849clear_obj(val_used);38503851int remap_new_to_old[256];3852for (uint32_t i = 0; i < 256; i++)3853remap_new_to_old[i] = -1;3854BASISU_NOTE_UNUSED(remap_new_to_old);38553856int prev_c = -1;3857BASISU_NOTE_UNUSED(prev_c);3858for (uint32_t i = 0; i < 256; i++)3859{3860if (remap_old_to_new[i] >= 0)3861{3862int c;3863if (total_vals_used <= 1)3864c = remap_old_to_new[i];3865else3866{3867c = (remap_old_to_new[i] * 255 + ((total_vals_used - 1) / 2)) / (total_vals_used - 1);38683869assert(c > prev_c);3870}38713872assert(!val_used[c]);38733874remap_new_to_old[c] = i;38753876remap_old_to_new[i] = c;3877prev_c = c;38783879//printf("%u ", c);38803881val_used[c] = true;3882}3883} // i3884//printf("\n");38853886dst_img.resize(width, height);38873888for (uint32_t y = 0; y < height; y++)3889{3890for (uint32_t x = 0; x < width; x++)3891{3892for (uint32_t c = 0; c < 3; c++)3893{3894uint16_t& v16 = half_img[(x + y * width) * 3 + c];38953896uint32_t hb = v16 >> 8;3897//uint32_t lb = v16 & 0xFF;38983899assert(remap_old_to_new[hb] != -1);3900assert(remap_old_to_new[hb] <= 255);3901assert(remap_new_to_old[remap_old_to_new[hb]] == (int)hb);39023903hb = remap_old_to_new[hb];39043905//v16 = (uint16_t)((hb << 8) | lb);39063907dst_img(x, y)[c] = (uint8_t)hb;3908}3909} // x3910} // y39113912return 
true;3913}39143915bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img)3916{3917const uint32_t width = hdr_test_img.get_width();3918const uint32_t height = hdr_test_img.get_height();39193920dst_img.resize(width, height);3921dst_img.set_all(color_rgba(0, 0, 0, 255));39223923basisu::vector<basist::half_float> half_img(width * 3 * height);39243925uint32_t low_h = UINT32_MAX, high_h = 0;39263927for (uint32_t y = 0; y < height; y++)3928{3929for (uint32_t x = 0; x < width; x++)3930{3931const vec4F& p = hdr_test_img(x, y);39323933for (uint32_t i = 0; i < 3; i++)3934{3935float f = p[i];39363937if (std::isnan(f) || std::isinf(f))3938f = 0.0f;3939else if (f < 0.0f)3940f = 0.0f;3941else if (f > basist::MAX_HALF_FLOAT)3942f = basist::MAX_HALF_FLOAT;39433944uint32_t h = basist::float_to_half(f);39453946low_h = minimum(low_h, h);3947high_h = maximum(high_h, h);39483949half_img[(x + y * width) * 3 + i] = (basist::half_float)h;39503951} // i3952} // x3953} // y39543955if (low_h == high_h)3956return false;39573958for (uint32_t y = 0; y < height; y++)3959{3960for (uint32_t x = 0; x < width; x++)3961{3962for (uint32_t i = 0; i < 3; i++)3963{3964basist::half_float h = half_img[(x + y * width) * 3 + i];39653966float f = (float)(h - low_h) / (float)(high_h - low_h);39673968int iv = basisu::clamp<int>((int)std::round(f * 255.0f), 0, 255);39693970dst_img(x, y)[i] = (uint8_t)iv;39713972} // i3973} // x3974} // y39753976return true;3977}39783979} // namespace basisu398039813982