GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_enc.h
1
// basisu_enc.h
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#pragma once
16
#include "../transcoder/basisu.h"
17
#include "../transcoder/basisu_transcoder_internal.h"
18
19
#include <mutex>
20
#include <atomic>
21
#include <condition_variable>
22
#include <functional>
23
#include <thread>
24
#include <unordered_map>
25
#include <ostream>
26
27
#if !defined(_WIN32) || defined(__MINGW32__)
28
#include <libgen.h>
29
#endif
30
31
// This module is really just a huge grab bag of classes and helper functions needed by the encoder.
32
33
// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time.
34
#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0)
35
36
#if BASISU_SUPPORT_SSE
37
// Declared in basisu_kernels_imp.h, but we can't include that here otherwise it would lead to circular type errors.
38
extern void update_covar_matrix_16x16_sse41(uint32_t num_vecs, const void* pWeighted_vecs, const void* pOrigin, const uint32_t *pVec_indices, void* pMatrix16x16);
39
#endif
40
41
namespace basisu
42
{
43
extern uint8_t g_hamming_dist[256];
44
extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8];
45
46
// true if basisu_encoder_init() has been called and returned.
47
extern bool g_library_initialized;
48
49
// Encoder library initialization.
50
// This function MUST be called before encoding anything!
51
// Returns false if library initialization fails.
52
bool basisu_encoder_init(bool use_opencl = false, bool opencl_force_serialization = false);
53
void basisu_encoder_deinit();
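// Illustrative usage sketch (not part of the library): initialize once before doing any
// encoding work, then tear down when finished. The OpenCL arguments are optional.
//
//   if (!basisu::basisu_encoder_init())   // CPU-only init; returns false on failure
//       return;
//   // ... perform encoding ...
//   basisu::basisu_encoder_deinit();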
54
55
// Implemented in basisu_kernels_sse.cpp - detect_sse41() is a no-op and g_cpu_supports_sse41 stays false unless compiled with BASISU_SUPPORT_SSE=1
56
extern void detect_sse41();
57
58
#if BASISU_SUPPORT_SSE
59
extern bool g_cpu_supports_sse41;
60
#else
61
const bool g_cpu_supports_sse41 = false;
62
#endif
63
64
void error_vprintf(const char* pFmt, va_list args);
65
void error_printf(const char *pFmt, ...);
66
67
template <typename... Args>
68
inline void fmt_error_printf(const char* pFmt, Args&&... args)
69
{
70
std::string res;
71
if (!fmt_variants(res, pFmt, fmt_variant_vec{ fmt_variant(std::forward<Args>(args))... }))
72
return;
73
error_printf("%s", res.c_str());
74
}
75
76
void platform_sleep(uint32_t ms);
77
78
// Helpers
79
80
inline uint8_t clamp255(int32_t i)
81
{
82
return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
83
}
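// How clamp255() works: if any bit above the low 8 is set, i is outside [0, 255]. On the
// usual arithmetic-shift platforms, (i >> 31) is then all-ones for negative i, so ~(i >> 31)
// is 0 (clamp low), and it is 0 for large positive i, so ~(i >> 31) is 0xFFFFFFFF, which
// truncates to 255 as a uint8_t (clamp high). In-range values pass through unchanged.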
84
85
inline int left_shift32(int val, int shift)
86
{
87
assert((shift >= 0) && (shift < 32));
88
return static_cast<int>(static_cast<uint32_t>(val) << shift);
89
}
90
91
inline uint32_t left_shift32(uint32_t val, int shift)
92
{
93
assert((shift >= 0) && (shift < 32));
94
return val << shift;
95
}
96
97
inline int32_t clampi(int32_t value, int32_t low, int32_t high)
98
{
99
if (value < low)
100
value = low;
101
else if (value > high)
102
value = high;
103
return value;
104
}
105
106
inline uint8_t mul_8(uint32_t v, uint32_t a)
107
{
108
v = v * a + 128;
109
return (uint8_t)((v + (v >> 8)) >> 8);
110
}
111
112
inline int fast_roundf_int(float x)
113
{
114
return (x >= 0.0f) ? (int)(x + 0.5f) : (int)(x - 0.5f);
115
}
116
117
inline int fast_floorf_int(float x)
118
{
119
int xi = (int)x; // Truncate towards zero
120
return ((x < 0.0f) && (x != (float)xi)) ? (xi - 1) : xi;
121
}
122
123
inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
124
{
125
assert(codesize <= 64);
126
uint64_t bits = 0;
127
uint32_t total_bits = 0;
128
129
while (total_bits < codesize)
130
{
131
uint32_t byte_bit_offset = bit_offset & 7;
132
uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
133
134
uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
135
byte_bits &= ((1 << bits_to_read) - 1);
136
137
bits |= ((uint64_t)(byte_bits) << total_bits);
138
139
total_bits += bits_to_read;
140
bit_offset += bits_to_read;
141
}
142
143
return bits;
144
}
145
146
inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
147
{
148
assert(codesize <= 32);
149
uint32_t bits = 0;
150
uint32_t total_bits = 0;
151
152
while (total_bits < codesize)
153
{
154
uint32_t byte_bit_offset = bit_offset & 7;
155
uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset);
156
157
uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset;
158
byte_bits &= ((1 << bits_to_read) - 1);
159
160
bits |= (byte_bits << total_bits);
161
162
total_bits += bits_to_read;
163
bit_offset += bits_to_read;
164
}
165
166
return bits;
167
}
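// Illustrative sketch (hypothetical bytes, not real stream data): read_bits()/read_bits32()
// consume LSB-first bit fields from a byte buffer and advance the caller's bit offset.
//
//   const uint8_t data[] = { 0xB4, 0x01 };
//   uint32_t bit_ofs = 0;
//   uint32_t lo = basisu::read_bits32(data, bit_ofs, 4);  // 0x4: low 4 bits of data[0]
//   uint32_t hi = basisu::read_bits32(data, bit_ofs, 8);  // 0x1B: next 8 bits, crossing a byte
//   // bit_ofs is now 12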
168
169
// Half-open interval [l, h)
170
inline int bounds_check(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
171
inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v < h); return v; }
172
173
// Closed interval
174
inline int bounds_check_incl(int v, int l, int h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
175
inline uint32_t bounds_check_incl(uint32_t v, uint32_t l, uint32_t h) { (void)v; (void)l; (void)h; assert(v >= l && v <= h); return v; }
176
177
inline uint32_t clz(uint32_t x)
178
{
179
if (!x)
180
return 32;
181
182
uint32_t n = 0;
183
while ((x & 0x80000000) == 0)
184
{
185
x <<= 1u;
186
n++;
187
}
188
189
return n;
190
}
191
192
bool string_begins_with(const std::string& str, const char* pPhrase);
193
194
// Case sensitive, returns -1 if can't find
195
inline int string_find_first(const std::string& str, const char* pPhrase)
196
{
197
size_t res = str.find(pPhrase, 0);
198
if (res == std::string::npos)
199
return -1;
200
return (int)res;
201
}
202
203
// Hashing
204
205
inline uint32_t bitmix32c(uint32_t v)
206
{
207
v = (v + 0x7ed55d16) + (v << 12);
208
v = (v ^ 0xc761c23c) ^ (v >> 19);
209
v = (v + 0x165667b1) + (v << 5);
210
v = (v + 0xd3a2646c) ^ (v << 9);
211
v = (v + 0xfd7046c5) + (v << 3);
212
v = (v ^ 0xb55a4f09) ^ (v >> 16);
213
return v;
214
}
215
216
inline uint32_t bitmix32(uint32_t v)
217
{
218
v -= (v << 6);
219
v ^= (v >> 17);
220
v -= (v << 9);
221
v ^= (v << 4);
222
v -= (v << 3);
223
v ^= (v << 10);
224
v ^= (v >> 15);
225
return v;
226
}
227
228
inline uint32_t wang_hash(uint32_t seed)
229
{
230
seed = (seed ^ 61) ^ (seed >> 16);
231
seed *= 9;
232
seed = seed ^ (seed >> 4);
233
seed *= 0x27d4eb2d;
234
seed = seed ^ (seed >> 15);
235
return seed;
236
}
237
238
uint32_t hash_hsieh(const uint8_t* pBuf, size_t len);
239
240
template <typename Key>
241
struct bit_hasher
242
{
243
inline std::size_t operator()(const Key& k) const
244
{
245
return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
246
}
247
};
248
249
struct string_hasher
250
{
251
inline std::size_t operator()(const std::string& k) const
252
{
253
size_t l = k.size();
254
if (!l)
255
return 0;
256
return hash_hsieh(reinterpret_cast<const uint8_t*>(k.c_str()), l);
257
}
258
};
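// Illustrative sketch: these functors plug into std::unordered_map as the Hash parameter,
// e.g. for plain POD keys with no std::hash specialization. bit_hasher hashes the raw object
// bytes, so keys should be padding-free PODs. block_key here is a hypothetical example type.
//
//   struct block_key { uint32_t x, y;
//       bool operator==(const block_key& o) const { return (x == o.x) && (y == o.y); } };
//   std::unordered_map<block_key, int, basisu::bit_hasher<block_key> > lookup;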
259
260
class running_stat
261
{
262
public:
263
running_stat() { clear(); }
264
265
void clear()
266
{
267
m_n = 0;
268
m_total = 0;
269
m_old_m = 0;
270
m_new_m = 0;
271
m_old_s = 0;
272
m_new_s = 0;
273
m_min = 0;
274
m_max = 0;
275
}
276
277
void push(double x)
278
{
279
m_n++;
280
m_total += x;
281
if (m_n == 1)
282
{
283
m_old_m = m_new_m = x;
284
m_old_s = 0.0;
285
m_min = x;
286
m_max = x;
287
}
288
else
289
{
290
// See Knuth TAOCP vol 2, 3rd edition, page 232
291
m_new_m = m_old_m + (x - m_old_m) / m_n;
292
m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m);
293
m_old_m = m_new_m;
294
m_old_s = m_new_s;
295
m_min = basisu::minimum(x, m_min);
296
m_max = basisu::maximum(x, m_max);
297
}
298
}
299
300
uint32_t get_num() const
301
{
302
return m_n;
303
}
304
305
double get_total() const
306
{
307
return m_total;
308
}
309
310
double get_mean() const
311
{
312
return (m_n > 0) ? m_new_m : 0.0;
313
}
314
315
// Returns sample variance
316
double get_variance() const
317
{
318
return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0);
319
}
320
321
double get_std_dev() const
322
{
323
return sqrt(get_variance());
324
}
325
326
double get_min() const
327
{
328
return m_min;
329
}
330
331
double get_max() const
332
{
333
return m_max;
334
}
335
336
private:
337
uint32_t m_n;
338
double m_total, m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max;
339
};
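// Illustrative sketch: running_stat accumulates a stream of samples and exposes an online
// (Welford-style) mean/variance without storing the samples themselves.
//
//   basisu::running_stat st;
//   st.push(2.0); st.push(4.0); st.push(6.0);
//   double mean = st.get_mean();      // 4.0
//   double sdev = st.get_std_dev();   // 2.0 (sample std. deviation, n - 1 divisor)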
340
341
// Linear algebra
342
343
template <uint32_t N, typename T>
344
class vec
345
{
346
protected:
347
T m_v[N];
348
349
public:
350
enum { num_elements = N };
351
typedef T scalar_type;
352
353
inline vec() { }
354
inline vec(eZero) { set_zero(); }
355
356
explicit inline vec(T val) { set(val); }
357
inline vec(T v0, T v1) { set(v0, v1); }
358
inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
359
inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
360
inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
361
template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
362
363
inline const T& operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
364
inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
365
366
inline T getX() const { return m_v[0]; }
367
inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
368
inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
369
inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
370
371
inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
372
inline bool operator!=(const vec& rhs) const { return !(*this == rhs); }
373
inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
374
375
inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
376
inline void clear() { set_zero(); }
377
378
template <uint32_t OtherN, typename OtherT>
379
inline vec &set(const vec<OtherN, OtherT> &other)
380
{
381
uint32_t i;
382
if ((const void *)(&other) == (const void *)(this))
383
return *this;
384
const uint32_t m = minimum(OtherN, N);
385
for (i = 0; i < m; i++)
386
m_v[i] = static_cast<T>(other[i]);
387
for (; i < N; i++)
388
m_v[i] = 0;
389
return *this;
390
}
391
392
inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
393
inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
394
inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
395
396
inline vec &set(T v0, T v1)
397
{
398
m_v[0] = v0;
399
if (N >= 2)
400
{
401
m_v[1] = v1;
402
clear_elements(2, N);
403
}
404
return *this;
405
}
406
407
inline vec &set(T v0, T v1, T v2)
408
{
409
m_v[0] = v0;
410
if (N >= 2)
411
{
412
m_v[1] = v1;
413
if (N >= 3)
414
{
415
m_v[2] = v2;
416
clear_elements(3, N);
417
}
418
}
419
return *this;
420
}
421
422
inline vec &set(T v0, T v1, T v2, T v3)
423
{
424
m_v[0] = v0;
425
if (N >= 2)
426
{
427
m_v[1] = v1;
428
if (N >= 3)
429
{
430
m_v[2] = v2;
431
432
if (N >= 4)
433
{
434
m_v[3] = v3;
435
clear_elements(4, N); // zero any remaining elements (indices 4..N-1)
436
}
437
}
438
}
439
return *this;
440
}
441
442
inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
443
template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
444
445
inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
446
inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
447
448
inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
449
inline vec operator+ () const { return *this; }
450
inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
451
inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
452
inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
453
inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
454
inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
455
inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
456
457
friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
458
friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
459
friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
460
friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
461
friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
462
friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
463
464
static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
465
466
inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
467
468
inline T norm() const { return dot_product(*this, *this); }
469
inline T length() const { return sqrt(norm()); }
470
471
inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
472
inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
473
474
inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
475
inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
476
477
inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
478
479
inline vec get_normalized() const { vec res(*this); res.normalize_in_place(); return res; }
480
481
inline vec &clamp(T l, T h)
482
{
483
for (uint32_t i = 0; i < N; i++)
484
m_v[i] = basisu::clamp(m_v[i], l, h);
485
return *this;
486
}
487
488
static vec component_mul(const vec& a, const vec& b)
489
{
490
vec res;
491
for (uint32_t i = 0; i < N; i++)
492
res[i] = a[i] * b[i];
493
return res;
494
}
495
496
static vec component_min(const vec& a, const vec& b)
497
{
498
vec res;
499
for (uint32_t i = 0; i < N; i++)
500
res[i] = minimum(a[i], b[i]);
501
return res;
502
}
503
504
static vec component_max(const vec& a, const vec& b)
505
{
506
vec res;
507
for (uint32_t i = 0; i < N; i++)
508
res[i] = maximum(a[i], b[i]);
509
return res;
510
}
511
512
static vec lerp(const vec& a, const vec& b, float s)
513
{
514
vec res;
515
for (uint32_t i = 0; i < N; i++)
516
res[i] = basisu::lerp(a[i], b[i], s);
517
return res;
518
}
519
};
520
521
typedef vec<4, double> vec4D;
522
typedef vec<3, double> vec3D;
523
typedef vec<2, double> vec2D;
524
typedef vec<1, double> vec1D;
525
526
typedef vec<6, float> vec6F;
527
typedef vec<5, float> vec5F;
528
typedef vec<4, float> vec4F;
529
typedef vec<3, float> vec3F;
530
typedef vec<2, float> vec2F;
531
typedef vec<1, float> vec1F;
532
533
typedef vec<16, float> vec16F;
534
535
template<uint32_t N, typename T> struct bitwise_copyable< vec<N, T> > { enum { cFlag = true }; };
536
template<uint32_t N, typename T> struct bitwise_movable< vec<N, T> > { enum { cFlag = true }; };
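// Illustrative sketch: vec<N, T> behaves like a small fixed-size math vector; the typedefs
// above name the common float/double sizes.
//
//   basisu::vec3F a(1.0f, 2.0f, 2.0f), b(0.0f, 0.0f, 1.0f);
//   float d = a.dot(b);          // 2.0f
//   float l = a.length();        // 3.0f (sqrt(1 + 4 + 4))
//   basisu::vec3F n = a.get_normalized();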
537
538
template <uint32_t Rows, uint32_t Cols, typename T>
539
class matrix
540
{
541
public:
542
typedef vec<Rows, T> col_vec;
543
typedef vec<Cols, T> row_vec;
544
545
typedef T scalar_type;
546
547
enum { rows = Rows, cols = Cols };
548
549
protected:
550
row_vec m_r[Rows];
551
552
public:
553
inline matrix() {}
554
inline matrix(eZero) { set_zero(); }
555
inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
556
inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
557
558
inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
559
inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
560
561
inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
562
inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
563
564
inline matrix &set_zero()
565
{
566
for (uint32_t i = 0; i < Rows; i++)
567
m_r[i].set_zero();
568
return *this;
569
}
570
571
inline matrix &set_identity()
572
{
573
for (uint32_t i = 0; i < Rows; i++)
574
{
575
m_r[i].set_zero();
576
if (i < Cols)
577
m_r[i][i] = 1.0f;
578
}
579
return *this;
580
}
581
};
582
583
template<uint32_t R, uint32_t C, typename T> struct bitwise_copyable< matrix<R, C, T> > { enum { cFlag = true }; };
584
template<uint32_t R, uint32_t C, typename T> struct bitwise_movable< matrix<R, C, T> > { enum { cFlag = true }; };
585
586
template<uint32_t N, typename VectorType>
587
inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
588
{
589
VectorType axis;
590
if (N == 1)
591
axis.set(1.0f);
592
else
593
{
594
for (uint32_t i = 0; i < N; i++)
595
axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum<int>(N - 1, 1)));
596
}
597
598
VectorType prev_axis(axis);
599
600
// Power iterations
601
for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
602
{
603
VectorType trial_axis;
604
double max_sum = 0;
605
606
for (uint32_t i = 0; i < N; i++)
607
{
608
double sum = 0;
609
for (uint32_t j = 0; j < N; j++)
610
sum += cmatrix[i][j] * axis[j];
611
612
trial_axis[i] = static_cast<float>(sum);
613
614
max_sum = maximum(fabs(sum), max_sum);
615
}
616
617
if (max_sum != 0.0f)
618
trial_axis *= static_cast<float>(1.0f / max_sum);
619
620
VectorType delta_axis(prev_axis - trial_axis);
621
622
prev_axis = axis;
623
axis = trial_axis;
624
625
if (delta_axis.norm() < .0024f)
626
break;
627
}
628
629
return axis.normalize_in_place();
630
}
631
632
template<typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
633
{
634
for (uint32_t i = 0; i < num_indices; i++)
635
pIndices[i] = i;
636
637
std::sort(
638
pIndices,
639
pIndices + num_indices,
640
[pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }
641
);
642
}
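// Illustrative sketch: indirect_sort() fills pIndices with 0..n-1 and orders the indices by
// the key array, leaving the keys themselves untouched.
//
//   const float keys[4] = { 3.0f, 1.0f, 2.0f, 0.5f };
//   uint32_t order[4];
//   basisu::indirect_sort(4, order, keys);   // order becomes { 3, 1, 2, 0 }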
643
644
// 1-4 byte direct Radix sort.
645
template <typename T>
646
T* radix_sort(uint32_t num_vals, T* pBuf0, T* pBuf1, uint32_t key_ofs, uint32_t key_size)
647
{
648
assert(key_ofs < sizeof(T));
649
assert((key_size >= 1) && (key_size <= 4));
650
651
uint32_t hist[256 * 4];
652
653
memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
654
655
#define BASISU_GET_KEY(p) (*(uint32_t *)((uint8_t *)(p) + key_ofs))
656
657
if (key_size == 4)
658
{
659
T* p = pBuf0;
660
T* q = pBuf0 + num_vals;
661
for (; p != q; p++)
662
{
663
const uint32_t key = BASISU_GET_KEY(p);
664
665
hist[key & 0xFF]++;
666
hist[256 + ((key >> 8) & 0xFF)]++;
667
hist[512 + ((key >> 16) & 0xFF)]++;
668
hist[768 + ((key >> 24) & 0xFF)]++;
669
}
670
}
671
else if (key_size == 3)
672
{
673
T* p = pBuf0;
674
T* q = pBuf0 + num_vals;
675
for (; p != q; p++)
676
{
677
const uint32_t key = BASISU_GET_KEY(p);
678
679
hist[key & 0xFF]++;
680
hist[256 + ((key >> 8) & 0xFF)]++;
681
hist[512 + ((key >> 16) & 0xFF)]++;
682
}
683
}
684
else if (key_size == 2)
685
{
686
T* p = pBuf0;
687
T* q = pBuf0 + (num_vals >> 1) * 2;
688
689
for (; p != q; p += 2)
690
{
691
const uint32_t key0 = BASISU_GET_KEY(p);
692
const uint32_t key1 = BASISU_GET_KEY(p + 1);
693
694
hist[key0 & 0xFF]++;
695
hist[256 + ((key0 >> 8) & 0xFF)]++;
696
697
hist[key1 & 0xFF]++;
698
hist[256 + ((key1 >> 8) & 0xFF)]++;
699
}
700
701
if (num_vals & 1)
702
{
703
const uint32_t key = BASISU_GET_KEY(p);
704
705
hist[key & 0xFF]++;
706
hist[256 + ((key >> 8) & 0xFF)]++;
707
}
708
}
709
else
710
{
711
assert(key_size == 1);
712
if (key_size != 1)
713
return NULL;
714
715
T* p = pBuf0;
716
T* q = pBuf0 + (num_vals >> 1) * 2;
717
718
for (; p != q; p += 2)
719
{
720
const uint32_t key0 = BASISU_GET_KEY(p);
721
const uint32_t key1 = BASISU_GET_KEY(p + 1);
722
723
hist[key0 & 0xFF]++;
724
hist[key1 & 0xFF]++;
725
}
726
727
if (num_vals & 1)
728
{
729
const uint32_t key = BASISU_GET_KEY(p);
730
hist[key & 0xFF]++;
731
}
732
}
733
734
T* pCur = pBuf0;
735
T* pNew = pBuf1;
736
737
for (uint32_t pass = 0; pass < key_size; pass++)
738
{
739
const uint32_t* pHist = &hist[pass << 8];
740
741
uint32_t offsets[256];
742
743
uint32_t cur_ofs = 0;
744
for (uint32_t i = 0; i < 256; i += 2)
745
{
746
offsets[i] = cur_ofs;
747
cur_ofs += pHist[i];
748
749
offsets[i + 1] = cur_ofs;
750
cur_ofs += pHist[i + 1];
751
}
752
753
const uint32_t pass_shift = pass << 3;
754
755
T* p = pCur;
756
T* q = pCur + (num_vals >> 1) * 2;
757
758
for (; p != q; p += 2)
759
{
760
uint32_t c0 = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
761
uint32_t c1 = (BASISU_GET_KEY(p + 1) >> pass_shift) & 0xFF;
762
763
if (c0 == c1)
764
{
765
uint32_t dst_offset0 = offsets[c0];
766
767
offsets[c0] = dst_offset0 + 2;
768
769
pNew[dst_offset0] = p[0];
770
pNew[dst_offset0 + 1] = p[1];
771
}
772
else
773
{
774
uint32_t dst_offset0 = offsets[c0]++;
775
uint32_t dst_offset1 = offsets[c1]++;
776
777
pNew[dst_offset0] = p[0];
778
pNew[dst_offset1] = p[1];
779
}
780
}
781
782
if (num_vals & 1)
783
{
784
uint32_t c = (BASISU_GET_KEY(p) >> pass_shift) & 0xFF;
785
786
uint32_t dst_offset = offsets[c];
787
offsets[c] = dst_offset + 1;
788
789
pNew[dst_offset] = *p;
790
}
791
792
T* t = pCur;
793
pCur = pNew;
794
pNew = t;
795
}
796
797
return pCur;
798
}
799
800
#undef BASISU_GET_KEY
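// Illustrative sketch (rec is a hypothetical record type): radix_sort() ping-pongs between
// the two buffers, one pass per key byte, and returns whichever buffer holds the sorted
// result - it may be either pBuf0 or pBuf1, so always use the returned pointer.
//
//   struct rec { uint32_t key; uint32_t payload; };
//   std::vector<rec> a(1024), b(1024);       // b is scratch space
//   // ... fill a[i].key / a[i].payload ...
//   rec* pSorted = basisu::radix_sort((uint32_t)a.size(), &a[0], &b[0], 0, sizeof(uint32_t));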
801
802
// Very simple job pool with no dependencies.
803
class job_pool
804
{
805
BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool);
806
807
public:
808
// num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc.
809
job_pool(uint32_t num_threads);
810
~job_pool();
811
812
void add_job(const std::function<void()>& job);
813
void add_job(std::function<void()>&& job);
814
815
void wait_for_all();
816
817
size_t get_total_threads() const { return 1 + m_threads.size(); }
818
819
private:
820
std::vector<std::thread> m_threads;
821
std::vector<std::function<void()> > m_queue;
822
823
std::mutex m_mutex;
824
std::condition_variable m_has_work;
825
std::condition_variable m_no_more_jobs;
826
827
uint32_t m_num_active_jobs;
828
829
std::atomic<bool> m_kill_flag;
830
831
std::atomic<int> m_num_active_workers;
832
833
void job_thread(uint32_t index);
834
};
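// Illustrative sketch: queue std::function jobs, then block until they all finish. Per the
// note above, num_threads counts the calling thread, so 4 spawns 3 worker threads.
//
//   basisu::job_pool jpool(4);
//   for (uint32_t i = 0; i < 16; i++)
//       jpool.add_job([i]() { /* process slice i (hypothetical work) */ });
//   jpool.wait_for_all();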
835
836
// Simple 64-bit color class
837
838
class color_rgba_i16
839
{
840
public:
841
union
842
{
843
int16_t m_comps[4];
844
845
struct
846
{
847
int16_t r;
848
int16_t g;
849
int16_t b;
850
int16_t a;
851
};
852
};
853
854
inline color_rgba_i16()
855
{
856
static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
857
}
858
859
inline color_rgba_i16(int sr, int sg, int sb, int sa)
860
{
861
set(sr, sg, sb, sa);
862
}
863
864
inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
865
{
866
m_comps[0] = (int16_t)clamp<int>(sr, INT16_MIN, INT16_MAX);
867
m_comps[1] = (int16_t)clamp<int>(sg, INT16_MIN, INT16_MAX);
868
m_comps[2] = (int16_t)clamp<int>(sb, INT16_MIN, INT16_MAX);
869
m_comps[3] = (int16_t)clamp<int>(sa, INT16_MIN, INT16_MAX);
870
return *this;
871
}
872
};
873
874
class color_rgba
875
{
876
public:
877
union
878
{
879
uint8_t m_comps[4];
880
881
struct
882
{
883
uint8_t r;
884
uint8_t g;
885
uint8_t b;
886
uint8_t a;
887
};
888
};
889
890
inline color_rgba()
891
{
892
static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
893
static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)");
894
}
895
896
// Not too hot about this idea.
897
inline color_rgba(const basist::color32& other) :
898
r(other.r),
899
g(other.g),
900
b(other.b),
901
a(other.a)
902
{
903
}
904
905
color_rgba& operator= (const basist::color32& rhs)
906
{
907
r = rhs.r;
908
g = rhs.g;
909
b = rhs.b;
910
a = rhs.a;
911
return *this;
912
}
913
914
inline color_rgba(int y)
915
{
916
set(y);
917
}
918
919
inline color_rgba(int y, int na)
920
{
921
set(y, na);
922
}
923
924
inline color_rgba(int sr, int sg, int sb, int sa)
925
{
926
set(sr, sg, sb, sa);
927
}
928
929
inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
930
{
931
set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
932
}
933
934
inline color_rgba& set_noclamp_y(int y)
935
{
936
m_comps[0] = (uint8_t)y;
937
m_comps[1] = (uint8_t)y;
938
m_comps[2] = (uint8_t)y;
939
m_comps[3] = (uint8_t)255;
940
return *this;
941
}
942
943
inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
944
{
945
m_comps[0] = (uint8_t)sr;
946
m_comps[1] = (uint8_t)sg;
947
m_comps[2] = (uint8_t)sb;
948
m_comps[3] = (uint8_t)sa;
949
return *this;
950
}
951
952
inline color_rgba &set(int y)
953
{
954
m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
955
m_comps[1] = m_comps[0];
956
m_comps[2] = m_comps[0];
957
m_comps[3] = 255;
958
return *this;
959
}
960
961
inline color_rgba &set(int y, int na)
962
{
963
m_comps[0] = static_cast<uint8_t>(clamp<int>(y, 0, 255));
964
m_comps[1] = m_comps[0];
965
m_comps[2] = m_comps[0];
966
m_comps[3] = static_cast<uint8_t>(clamp<int>(na, 0, 255));
967
return *this;
968
}
969
970
inline color_rgba &set(int sr, int sg, int sb, int sa)
971
{
972
m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
973
m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
974
m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
975
m_comps[3] = static_cast<uint8_t>(clamp<int>(sa, 0, 255));
976
return *this;
977
}
978
979
inline color_rgba &set_rgb(int sr, int sg, int sb)
980
{
981
m_comps[0] = static_cast<uint8_t>(clamp<int>(sr, 0, 255));
982
m_comps[1] = static_cast<uint8_t>(clamp<int>(sg, 0, 255));
983
m_comps[2] = static_cast<uint8_t>(clamp<int>(sb, 0, 255));
984
return *this;
985
}
986
987
inline color_rgba &set_rgb(const color_rgba &other)
988
{
989
r = other.r;
990
g = other.g;
991
b = other.b;
992
return *this;
993
}
994
995
inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
996
inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
997
998
inline void clear()
999
{
1000
m_comps[0] = 0;
1001
m_comps[1] = 0;
1002
m_comps[2] = 0;
1003
m_comps[3] = 0;
1004
}
1005
1006
inline bool operator== (const color_rgba &rhs) const
1007
{
1008
if (m_comps[0] != rhs.m_comps[0]) return false;
1009
if (m_comps[1] != rhs.m_comps[1]) return false;
1010
if (m_comps[2] != rhs.m_comps[2]) return false;
1011
if (m_comps[3] != rhs.m_comps[3]) return false;
1012
return true;
1013
}
1014
1015
inline bool operator!= (const color_rgba &rhs) const
1016
{
1017
return !(*this == rhs);
1018
}
1019
1020
inline bool operator<(const color_rgba &rhs) const
1021
{
1022
for (int i = 0; i < 4; i++)
1023
{
1024
if (m_comps[i] < rhs.m_comps[i])
1025
return true;
1026
else if (m_comps[i] != rhs.m_comps[i])
1027
return false;
1028
}
1029
return false;
1030
}
1031
1032
inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; }
1033
inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
1034
inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }
1035
1036
inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); }
1037
inline uint32_t get_rgba_uint32() const { return r | (g << 8) | (b << 16) | (a << 24); }
1038
1039
inline basist::color32 get_color32() const
1040
{
1041
return basist::color32(r, g, b, a);
1042
}
1043
1044
static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
1045
static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
1046
};
1047
1048
typedef basisu::vector<color_rgba> color_rgba_vec;
1049
1050
const color_rgba g_black_color(0, 0, 0, 255);
1051
const color_rgba g_black_trans_color(0, 0, 0, 0);
1052
const color_rgba g_white_color(255, 255, 255, 255);
1053
1054
inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
1055
{
1056
int dr = r0 - r1, dg = g0 - g1, db = b0 - b1;
1057
return dr * dr + dg * dg + db * db;
1058
}
1059
1060
inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
1061
{
1062
int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1;
1063
return dr * dr + dg * dg + db * db + da * da;
1064
}
1065
1066
inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
1067
{
1068
if (alpha)
1069
return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a);
1070
else
1071
return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
1072
}
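// Illustrative sketch: squared per-channel distance, optionally including alpha.
//
//   basisu::color_rgba a(255, 0, 0, 255), b(250, 10, 0, 255);
//   int d_rgb  = basisu::color_distance(a, b, false);   // 5*5 + 10*10 + 0 = 125
//   int d_rgba = basisu::color_distance(a, b, true);    // also 125 here, alpha delta is 0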
1073
1074
// TODO: Allow user to control channel weightings.
1075
inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
1076
{
1077
if (perceptual)
1078
{
1079
#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE
1080
const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
1081
const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
1082
1083
const float cr1 = e1.r - l1;
1084
const float cr2 = e2.r - l2;
1085
1086
const float cb1 = e1.b - l1;
1087
const float cb2 = e2.b - l2;
1088
1089
const float dl = l1 - l2;
1090
const float dcr = cr1 - cr2;
1091
const float dcb = cb1 - cb2;
1092
1093
uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
1094
1095
if (alpha)
1096
{
1097
int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
1098
d += static_cast<uint32_t>(128.0f*da*da);
1099
}
1100
1101
return d;
1102
#elif 1
1103
int dr = e1.r - e2.r;
1104
int dg = e1.g - e2.g;
1105
int db = e1.b - e2.b;
1106
1107
#if 0
1108
int delta_l = dr * 27 + dg * 92 + db * 9;
1109
int delta_cr = dr * 128 - delta_l;
1110
int delta_cb = db * 128 - delta_l;
1111
1112
uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) +
1113
((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
1114
((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
1115
#else
1116
int64_t delta_l = dr * 27 + dg * 92 + db * 9;
1117
int64_t delta_cr = dr * 128 - delta_l;
1118
int64_t delta_cb = db * 128 - delta_l;
1119
1120
uint32_t id = ((uint32_t)((delta_l * delta_l) >> 7U)) +
1121
((((uint32_t)((delta_cr * delta_cr) >> 7U)) * 26U) >> 7U) +
1122
((((uint32_t)((delta_cb * delta_cb) >> 7U)) * 3U) >> 7U);
1123
#endif
1124
1125
if (alpha)
1126
{
1127
int da = (e1.a - e2.a) << 7;
1128
// This shouldn't overflow if da is 255 or -255: 29.99 bits after squaring.
1129
id += ((uint32_t)(da * da) >> 7U);
1130
}
1131
1132
return id;
1133
#else
1134
int dr = e1.r - e2.r;
1135
int dg = e1.g - e2.g;
1136
int db = e1.b - e2.b;
1137
1138
int64_t delta_l = dr * 27 + dg * 92 + db * 9;
1139
int64_t delta_cr = dr * 128 - delta_l;
1140
int64_t delta_cb = db * 128 - delta_l;
1141
1142
int64_t id = ((delta_l * delta_l) * 128) +
1143
((delta_cr * delta_cr) * 26) +
1144
((delta_cb * delta_cb) * 3);
1145
1146
if (alpha)
1147
{
1148
int64_t da = (e1.a - e2.a);
1149
id += (da * da) * 128;
1150
}
1151
1152
int d = (id + 8192) >> 14;
1153
1154
return d;
1155
#endif
1156
}
1157
else
1158
return color_distance(e1, e2, alpha);
1159
}
1160
1161
static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b)
1162
{
1163
const int dl = a.r - b.r;
1164
const int da = a.a - b.a;
1165
return dl * dl + da * da;
1166
}
1167
1168
// String helpers
1169
1170
inline int string_find_right(const std::string& filename, char c)
1171
{
1172
size_t result = filename.find_last_of(c);
1173
return (result == std::string::npos) ? -1 : (int)result;
1174
}
1175
1176
inline std::string string_get_extension(const std::string &filename)
1177
{
1178
int sep = -1;
1179
#ifdef _WIN32
1180
sep = string_find_right(filename, '\\');
1181
#endif
1182
if (sep < 0)
1183
sep = string_find_right(filename, '/');
1184
1185
int dot = string_find_right(filename, '.');
1186
if (dot <= sep)
1187
return "";
1188
1189
std::string result(filename);
1190
result.erase(0, dot + 1);
1191
1192
return result;
1193
}
1194
1195
inline bool string_remove_extension(std::string &filename)
1196
{
1197
int sep = -1;
1198
#ifdef _WIN32
1199
sep = string_find_right(filename, '\\');
1200
#endif
1201
if (sep < 0)
1202
sep = string_find_right(filename, '/');
1203
1204
int dot = string_find_right(filename, '.');
1205
if ((dot < sep) || (dot < 0))
1206
return false;
1207
1208
filename.resize(dot);
1209
1210
return true;
1211
}
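// Illustrative sketch:
//
//   std::string ext = basisu::string_get_extension("textures/albedo.png");   // "png"
//   std::string fn  = "textures/albedo.png";
//   basisu::string_remove_extension(fn);                                     // fn == "textures/albedo"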
1212
1213
inline std::string string_tolower(const std::string& s)
1214
{
1215
std::string result(s);
1216
for (size_t i = 0; i < result.size(); i++)
1217
{
1218
result[i] = (char)tolower((uint8_t)(result[i]));
1219
}
1220
return result;
1221
}
1222
1223
inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
1224
{
1225
assert(pDst && pSrc && dst_len);
1226
if (!dst_len)
1227
return pDst;
1228
1229
const size_t src_len = strlen(pSrc);
1230
const size_t src_len_plus_terminator = src_len + 1;
1231
1232
if (src_len_plus_terminator <= dst_len)
1233
memcpy(pDst, pSrc, src_len_plus_terminator);
1234
else
1235
{
1236
if (dst_len > 1)
1237
memcpy(pDst, pSrc, dst_len - 1);
1238
pDst[dst_len - 1] = '\0';
1239
}
1240
1241
return pDst;
1242
}
1243
1244
inline bool string_ends_with(const std::string& s, char c)
1245
{
1246
return (s.size() != 0) && (s.back() == c);
1247
}
1248
1249
inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
1250
{
1251
#ifdef _MSC_VER
1252
char drive_buf[_MAX_DRIVE] = { 0 };
1253
char dir_buf[_MAX_DIR] = { 0 };
1254
char fname_buf[_MAX_FNAME] = { 0 };
1255
char ext_buf[_MAX_EXT] = { 0 };
1256
1257
errno_t error = _splitpath_s(p,
1258
pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
1259
pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
1260
pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
1261
pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
1262
if (error != 0)
1263
return false;
1264
1265
if (pDrive) *pDrive = drive_buf;
1266
if (pDir) *pDir = dir_buf;
1267
if (pFilename) *pFilename = fname_buf;
1268
if (pExt) *pExt = ext_buf;
1269
return true;
1270
#else
1271
char dirtmp[1024], nametmp[1024];
1272
strcpy_safe(dirtmp, sizeof(dirtmp), p);
1273
strcpy_safe(nametmp, sizeof(nametmp), p);
1274
1275
if (pDrive)
1276
pDrive->resize(0);
1277
1278
const char *pDirName = dirname(dirtmp);
1279
const char* pBaseName = basename(nametmp);
1280
if ((!pDirName) || (!pBaseName))
1281
return false;
1282
1283
if (pDir)
1284
{
1285
*pDir = pDirName;
1286
if ((pDir->size()) && (pDir->back() != '/'))
1287
*pDir += "/";
1288
}
1289
1290
if (pFilename)
1291
{
1292
*pFilename = pBaseName;
1293
string_remove_extension(*pFilename);
1294
}
1295
1296
if (pExt)
1297
{
1298
*pExt = pBaseName;
1299
*pExt = string_get_extension(*pExt);
1300
if (pExt->size())
1301
*pExt = "." + *pExt;
1302
}
1303
1304
return true;
1305
#endif
1306
}
1307
1308
inline bool is_path_separator(char c)
1309
{
1310
#ifdef _WIN32
1311
return (c == '/') || (c == '\\');
1312
#else
1313
return (c == '/');
1314
#endif
1315
}
1316
1317
inline bool is_drive_separator(char c)
1318
{
1319
#ifdef _WIN32
1320
return (c == ':');
1321
#else
1322
(void)c;
1323
return false;
1324
#endif
1325
}
1326
1327
inline void string_combine_path(std::string &dst, const char *p, const char *q)
1328
{
1329
std::string temp(p);
1330
if (temp.size() && !is_path_separator(q[0]))
1331
{
1332
if (!is_path_separator(temp.back()))
1333
temp.append(1, BASISU_PATH_SEPERATOR_CHAR);
1334
}
1335
temp += q;
1336
dst.swap(temp);
1337
}
1338
1339
inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
1340
{
1341
string_combine_path(dst, p, q);
1342
string_combine_path(dst, dst.c_str(), r);
1343
}
1344
1345
inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
1346
{
1347
string_combine_path(dst, p, q, r);
1348
if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.'))
1349
dst.append(1, '.');
1350
dst.append(pExt);
1351
}
1352
1353
inline bool string_get_pathname(const char *p, std::string &path)
1354
{
1355
std::string temp_drive, temp_path;
1356
if (!string_split_path(p, &temp_drive, &temp_path, NULL, NULL))
1357
return false;
1358
string_combine_path(path, temp_drive.c_str(), temp_path.c_str());
1359
return true;
1360
}
1361
1362
inline bool string_get_filename(const char *p, std::string &filename)
1363
{
1364
std::string temp_ext;
1365
if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext))
1366
return false;
1367
filename += temp_ext;
1368
return true;
1369
}
1370
1371
class rand
1372
{
1373
std::mt19937 m_mt;
1374
1375
public:
1376
rand() { }
1377
1378
rand(uint32_t s) { seed(s); }
1379
void seed(uint32_t s) { m_mt.seed(s); }
1380
1381
// between [l,h]
1382
int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }
1383
1384
uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }
1385
1386
bool bit() { return irand(0, 1) == 1; }
1387
1388
uint8_t byte() { return static_cast<uint8_t>(urand32()); }
1389
1390
// between [l,h)
1391
float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }
1392
1393
float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }
1394
};
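// Illustrative sketch: a thin wrapper over std::mt19937 with helpers for common integer,
// float and Gaussian draws.
//
//   basisu::rand rng(1234);                  // deterministic seed
//   int   die = rng.irand(1, 6);             // inclusive [1, 6]
//   float t   = rng.frand(0.0f, 1.0f);       // half-open [0, 1)
//   float nse = rng.gaussian(0.0f, 1.0f);    // mean 0, stddev 1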
1395
1396
class priority_queue
1397
{
1398
public:
1399
priority_queue() :
1400
m_size(0)
1401
{
1402
}
1403
1404
void clear()
1405
{
1406
m_heap.clear();
1407
m_size = 0;
1408
}
1409
1410
void init(uint32_t max_entries, uint32_t first_index, float first_priority)
1411
{
1412
m_heap.resize(max_entries + 1);
1413
m_heap[1].m_index = first_index;
1414
m_heap[1].m_priority = first_priority;
1415
m_size = 1;
1416
}
1417
1418
inline uint32_t size() const { return m_size; }
1419
1420
inline uint32_t get_top_index() const { return m_heap[1].m_index; }
1421
inline float get_top_priority() const { return m_heap[1].m_priority; }
1422
1423
inline void delete_top()
1424
{
1425
assert(m_size > 0);
1426
m_heap[1] = m_heap[m_size];
1427
m_size--;
1428
if (m_size)
1429
down_heap(1);
1430
}
1431
1432
inline void add_heap(uint32_t index, float priority)
1433
{
1434
m_size++;
1435
1436
uint32_t k = m_size;
1437
1438
if (m_size >= m_heap.size())
1439
m_heap.resize(m_size + 1);
1440
1441
for (;;)
1442
{
1443
uint32_t parent_index = k >> 1;
1444
if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
1445
break;
1446
m_heap[k] = m_heap[parent_index];
1447
k = parent_index;
1448
}
1449
1450
m_heap[k].m_index = index;
1451
m_heap[k].m_priority = priority;
1452
}
1453
1454
private:
1455
struct entry
1456
{
1457
uint32_t m_index;
1458
float m_priority;
1459
};
1460
1461
basisu::vector<entry> m_heap;
1462
uint32_t m_size;
1463
1464
// Push down entry at index
1465
inline void down_heap(uint32_t heap_index)
1466
{
1467
uint32_t orig_index = m_heap[heap_index].m_index;
1468
const float orig_priority = m_heap[heap_index].m_priority;
1469
1470
uint32_t child_index;
1471
while ((child_index = (heap_index << 1)) <= m_size)
1472
{
1473
if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
1474
if (orig_priority > m_heap[child_index].m_priority)
1475
break;
1476
m_heap[heap_index] = m_heap[child_index];
1477
heap_index = child_index;
1478
}
1479
1480
m_heap[heap_index].m_index = orig_index;
1481
m_heap[heap_index].m_priority = orig_priority;
1482
}
1483
};
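// Illustrative sketch: a 1-based binary max-heap keyed on float priority; the TSVQ code
// below uses it to always split the node with the largest variance next.
//
//   basisu::priority_queue heap;
//   heap.init(64, 0, 5.0f);                  // capacity hint, first entry (index 0, priority 5.0)
//   heap.add_heap(1, 9.0f);
//   heap.add_heap(2, 2.0f);
//   uint32_t top = heap.get_top_index();     // 1 (priority 9.0 is the largest)
//   heap.delete_top();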
1484
1485
// Tree structured vector quantization (TSVQ)
1486
1487
template <typename TrainingVectorType>
1488
class tree_vector_quant
1489
{
1490
public:
1491
typedef TrainingVectorType training_vec_type;
1492
typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight;
1493
typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs;
1494
1495
tree_vector_quant() :
1496
m_next_codebook_index(0)
1497
{
1498
}
1499
1500
void clear()
1501
{
1502
clear_vector(m_training_vecs);
1503
clear_vector(m_nodes);
1504
m_next_codebook_index = 0;
1505
}
1506
1507
void add_training_vec(const TrainingVectorType &v, uint64_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
1508
1509
size_t get_total_training_vecs() const { return m_training_vecs.size(); }
1510
const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; }
1511
array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; }
1512
1513
void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const
1514
{
1515
for (uint32_t i = 0; i < m_nodes.size(); i++)
1516
{
1517
const tsvq_node &n = m_nodes[i];
1518
if (!n.is_leaf())
1519
continue;
1520
1521
codebook.resize(codebook.size() + 1);
1522
codebook.back() = n.m_training_vecs;
1523
}
1524
}
1525
1526
void retrieve(basisu::vector<TrainingVectorType> &codebook) const
1527
{
1528
for (uint32_t i = 0; i < m_nodes.size(); i++)
1529
{
1530
const tsvq_node &n = m_nodes[i];
1531
if (!n.is_leaf())
1532
continue;
1533
1534
codebook.resize(codebook.size() + 1);
1535
codebook.back() = n.m_origin;
1536
}
1537
}
1538
1539
void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const
1540
{
1541
uint_vec node_stack;
1542
node_stack.reserve(512);
1543
1544
codebook.resize(0);
1545
codebook.reserve(max_clusters);
1546
1547
uint32_t node_index = 0;
1548
1549
while (true)
1550
{
1551
const tsvq_node& cur = m_nodes[node_index];
1552
1553
if (cur.is_leaf() || ((2 + cur.m_codebook_index) > (int)max_clusters))
1554
{
1555
codebook.resize(codebook.size() + 1);
1556
codebook.back() = cur.m_training_vecs;
1557
1558
if (node_stack.empty())
1559
break;
1560
1561
node_index = node_stack.back();
1562
node_stack.pop_back();
1563
continue;
1564
}
1565
1566
node_stack.push_back(cur.m_right_index);
1567
node_index = cur.m_left_index;
1568
}
1569
}
1570
1571
bool generate(uint32_t max_size)
1572
{
1573
if (!m_training_vecs.size())
1574
return false;
1575
1576
m_next_codebook_index = 0;
1577
1578
clear_vector(m_nodes);
1579
m_nodes.reserve(max_size * 2 + 1);
1580
1581
m_nodes.push_back(prepare_root());
1582
1583
priority_queue var_heap;
1584
var_heap.init(max_size, 0, m_nodes[0].m_var);
1585
1586
basisu::vector<uint32_t> l_children, r_children;
1587
1588
// Now split the worst nodes
1589
l_children.reserve(m_training_vecs.size() + 1);
1590
r_children.reserve(m_training_vecs.size() + 1);
1591
1592
uint32_t total_leaf_nodes = 1;
1593
1594
//interval_timer tm;
1595
//tm.start();
1596
1597
while ((var_heap.size()) && (total_leaf_nodes < max_size))
1598
{
1599
const uint32_t node_index = var_heap.get_top_index();
1600
const tsvq_node &node = m_nodes[node_index];
1601
1602
assert(node.m_var == var_heap.get_top_priority());
1603
assert(node.is_leaf());
1604
1605
var_heap.delete_top();
1606
1607
if (node.m_training_vecs.size() > 1)
1608
{
1609
if (split_node(node_index, var_heap, l_children, r_children))
1610
{
1611
// This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
1612
total_leaf_nodes += 1;
1613
}
1614
}
1615
}
1616
1617
//debug_printf("tree_vector_quant::generate %u: %3.3f secs\n", TrainingVectorType::num_elements, tm.get_elapsed_secs());
1618
1619
return true;
1620
}
1621
1622
private:
1623
class tsvq_node
1624
{
1625
public:
1626
inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { }
1627
1628
// vecs is erased
1629
inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
1630
1631
inline bool is_leaf() const { return m_left_index < 0; }
1632
1633
float m_var;
1634
uint64_t m_weight;
1635
TrainingVectorType m_origin;
1636
int32_t m_left_index, m_right_index;
1637
basisu::vector<uint32_t> m_training_vecs;
1638
int m_codebook_index;
1639
};
1640
1641
typedef basisu::vector<tsvq_node> tsvq_node_vec;
1642
tsvq_node_vec m_nodes;
1643
1644
array_of_weighted_training_vecs m_training_vecs;
1645
1646
uint32_t m_next_codebook_index;
1647
1648
tsvq_node prepare_root() const
1649
{
1650
double ttsum = 0.0f;
1651
1652
// Prepare root node containing all training vectors
1653
tsvq_node root;
1654
root.m_training_vecs.reserve(m_training_vecs.size());
1655
1656
for (uint32_t i = 0; i < m_training_vecs.size(); i++)
1657
{
1658
const TrainingVectorType &v = m_training_vecs[i].first;
1659
const uint64_t weight = m_training_vecs[i].second;
1660
1661
root.m_training_vecs.push_back(i);
1662
1663
root.m_origin += (v * static_cast<float>(weight));
1664
root.m_weight += weight;
1665
1666
ttsum += v.dot(v) * weight;
1667
}
1668
1669
root.m_var = static_cast<float>(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
1670
1671
root.m_origin *= (1.0f / root.m_weight);
1672
1673
return root;
1674
}
1675
1676
bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children)
1677
{
1678
TrainingVectorType l_child_org, r_child_org;
1679
uint64_t l_weight = 0, r_weight = 0;
1680
float l_var = 0.0f, r_var = 0.0f;
1681
1682
// Compute initial left/right child origins
1683
if (!prep_split(m_nodes[node_index], l_child_org, r_child_org))
1684
return false;
1685
1686
// Use k-means iterations to refine these children vectors
1687
if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
1688
return false;
1689
1690
// Create children
1691
const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;
1692
1693
m_nodes[node_index].m_left_index = l_child_index;
1694
m_nodes[node_index].m_right_index = r_child_index;
1695
1696
m_nodes[node_index].m_codebook_index = m_next_codebook_index;
1697
m_next_codebook_index++;
1698
1699
m_nodes.resize(m_nodes.size() + 2);
1700
1701
tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];
1702
1703
l_child.set(l_child_org, l_weight, l_var, l_children);
1704
r_child.set(r_child_org, r_weight, r_var, r_children);
1705
1706
if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1))
1707
{
1708
TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first);
1709
1710
for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++)
1711
{
1712
if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first))
1713
{
1714
l_child.m_var = 1e-4f;
1715
break;
1716
}
1717
}
1718
}
1719
1720
if ((r_child.m_var <= 0.0f) && (r_child.m_training_vecs.size() > 1))
1721
{
1722
TrainingVectorType v(m_training_vecs[r_child.m_training_vecs[0]].first);
1723
1724
for (uint32_t i = 1; i < r_child.m_training_vecs.size(); i++)
1725
{
1726
if (!(v == m_training_vecs[r_child.m_training_vecs[i]].first))
1727
{
1728
r_child.m_var = 1e-4f;
1729
break;
1730
}
1731
}
1732
}
1733
1734
if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
1735
var_heap.add_heap(l_child_index, l_child.m_var);
1736
1737
if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
1738
var_heap.add_heap(r_child_index, r_child.m_var);
1739
1740
return true;
1741
}
1742
1743
TrainingVectorType compute_split_axis(const tsvq_node &node) const
1744
{
1745
const uint32_t N = TrainingVectorType::num_elements;
1746
1747
matrix<N, N, float> cmatrix;
1748
1749
if ((N != 16) || (!g_cpu_supports_sse41))
1750
{
1751
cmatrix.set_zero();
1752
1753
// Compute covariance matrix from weighted input vectors
1754
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1755
{
1756
const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
1757
const TrainingVectorType w(static_cast<float>(m_training_vecs[node.m_training_vecs[i]].second) * v);
1758
1759
for (uint32_t x = 0; x < N; x++)
1760
for (uint32_t y = x; y < N; y++)
1761
cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
1762
}
1763
}
1764
else
1765
{
1766
#if BASISU_SUPPORT_SSE
1767
// Specialize the case with 16x16 matrices, which are quite expensive without SIMD.
1768
// This SSE function takes pointers to void types, so do some sanity checks.
1769
assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
1770
assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
1771
update_covar_matrix_16x16_sse41(node.m_training_vecs.size_u32(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
1772
#endif
1773
}
1774
1775
const float renorm_scale = 1.0f / node.m_weight;
1776
1777
for (uint32_t x = 0; x < N; x++)
1778
for (uint32_t y = x; y < N; y++)
1779
cmatrix[x][y] *= renorm_scale;
1780
1781
// Diagonal flip
1782
for (uint32_t x = 0; x < (N - 1); x++)
1783
for (uint32_t y = x + 1; y < N; y++)
1784
cmatrix[y][x] = cmatrix[x][y];
1785
1786
return compute_pca_from_covar<N, TrainingVectorType>(cmatrix);
1787
}
1788
1789
bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
1790
{
1791
//const uint32_t N = TrainingVectorType::num_elements;
1792
1793
if (2 == node.m_training_vecs.size())
1794
{
1795
l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
1796
r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
1797
return true;
1798
}
1799
1800
TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
1801
double l_weight = 0.0f, r_weight = 0.0f;
1802
1803
// Compute initial left/right children
1804
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1805
{
1806
const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
1807
1808
const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
1809
1810
double t = (v - node.m_origin).dot(axis);
1811
if (t >= 0.0f)
1812
{
1813
r_child += v * weight;
1814
r_weight += weight;
1815
}
1816
else
1817
{
1818
l_child += v * weight;
1819
l_weight += weight;
1820
}
1821
}
1822
1823
if ((l_weight > 0.0f) && (r_weight > 0.0f))
1824
{
1825
l_child_result = l_child * static_cast<float>(1.0f / l_weight);
1826
r_child_result = r_child * static_cast<float>(1.0f / r_weight);
1827
}
1828
else
1829
{
1830
TrainingVectorType l(1e+20f);
1831
TrainingVectorType h(-1e+20f);
1832
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1833
{
1834
const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1835
1836
l = TrainingVectorType::component_min(l, v);
1837
h = TrainingVectorType::component_max(h, v);
1838
}
1839
1840
TrainingVectorType r(h - l);
1841
1842
float largest_axis_v = 0.0f;
1843
int largest_axis_index = -1;
1844
for (uint32_t i = 0; i < TrainingVectorType::num_elements; i++)
1845
{
1846
if (r[i] > largest_axis_v)
1847
{
1848
largest_axis_v = r[i];
1849
largest_axis_index = i;
1850
}
1851
}
1852
1853
if (largest_axis_index < 0)
1854
return false;
1855
1856
basisu::vector<float> keys(node.m_training_vecs.size());
1857
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1858
keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index];
1859
1860
uint_vec indices(node.m_training_vecs.size());
1861
indirect_sort((uint32_t)node.m_training_vecs.size(), &indices[0], &keys[0]);
1862
1863
l_child.set_zero();
1864
l_weight = 0;
1865
1866
r_child.set_zero();
1867
r_weight = 0;
1868
1869
const uint32_t half_index = (uint32_t)node.m_training_vecs.size() / 2;
1870
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1871
{
1872
const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
1873
1874
const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1875
1876
if (i < half_index)
1877
{
1878
l_child += v * weight;
1879
l_weight += weight;
1880
}
1881
else
1882
{
1883
r_child += v * weight;
1884
r_weight += weight;
1885
}
1886
}
1887
1888
if ((l_weight > 0.0f) && (r_weight > 0.0f))
1889
{
1890
l_child_result = l_child * static_cast<float>(1.0f / l_weight);
1891
r_child_result = r_child * static_cast<float>(1.0f / r_weight);
1892
}
1893
else
1894
{
1895
l_child_result = l;
1896
r_child_result = h;
1897
}
1898
}
1899
1900
return true;
1901
}
1902
1903
bool refine_split(const tsvq_node &node,
1904
TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children,
1905
TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const
1906
{
1907
l_children.reserve(node.m_training_vecs.size());
1908
r_children.reserve(node.m_training_vecs.size());
1909
1910
float prev_total_variance = 1e+10f;
1911
1912
// Refine left/right children locations using k-means iterations
1913
const uint32_t cMaxIters = 6;
1914
for (uint32_t iter = 0; iter < cMaxIters; iter++)
1915
{
1916
l_children.resize(0);
1917
r_children.resize(0);
1918
1919
TrainingVectorType new_l_child(cZero), new_r_child(cZero);
1920
1921
double l_ttsum = 0.0f, r_ttsum = 0.0f;
1922
1923
l_weight = 0;
1924
r_weight = 0;
1925
1926
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1927
{
1928
const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
1929
const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
1930
1931
double left_dist2 = l_child.squared_distance_d(v), right_dist2 = r_child.squared_distance_d(v);
1932
1933
if (left_dist2 >= right_dist2)
1934
{
1935
new_r_child += (v * static_cast<float>(weight));
1936
r_weight += weight;
1937
1938
r_ttsum += weight * v.dot(v);
1939
r_children.push_back(node.m_training_vecs[i]);
1940
}
1941
else
1942
{
1943
new_l_child += (v * static_cast<float>(weight));
1944
l_weight += weight;
1945
1946
l_ttsum += weight * v.dot(v);
1947
l_children.push_back(node.m_training_vecs[i]);
1948
}
1949
}
1950
1951
// Node is unsplittable using the above algorithm - try something else to split it up.
1952
if ((!l_weight) || (!r_weight))
1953
{
1954
l_children.resize(0);
1955
new_l_child.set(0.0f);
1956
l_ttsum = 0.0f;
1957
l_weight = 0;
1958
1959
r_children.resize(0);
1960
new_r_child.set(0.0f);
1961
r_ttsum = 0.0f;
1962
r_weight = 0;
1963
1964
TrainingVectorType firstVec;
1965
for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
1966
{
1967
const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first;
1968
const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second;
1969
1970
if ((!i) || (v == firstVec))
1971
{
1972
firstVec = v;
1973
1974
new_r_child += (v * static_cast<float>(weight));
1975
r_weight += weight;
1976
1977
r_ttsum += weight * v.dot(v);
1978
r_children.push_back(node.m_training_vecs[i]);
1979
}
1980
else
1981
{
1982
new_l_child += (v * static_cast<float>(weight));
1983
l_weight += weight;
1984
1985
l_ttsum += weight * v.dot(v);
1986
l_children.push_back(node.m_training_vecs[i]);
1987
}
1988
}
1989
1990
if ((!l_weight) || (!r_weight))
1991
return false;
1992
}
1993
1994
l_var = static_cast<float>(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
1995
r_var = static_cast<float>(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));
1996
1997
new_l_child *= (1.0f / l_weight);
1998
new_r_child *= (1.0f / r_weight);
1999
2000
l_child = new_l_child;
2001
r_child = new_r_child;
2002
2003
float total_var = l_var + r_var;
2004
const float cGiveupVariance = .00001f;
2005
if (total_var < cGiveupVariance)
2006
break;
2007
2008
// Check to see if the variance has settled
2009
const float cVarianceDeltaThresh = .00125f;
2010
if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
2011
break;
2012
2013
prev_total_variance = total_var;
2014
}
2015
2016
return true;
2017
}
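// Note on the variance bookkeeping above: for vectors v_i with weights w_i,
// total weight W = sum_i w_i, weighted sum S = sum_i w_i * v_i and centroid
// c = S / W, the weighted sum of squared distances to the centroid satisfies
//
//   sum_i w_i * ||v_i - c||^2  =  sum_i w_i * ||v_i||^2  -  ||S||^2 / W
//
// which is why refine_split() only tracks the running totals l_ttsum/r_ttsum and
// the weighted vector sums, then derives each child's variance in one step.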
2018
};
2019
2020
struct weighted_block_group
2021
{
2022
uint64_t m_total_weight;
2023
uint_vec m_indices;
2024
};
2025
2026
template<typename Quantizer>
2027
bool generate_hierarchical_codebook_threaded_internal(Quantizer& q,
2028
uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
2029
basisu::vector<uint_vec>& codebook,
2030
basisu::vector<uint_vec>& parent_codebook,
2031
uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool)
2032
{
2033
codebook.resize(0);
2034
parent_codebook.resize(0);
2035
2036
if ((max_threads <= 1) || (q.get_training_vecs().size() < 256) || (max_codebook_size < max_threads * 16))
2037
{
2038
if (!q.generate(max_codebook_size))
2039
return false;
2040
2041
q.retrieve(codebook);
2042
2043
if (max_parent_codebook_size)
2044
q.retrieve(max_parent_codebook_size, parent_codebook);
2045
2046
return true;
2047
}
2048
2049
const uint32_t cMaxThreads = 16;
2050
if (max_threads > cMaxThreads)
2051
max_threads = cMaxThreads;
2052
2053
if (!q.generate(max_threads))
2054
return false;
2055
2056
basisu::vector<uint_vec> initial_codebook;
2057
2058
q.retrieve(initial_codebook);
2059
2060
if (initial_codebook.size() < max_threads)
2061
{
2062
codebook = initial_codebook;
2063
2064
if (max_parent_codebook_size)
2065
q.retrieve(max_parent_codebook_size, parent_codebook);
2066
2067
return true;
2068
}
2069
2070
Quantizer quantizers[cMaxThreads];
2071
2072
bool success_flags[cMaxThreads];
2073
clear_obj(success_flags);
2074
2075
basisu::vector<uint_vec> local_clusters[cMaxThreads];
2076
basisu::vector<uint_vec> local_parent_clusters[cMaxThreads];
2077
2078
for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
2079
{
2080
pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
2081
2082
Quantizer& lq = quantizers[thread_iter];
2083
uint_vec& cluster_indices = initial_codebook[thread_iter];
2084
2085
uint_vec local_to_global(cluster_indices.size());
2086
2087
for (uint32_t i = 0; i < cluster_indices.size(); i++)
2088
{
2089
const uint32_t global_training_vec_index = cluster_indices[i];
2090
local_to_global[i] = global_training_vec_index;
2091
2092
lq.add_training_vec(q.get_training_vecs()[global_training_vec_index].first, q.get_training_vecs()[global_training_vec_index].second);
2093
}
2094
2095
const uint32_t max_clusters = limit_clusterizers ? ((max_codebook_size + max_threads - 1) / max_threads) : (uint32_t)lq.get_total_training_vecs();
2096
2097
success_flags[thread_iter] = lq.generate(max_clusters);
2098
2099
if (success_flags[thread_iter])
2100
{
2101
lq.retrieve(local_clusters[thread_iter]);
2102
2103
for (uint32_t i = 0; i < local_clusters[thread_iter].size(); i++)
2104
{
2105
for (uint32_t j = 0; j < local_clusters[thread_iter][i].size(); j++)
2106
local_clusters[thread_iter][i][j] = local_to_global[local_clusters[thread_iter][i][j]];
2107
}
2108
2109
if (max_parent_codebook_size)
2110
{
2111
lq.retrieve((max_parent_codebook_size + max_threads - 1) / max_threads, local_parent_clusters[thread_iter]);
2112
2113
for (uint32_t i = 0; i < local_parent_clusters[thread_iter].size(); i++)
2114
{
2115
for (uint32_t j = 0; j < local_parent_clusters[thread_iter][i].size(); j++)
2116
local_parent_clusters[thread_iter][i][j] = local_to_global[local_parent_clusters[thread_iter][i][j]];
2117
}
2118
}
2119
}
2120
2121
} );
2122
2123
} // thread_iter
2124
2125
pJob_pool->wait_for_all();
2126
2127
uint32_t total_clusters = 0, total_parent_clusters = 0;
2128
2129
for (int thread_iter = 0; thread_iter < (int)max_threads; thread_iter++)
2130
{
2131
if (!success_flags[thread_iter])
2132
return false;
2133
total_clusters += (uint32_t)local_clusters[thread_iter].size();
2134
total_parent_clusters += (uint32_t)local_parent_clusters[thread_iter].size();
2135
}
2136
2137
codebook.reserve(total_clusters);
2138
parent_codebook.reserve(total_parent_clusters);
2139
2140
for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
2141
{
2142
for (uint32_t j = 0; j < local_clusters[thread_iter].size(); j++)
2143
{
2144
codebook.resize(codebook.size() + 1);
2145
codebook.back().swap(local_clusters[thread_iter][j]);
2146
}
2147
2148
for (uint32_t j = 0; j < local_parent_clusters[thread_iter].size(); j++)
2149
{
2150
parent_codebook.resize(parent_codebook.size() + 1);
2151
parent_codebook.back().swap(local_parent_clusters[thread_iter][j]);
2152
}
2153
}
2154
2155
return true;
2156
}
2157
2158
template<typename Quantizer>
2159
bool generate_hierarchical_codebook_threaded(Quantizer& q,
2160
uint32_t max_codebook_size, uint32_t max_parent_codebook_size,
2161
basisu::vector<uint_vec>& codebook,
2162
basisu::vector<uint_vec>& parent_codebook,
2163
uint32_t max_threads, job_pool *pJob_pool,
2164
bool even_odd_input_pairs_equal)
2165
{
2166
typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher;
2167
2168
typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group,
2169
training_vec_bit_hasher> group_hash;
2170
2171
//interval_timer tm;
2172
//tm.start();
2173
2174
group_hash unique_vecs;
2175
2176
unique_vecs.reserve(20000);
2177
2178
weighted_block_group g;
2179
2180
if (even_odd_input_pairs_equal)
2181
{
2182
g.m_indices.resize(2);
2183
2184
assert(q.get_training_vecs().size() >= 2 && (q.get_training_vecs().size() & 1) == 0);
2185
2186
for (uint32_t i = 0; i < q.get_training_vecs().size(); i += 2)
2187
{
2188
assert(q.get_training_vecs()[i].first == q.get_training_vecs()[i + 1].first);
2189
2190
g.m_total_weight = q.get_training_vecs()[i].second + q.get_training_vecs()[i + 1].second;
2191
g.m_indices[0] = i;
2192
g.m_indices[1] = i + 1;
2193
2194
auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
2195
2196
if (!ins_res.second)
2197
{
2198
(ins_res.first)->second.m_total_weight += g.m_total_weight;
2199
(ins_res.first)->second.m_indices.push_back(i);
2200
(ins_res.first)->second.m_indices.push_back(i + 1);
2201
}
2202
}
2203
}
2204
else
2205
{
2206
g.m_indices.resize(1);
2207
2208
for (uint32_t i = 0; i < q.get_training_vecs().size(); i++)
2209
{
2210
g.m_total_weight = q.get_training_vecs()[i].second;
2211
g.m_indices[0] = i;
2212
2213
auto ins_res = unique_vecs.insert(std::make_pair(q.get_training_vecs()[i].first, g));
2214
2215
if (!ins_res.second)
2216
{
2217
(ins_res.first)->second.m_total_weight += g.m_total_weight;
2218
(ins_res.first)->second.m_indices.push_back(i);
2219
}
2220
}
2221
}
2222
2223
//debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors, %3.3f secs\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size(), tm.get_elapsed_secs());
2224
debug_printf("generate_hierarchical_codebook_threaded: %u training vectors, %u unique training vectors\n", q.get_total_training_vecs(), (uint32_t)unique_vecs.size());
2225
2226
Quantizer group_quant;
2227
typedef typename group_hash::const_iterator group_hash_const_iter;
2228
basisu::vector<group_hash_const_iter> unique_vec_iters;
2229
unique_vec_iters.reserve(unique_vecs.size());
2230
2231
for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter)
2232
{
2233
group_quant.add_training_vec(iter->first, iter->second.m_total_weight);
2234
unique_vec_iters.push_back(iter);
2235
}
2236
2237
bool limit_clusterizers = true;
2238
if (unique_vecs.size() <= max_codebook_size)
2239
limit_clusterizers = false;
2240
2241
debug_printf("Limit clusterizers: %u\n", limit_clusterizers);
2242
2243
basisu::vector<uint_vec> group_codebook, group_parent_codebook;
2244
bool status = generate_hierarchical_codebook_threaded_internal(group_quant,
2245
max_codebook_size, max_parent_codebook_size,
2246
group_codebook,
2247
group_parent_codebook,
2248
(unique_vecs.size() < 65536*4) ? 1 : max_threads, limit_clusterizers, pJob_pool);
2249
2250
if (!status)
2251
return false;
2252
2253
codebook.resize(0);
2254
for (uint32_t i = 0; i < group_codebook.size(); i++)
2255
{
2256
codebook.resize(codebook.size() + 1);
2257
2258
for (uint32_t j = 0; j < group_codebook[i].size(); j++)
2259
{
2260
const uint32_t group_index = group_codebook[i][j];
2261
2262
typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
2263
const uint_vec& training_vec_indices = group_iter->second.m_indices;
2264
2265
append_vector(codebook.back(), training_vec_indices);
2266
}
2267
}
2268
2269
parent_codebook.resize(0);
2270
for (uint32_t i = 0; i < group_parent_codebook.size(); i++)
2271
{
2272
parent_codebook.resize(parent_codebook.size() + 1);
2273
2274
for (uint32_t j = 0; j < group_parent_codebook[i].size(); j++)
2275
{
2276
const uint32_t group_index = group_parent_codebook[i][j];
2277
2278
typename group_hash::const_iterator group_iter = unique_vec_iters[group_index];
2279
const uint_vec& training_vec_indices = group_iter->second.m_indices;
2280
2281
append_vector(parent_codebook.back(), training_vec_indices);
2282
}
2283
}
2284
2285
return true;
2286
}
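// Illustrative usage sketch (not part of the API). The quantizer q is assumed to be
// an instance of the tree-structured VQ class defined above, already populated via
// add_training_vec(); the job_pool constructor argument (worker count) and the
// codebook sizes below are placeholder values.
//
//   job_pool jpool(8);
//   basisu::vector<uint_vec> codebook, parent_codebook;
//
//   if (!generate_hierarchical_codebook_threaded(q, 1024, 64,
//           codebook, parent_codebook, 8, &jpool, false))
//       error_printf("codebook generation failed\n");
//
//   // codebook[i] now lists the training vector indices assigned to cluster i.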
2287
2288
// Canonical Huffman coding
2289
2290
class histogram
2291
{
2292
basisu::vector<uint32_t> m_hist;
2293
2294
public:
2295
histogram(uint32_t size = 0) { init(size); }
2296
2297
void clear()
2298
{
2299
clear_vector(m_hist);
2300
}
2301
2302
void init(uint32_t size)
2303
{
2304
m_hist.resize(0);
2305
m_hist.resize(size);
2306
}
2307
2308
inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
2309
2310
inline const uint32_t &operator[] (uint32_t index) const
2311
{
2312
return m_hist[index];
2313
}
2314
2315
inline uint32_t &operator[] (uint32_t index)
2316
{
2317
return m_hist[index];
2318
}
2319
2320
inline void inc(uint32_t index)
2321
{
2322
m_hist[index]++;
2323
}
2324
2325
uint64_t get_total() const
2326
{
2327
uint64_t total = 0;
2328
for (uint32_t i = 0; i < m_hist.size(); ++i)
2329
total += m_hist[i];
2330
return total;
2331
}
2332
2333
double get_entropy() const
2334
{
2335
double total = static_cast<double>(get_total());
2336
if (total == 0.0f)
2337
return 0.0f;
2338
2339
const double inv_total = 1.0f / total;
2340
const double neg_inv_log2 = -1.0 / log(2.0);
2341
2342
double e = 0.0f;
2343
for (uint32_t i = 0; i < m_hist.size(); i++)
2344
if (m_hist[i])
2345
e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
2346
2347
return e;
2348
}
2349
};
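// Illustrative sketch: accumulating a histogram over a symbol stream and querying
// its entropy (the ideal coded size, in bits). num_syms/pSyms are placeholder inputs.
//
//   histogram hist(256);
//   for (uint32_t i = 0; i < num_syms; i++)
//       hist.inc(pSyms[i]);
//
//   double total_bits = hist.get_entropy();
//   double avg_bits_per_sym = total_bits / (double)hist.get_total();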
2350
2351
struct sym_freq
2352
{
2353
uint32_t m_key;
2354
uint16_t m_sym_index;
2355
};
2356
2357
sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
2358
void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
2359
void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
2360
2361
class huffman_encoding_table
2362
{
2363
public:
2364
huffman_encoding_table()
2365
{
2366
}
2367
2368
void clear()
2369
{
2370
clear_vector(m_codes);
2371
clear_vector(m_code_sizes);
2372
}
2373
2374
bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
2375
{
2376
return init(h.size(), &h[0], max_code_size);
2377
}
2378
2379
bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
2380
bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);
2381
2382
inline const uint16_vec &get_codes() const { return m_codes; }
2383
inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }
2384
2385
uint32_t get_total_used_codes() const
2386
{
2387
for (int i = static_cast<int>(m_code_sizes.size()) - 1; i >= 0; i--)
2388
if (m_code_sizes[i])
2389
return i + 1;
2390
return 0;
2391
}
2392
2393
private:
2394
uint16_vec m_codes;
2395
uint8_vec m_code_sizes;
2396
};
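// Illustrative sketch: building a canonical Huffman code from the histogram above,
// with code lengths capped at 16 bits. A code size of 0 in get_code_sizes() means
// the symbol never occurred.
//
//   huffman_encoding_table tab;
//   if (!tab.init(hist, 16))
//       error_printf("Huffman table init failed\n");
//
//   // tab.get_codes()[s] / tab.get_code_sizes()[s]: code bits and length of symbol s.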
2397
2398
class bitwise_coder
2399
{
2400
public:
2401
bitwise_coder() :
2402
m_bit_buffer(0),
2403
m_bit_buffer_size(0),
2404
m_total_bits(0)
2405
{
2406
}
2407
2408
bitwise_coder(const bitwise_coder& other) :
2409
m_bytes(other.m_bytes),
2410
m_bit_buffer(other.m_bit_buffer),
2411
m_bit_buffer_size(other.m_bit_buffer_size),
2412
m_total_bits(other.m_total_bits)
2413
{
2414
}
2415
2416
bitwise_coder(bitwise_coder&& other) :
2417
m_bytes(std::move(other.m_bytes)),
2418
m_bit_buffer(other.m_bit_buffer),
2419
m_bit_buffer_size(other.m_bit_buffer_size),
2420
m_total_bits(other.m_total_bits)
2421
{
2422
}
2423
2424
bitwise_coder& operator= (const bitwise_coder& rhs)
2425
{
2426
if (this == &rhs)
2427
return *this;
2428
2429
m_bytes = rhs.m_bytes;
2430
m_bit_buffer = rhs.m_bit_buffer;
2431
m_bit_buffer_size = rhs.m_bit_buffer_size;
2432
m_total_bits = rhs.m_total_bits;
2433
2434
return *this;
2435
}
2436
2437
bitwise_coder& operator= (bitwise_coder&& rhs)
2438
{
2439
if (this == &rhs)
2440
return *this;
2441
2442
m_bytes = std::move(rhs.m_bytes);
2443
m_bit_buffer = rhs.m_bit_buffer;
2444
m_bit_buffer_size = rhs.m_bit_buffer_size;
2445
m_total_bits = rhs.m_total_bits;
2446
2447
return *this;
2448
}
2449
2450
inline void clear()
2451
{
2452
clear_vector(m_bytes);
2453
m_bit_buffer = 0;
2454
m_bit_buffer_size = 0;
2455
m_total_bits = 0;
2456
}
2457
2458
inline void restart()
2459
{
2460
m_bytes.resize(0);
2461
m_bit_buffer = 0;
2462
m_bit_buffer_size = 0;
2463
m_total_bits = 0;
2464
}
2465
2466
inline const uint8_vec &get_bytes() const { return m_bytes; }
2467
inline uint8_vec& get_bytes() { return m_bytes; }
2468
2469
inline void reserve(uint32_t size) { m_bytes.reserve(size); }
2470
2471
inline uint64_t get_total_bits() const { return m_total_bits; }
2472
inline uint32_t get_total_bits_u32() const { assert(m_total_bits <= UINT32_MAX); return static_cast<uint32_t>(m_total_bits); }
2473
inline void clear_total_bits() { m_total_bits = 0; }
2474
2475
inline void init(uint32_t reserve_size = 1024)
2476
{
2477
m_bytes.reserve(reserve_size);
2478
m_bytes.resize(0);
2479
2480
m_bit_buffer = 0;
2481
m_bit_buffer_size = 0;
2482
m_total_bits = 0;
2483
}
2484
2485
inline uint32_t flush()
2486
{
2487
if (m_bit_buffer_size)
2488
{
2489
m_total_bits += 8 - (m_bit_buffer_size & 7);
2490
append_byte(static_cast<uint8_t>(m_bit_buffer));
2491
2492
m_bit_buffer = 0;
2493
m_bit_buffer_size = 0;
2494
2495
return 8;
2496
}
2497
2498
return 0;
2499
}
2500
2501
inline uint32_t put_bits(uint32_t bits, uint32_t num_bits)
2502
{
2503
assert(num_bits <= 32);
2504
assert(bits < (1ULL << num_bits));
2505
2506
if (!num_bits)
2507
return 0;
2508
2509
m_total_bits += num_bits;
2510
2511
uint64_t v = (static_cast<uint64_t>(bits) << m_bit_buffer_size) | m_bit_buffer;
2512
m_bit_buffer_size += num_bits;
2513
2514
while (m_bit_buffer_size >= 8)
2515
{
2516
append_byte(static_cast<uint8_t>(v));
2517
v >>= 8;
2518
m_bit_buffer_size -= 8;
2519
}
2520
2521
m_bit_buffer = static_cast<uint8_t>(v);
2522
return num_bits;
2523
}
2524
2525
inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab)
2526
{
2527
uint32_t code = tab.get_codes()[sym];
2528
uint32_t code_size = tab.get_code_sizes()[sym];
2529
assert(code_size >= 1);
2530
put_bits(code, code_size);
2531
return code_size;
2532
}
2533
2534
inline uint32_t put_truncated_binary(uint32_t v, uint32_t n)
2535
{
2536
assert((n >= 2) && (v < n));
2537
2538
uint32_t k = floor_log2i(n);
2539
uint32_t u = (1 << (k + 1)) - n;
2540
2541
if (v < u)
2542
return put_bits(v, k);
2543
2544
uint32_t x = v + u;
2545
assert((x >> 1) >= u);
2546
2547
put_bits(x >> 1, k);
2548
put_bits(x & 1, 1);
2549
return k + 1;
2550
}
2551
2552
inline uint32_t put_rice(uint32_t v, uint32_t m)
2553
{
2554
assert(m);
2555
2556
const uint64_t start_bits = m_total_bits;
2557
2558
uint32_t q = v >> m, r = v & ((1 << m) - 1);
2559
2560
// rice coding sanity check
2561
assert(q <= 64);
2562
2563
for (; q > 16; q -= 16)
2564
put_bits(0xFFFF, 16);
2565
2566
put_bits((1 << q) - 1, q);
2567
put_bits(r << 1, m + 1);
2568
2569
return (uint32_t)(m_total_bits - start_bits);
2570
}
2571
2572
inline uint32_t put_vlc(uint32_t v, uint32_t chunk_bits)
2573
{
2574
assert(chunk_bits);
2575
2576
const uint32_t chunk_size = 1 << chunk_bits;
2577
const uint32_t chunk_mask = chunk_size - 1;
2578
2579
uint32_t total_bits = 0;
2580
2581
for ( ; ; )
2582
{
2583
uint32_t next_v = v >> chunk_bits;
2584
2585
total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1);
2586
if (!next_v)
2587
break;
2588
2589
v = next_v;
2590
}
2591
2592
return total_bits;
2593
}
2594
2595
uint32_t emit_huffman_table(const huffman_encoding_table &tab);
2596
2597
void append(const bitwise_coder& other)
2598
{
2599
for (uint32_t i = 0; i < other.m_bytes.size(); i++)
2600
put_bits(other.m_bytes[i], 8);
2601
2602
if (other.m_bit_buffer_size)
2603
put_bits(other.m_bit_buffer, other.m_bit_buffer_size);
2604
}
2605
2606
private:
2607
uint8_vec m_bytes;
2608
uint32_t m_bit_buffer, m_bit_buffer_size;
2609
uint64_t m_total_bits;
2610
2611
inline void append_byte(uint8_t c)
2612
{
2613
//m_bytes.resize(m_bytes.size() + 1);
2614
//m_bytes.back() = c;
2615
2616
m_bytes.push_back(c);
2617
}
2618
2619
static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
2620
static void end_zero_run(uint16_vec &syms, uint32_t &run_size);
2621
};
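// Illustrative sketch: serializing values with the coder. Bits are packed LSB-first
// into bytes and flush() zero-pads the final partial byte. sym/tab/v/n/big_value are
// placeholder inputs; tab is a huffman_encoding_table as built above.
//
//   bitwise_coder coder;
//   coder.init();
//
//   coder.put_bits(3, 2);               // raw bits
//   coder.put_code(sym, tab);           // canonical Huffman code
//   coder.put_truncated_binary(v, n);   // v in [0, n), n >= 2
//   coder.put_vlc(big_value, 4);        // 4-bit chunks, 1 continuation bit per chunk
//   coder.flush();
//
//   const uint8_vec& bytes = coder.get_bytes();
//
// For example, put_truncated_binary(v, 5) computes k = 2, u = 3: values 0..2 are
// written in 2 bits, values 3..4 in 3 bits.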
2622
2623
class huff2D
2624
{
2625
public:
2626
huff2D() { }
2627
huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); }
2628
2629
inline const histogram &get_histogram() const { return m_histogram; }
2630
inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; }
2631
2632
inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group)
2633
{
2634
assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1);
2635
2636
m_bits_per_sym = bits_per_sym;
2637
m_total_syms_per_group = total_syms_per_group;
2638
m_cur_sym_bits = 0;
2639
m_cur_num_syms = 0;
2640
m_decode_syms_remaining = 0;
2641
m_next_decoder_group_index = 0;
2642
2643
m_histogram.init(1 << (bits_per_sym * total_syms_per_group));
2644
}
2645
2646
inline void clear()
2647
{
2648
m_group_bits.clear();
2649
2650
m_cur_sym_bits = 0;
2651
m_cur_num_syms = 0;
2652
m_decode_syms_remaining = 0;
2653
m_next_decoder_group_index = 0;
2654
}
2655
2656
inline void emit(uint32_t sym)
2657
{
2658
m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym));
2659
m_cur_num_syms++;
2660
2661
if (m_cur_num_syms == m_total_syms_per_group)
2662
flush();
2663
}
2664
2665
inline void flush()
2666
{
2667
if (m_cur_num_syms)
2668
{
2669
m_group_bits.push_back(m_cur_sym_bits);
2670
m_histogram.inc(m_cur_sym_bits);
2671
2672
m_cur_sym_bits = 0;
2673
m_cur_num_syms = 0;
2674
}
2675
}
2676
2677
inline bool start_encoding(uint32_t code_size_limit = 16)
2678
{
2679
flush();
2680
2681
if (!m_encoding_table.init(m_histogram, code_size_limit))
2682
return false;
2683
2684
m_decode_syms_remaining = 0;
2685
m_next_decoder_group_index = 0;
2686
2687
return true;
2688
}
2689
2690
inline uint32_t emit_next_sym(bitwise_coder &c)
2691
{
2692
uint32_t bits = 0;
2693
2694
if (!m_decode_syms_remaining)
2695
{
2696
bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table);
2697
m_decode_syms_remaining = m_total_syms_per_group;
2698
}
2699
2700
m_decode_syms_remaining--;
2701
return bits;
2702
}
2703
2704
inline void emit_flush()
2705
{
2706
m_decode_syms_remaining = 0;
2707
}
2708
2709
private:
2710
uint_vec m_group_bits;
2711
huffman_encoding_table m_encoding_table;
2712
histogram m_histogram;
2713
uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining;
2714
};
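// Illustrative sketch of the huff2D flow: symbols are packed into fixed-size groups,
// each group is Huffman coded as one composite symbol, and emit_next_sym() only
// writes a group's code when a new group begins. num_syms/pSyms are placeholders.
//
//   huff2D h(4, 2); // 4 bits per symbol, 2 symbols per group
//   for (uint32_t i = 0; i < num_syms; i++)
//       h.emit(pSyms[i]);
//
//   if (!h.start_encoding())
//       return false;
//
//   bitwise_coder coder;
//   coder.init();
//   coder.emit_huffman_table(h.get_encoding_table()); // so a decoder can rebuild the code
//   for (uint32_t i = 0; i < num_syms; i++)
//       h.emit_next_sym(coder);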
2715
2716
bool huffman_test(int rand_seed);
2717
2718
// VQ index reordering
2719
2720
class palette_index_reorderer
2721
{
2722
public:
2723
palette_index_reorderer()
2724
{
2725
}
2726
2727
void clear()
2728
{
2729
clear_vector(m_hist);
2730
clear_vector(m_total_count_to_picked);
2731
clear_vector(m_entries_picked);
2732
clear_vector(m_entries_to_do);
2733
clear_vector(m_remap_table);
2734
}
2735
2736
// returns [0,1] distance of entry i to entry j
2737
typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx);
2738
2739
void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2740
2741
// Table remaps old to new symbol indices
2742
inline const uint_vec &get_remap_table() const { return m_remap_table; }
2743
2744
private:
2745
uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table;
2746
2747
inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? m_hist[j * n + i] : m_hist[i * n + j]; }
2748
inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } }
2749
2750
void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices);
2751
void find_initial(uint32_t num_syms);
2752
void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2753
float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight);
2754
};
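// Illustrative sketch: remapping VQ palette indices so that symbols used near each
// other (and, optionally, visually similar ones per the callback) receive nearby
// indices. The callback, pCtx, num_indices, pIndices and num_syms are placeholders.
//
//   static float entry_dist(uint32_t i, uint32_t j, void* pCtx) { return 0.0f; } // placeholder distance
//
//   palette_index_reorderer reorderer;
//   reorderer.init(num_indices, pIndices, num_syms, entry_dist, pCtx, 1.0f);
//
//   const uint_vec& remap = reorderer.get_remap_table();
//   for (uint32_t i = 0; i < num_indices; i++)
//       pIndices[i] = remap[pIndices[i]]; // old index -> new index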
2755
2756
// Simple 32-bit 2D image class
2757
2758
class image
2759
{
2760
public:
2761
image() :
2762
m_width(0), m_height(0), m_pitch(0)
2763
{
2764
}
2765
2766
image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
2767
m_width(0), m_height(0), m_pitch(0)
2768
{
2769
resize(w, h, p);
2770
}
2771
2772
image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) :
2773
m_width(0), m_height(0), m_pitch(0)
2774
{
2775
init(pImage, width, height, comps);
2776
}
2777
2778
image(const image &other) :
2779
m_width(0), m_height(0), m_pitch(0)
2780
{
2781
*this = other;
2782
}
2783
2784
image(image&& other) :
2785
m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),
2786
m_pixels(std::move(other.m_pixels))
2787
{
2788
other.m_width = 0;
2789
other.m_height = 0;
2790
other.m_pitch = 0;
2791
}
2792
2793
image& operator= (image&& rhs)
2794
{
2795
if (this != &rhs)
2796
{
2797
m_width = rhs.m_width;
2798
m_height = rhs.m_height;
2799
m_pitch = rhs.m_pitch;
2800
m_pixels = std::move(rhs.m_pixels);
2801
2802
rhs.m_width = 0;
2803
rhs.m_height = 0;
2804
rhs.m_pitch = 0;
2805
}
2806
return *this;
2807
}
2808
2809
image &swap(image &other)
2810
{
2811
std::swap(m_width, other.m_width);
2812
std::swap(m_height, other.m_height);
2813
std::swap(m_pitch, other.m_pitch);
2814
m_pixels.swap(other.m_pixels);
2815
return *this;
2816
}
2817
2818
image &operator= (const image &rhs)
2819
{
2820
if (this != &rhs)
2821
{
2822
m_width = rhs.m_width;
2823
m_height = rhs.m_height;
2824
m_pitch = rhs.m_pitch;
2825
m_pixels = rhs.m_pixels;
2826
}
2827
return *this;
2828
}
2829
2830
image &clear()
2831
{
2832
m_width = 0;
2833
m_height = 0;
2834
m_pitch = 0;
2835
clear_vector(m_pixels);
2836
return *this;
2837
}
2838
2839
image& match_dimensions(const image& other)
2840
{
2841
resize(other.get_width(), other.get_height());
2842
return *this;
2843
}
2844
2845
image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
2846
{
2847
return crop(w, h, p, background);
2848
}
2849
2850
image &set_all(const color_rgba &c)
2851
{
2852
for (uint32_t i = 0; i < m_pixels.size(); i++)
2853
m_pixels[i] = c;
2854
return *this;
2855
}
2856
2857
void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps)
2858
{
2859
assert(comps >= 1 && comps <= 4);
2860
2861
resize(width, height);
2862
2863
for (uint32_t y = 0; y < height; y++)
2864
{
2865
for (uint32_t x = 0; x < width; x++)
2866
{
2867
const uint8_t *pSrc = &pImage[(x + y * width) * comps];
2868
color_rgba &dst = (*this)(x, y);
2869
2870
if (comps == 1)
2871
{
2872
dst.r = pSrc[0];
2873
dst.g = pSrc[0];
2874
dst.b = pSrc[0];
2875
dst.a = 255;
2876
}
2877
else if (comps == 2)
2878
{
2879
dst.r = pSrc[0];
2880
dst.g = pSrc[0];
2881
dst.b = pSrc[0];
2882
dst.a = pSrc[1];
2883
}
2884
else
2885
{
2886
dst.r = pSrc[0];
2887
dst.g = pSrc[1];
2888
dst.b = pSrc[2];
2889
if (comps == 4)
2890
dst.a = pSrc[3];
2891
else
2892
dst.a = 255;
2893
}
2894
}
2895
}
2896
}
2897
2898
image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c)
2899
{
2900
for (uint32_t iy = 0; iy < h; iy++)
2901
for (uint32_t ix = 0; ix < w; ix++)
2902
set_clipped(x + ix, y + iy, c);
2903
return *this;
2904
}
2905
2906
image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c)
2907
{
2908
for (uint32_t iy = 0; iy < h; iy++)
2909
for (uint32_t ix = 0; ix < w; ix++)
2910
set_clipped_alpha(x + ix, y + iy, c);
2911
return *this;
2912
}
2913
2914
image &crop_dup_borders(uint32_t w, uint32_t h)
2915
{
2916
const uint32_t orig_w = m_width, orig_h = m_height;
2917
2918
crop(w, h);
2919
2920
if (orig_w && orig_h)
2921
{
2922
if (m_width > orig_w)
2923
{
2924
for (uint32_t x = orig_w; x < m_width; x++)
2925
for (uint32_t y = 0; y < m_height; y++)
2926
set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
2927
}
2928
2929
if (m_height > orig_h)
2930
{
2931
for (uint32_t y = orig_h; y < m_height; y++)
2932
for (uint32_t x = 0; x < m_width; x++)
2933
set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
2934
}
2935
}
2936
return *this;
2937
}
2938
2939
// pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer).
2940
image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX)
2941
{
2942
if (p == UINT32_MAX)
2943
p = w;
2944
2945
clear();
2946
2947
if ((!p) || (!w) || (!h))
2948
return *this;
2949
2950
m_pixels.grant_ownership(pPixels, p * h, p * h);
2951
2952
m_width = w;
2953
m_height = h;
2954
m_pitch = p;
2955
2956
return *this;
2957
}
2958
2959
image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true)
2960
{
2961
if (p == UINT32_MAX)
2962
p = w;
2963
2964
if ((w == m_width) && (m_height == h) && (m_pitch == p))
2965
return *this;
2966
2967
if ((!w) || (!h) || (!p))
2968
{
2969
clear();
2970
return *this;
2971
}
2972
2973
color_rgba_vec cur_state;
2974
cur_state.swap(m_pixels);
2975
2976
m_pixels.resize(p * h);
2977
2978
if (init_image)
2979
{
2980
if (m_width || m_height)
2981
{
2982
for (uint32_t y = 0; y < h; y++)
2983
{
2984
for (uint32_t x = 0; x < w; x++)
2985
{
2986
if ((x < m_width) && (y < m_height))
2987
m_pixels[x + y * p] = cur_state[x + y * m_pitch];
2988
else
2989
m_pixels[x + y * p] = background;
2990
}
2991
}
2992
}
2993
else
2994
{
2995
m_pixels.set_all(background);
2996
}
2997
}
2998
2999
m_width = w;
3000
m_height = h;
3001
m_pitch = p;
3002
3003
return *this;
3004
}
3005
3006
inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3007
inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3008
3009
inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3010
inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3011
3012
inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
3013
{
3014
x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3015
y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3016
return m_pixels[x + y * m_pitch];
3017
}
3018
3019
inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
3020
{
3021
x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3022
y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3023
return m_pixels[x + y * m_pitch];
3024
}
3025
3026
inline image &set_clipped(int x, int y, const color_rgba &c)
3027
{
3028
if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
3029
(*this)(x, y) = c;
3030
return *this;
3031
}
3032
3033
inline image& set_clipped_alpha(int x, int y, const color_rgba& c)
3034
{
3035
if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
3036
(*this)(x, y).m_comps[3] = c.m_comps[3];
3037
return *this;
3038
}
3039
3040
// Very straightforward blit with full clipping. Not fast, but it works.
3041
image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
3042
{
3043
for (int y = 0; y < src_h; y++)
3044
{
3045
const int sy = src_y + y;
3046
if (sy < 0)
3047
continue;
3048
else if (sy >= (int)src.get_height())
3049
break;
3050
3051
for (int x = 0; x < src_w; x++)
3052
{
3053
const int sx = src_x + x;
3054
if (sx < 0)
3055
continue;
3056
else if (sx >= (int)src.get_width())
3057
break;
3058
3059
set_clipped(dst_x + x, dst_y + y, src(sx, sy));
3060
}
3061
}
3062
3063
return *this;
3064
}
3065
3066
const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
3067
{
3068
if (((src_x + w) > m_width) || ((src_y + h) > m_height))
3069
{
3070
// Slower clamping case
3071
for (uint32_t y = 0; y < h; y++)
3072
for (uint32_t x = 0; x < w; x++)
3073
*pDst++ = get_clamped(src_x + x, src_y + y);
3074
}
3075
else
3076
{
3077
const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch];
3078
3079
for (uint32_t y = 0; y < h; y++)
3080
{
3081
memcpy(pDst, pSrc, w * sizeof(color_rgba));
3082
pSrc += m_pitch;
3083
pDst += w;
3084
}
3085
}
3086
3087
return *this;
3088
}
3089
3090
image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
3091
{
3092
for (uint32_t y = 0; y < h; y++)
3093
for (uint32_t x = 0; x < w; x++)
3094
set_clipped(dst_x + x, dst_y + y, *pSrc++);
3095
return *this;
3096
}
3097
3098
inline bool is_valid() const { return m_width > 0; }
3099
3100
inline uint32_t get_width() const { return m_width; }
3101
inline uint32_t get_height() const { return m_height; }
3102
inline uint32_t get_pitch() const { return m_pitch; }
3103
inline uint32_t get_total_pixels() const { return m_width * m_height; }
3104
3105
inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
3106
inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
3107
inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
3108
3109
inline const color_rgba_vec &get_pixels() const { return m_pixels; }
3110
inline color_rgba_vec &get_pixels() { return m_pixels; }
3111
3112
inline const color_rgba *get_ptr() const { return &m_pixels[0]; }
3113
inline color_rgba *get_ptr() { return &m_pixels[0]; }
3114
3115
bool has_alpha(uint32_t channel = 3) const
3116
{
3117
for (uint32_t y = 0; y < m_height; ++y)
3118
for (uint32_t x = 0; x < m_width; ++x)
3119
if ((*this)(x, y)[channel] < 255)
3120
return true;
3121
3122
return false;
3123
}
3124
3125
image &set_alpha(uint8_t a)
3126
{
3127
for (uint32_t y = 0; y < m_height; ++y)
3128
for (uint32_t x = 0; x < m_width; ++x)
3129
(*this)(x, y).a = a;
3130
return *this;
3131
}
3132
3133
image &flip_y()
3134
{
3135
for (uint32_t y = 0; y < m_height / 2; ++y)
3136
for (uint32_t x = 0; x < m_width; ++x)
3137
std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
3138
return *this;
3139
}
3140
3141
// TODO: There are many ways to do this, not sure this is the best way.
3142
image &renormalize_normal_map()
3143
{
3144
for (uint32_t y = 0; y < m_height; y++)
3145
{
3146
for (uint32_t x = 0; x < m_width; x++)
3147
{
3148
color_rgba &c = (*this)(x, y);
3149
if ((c.r == 128) && (c.g == 128) && (c.b == 128))
3150
continue;
3151
3152
vec3F v(c.r, c.g, c.b);
3153
v = (v * (2.0f / 255.0f)) - vec3F(1.0f);
3154
v.clamp(-1.0f, 1.0f);
3155
3156
float length = v.length();
3157
const float cValidThresh = .077f;
3158
if (length < cValidThresh)
3159
{
3160
c.set(128, 128, 128, c.a);
3161
}
3162
else if (fabs(length - 1.0f) > cValidThresh)
3163
{
3164
if (length)
3165
v /= length;
3166
3167
for (uint32_t i = 0; i < 3; i++)
3168
c[i] = static_cast<uint8_t>(clamp<float>(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f));
3169
3170
if ((c.g == 128) && (c.r == 128))
3171
{
3172
if (c.b < 128)
3173
c.b = 0;
3174
else
3175
c.b = 255;
3176
}
3177
}
3178
}
3179
}
3180
return *this;
3181
}
3182
3183
void swap_rb()
3184
{
3185
for (auto& v : m_pixels)
3186
std::swap(v.r, v.b);
3187
}
3188
3189
void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);
3190
3191
vec4F get_filtered_vec4F(float x, float y) const
3192
{
3193
x -= .5f;
3194
y -= .5f;
3195
3196
int ix = (int)floorf(x);
3197
int iy = (int)floorf(y);
3198
float wx = x - ix;
3199
float wy = y - iy;
3200
3201
color_rgba a(get_clamped(ix, iy));
3202
color_rgba b(get_clamped(ix + 1, iy));
3203
color_rgba c(get_clamped(ix, iy + 1));
3204
color_rgba d(get_clamped(ix + 1, iy + 1));
3205
3206
vec4F result;
3207
3208
for (uint32_t i = 0; i < 4; i++)
3209
{
3210
const float top = lerp<float>((float)a[i], (float)b[i], wx);
3211
const float bot = lerp<float>((float)c[i], (float)d[i], wx);
3212
const float m = lerp<float>((float)top, (float)bot, wy);
3213
3214
result[i] = m;
3215
}
3216
3217
return result;
3218
}
3219
3220
// (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. Clamp addressing.
3221
color_rgba get_filtered(float x, float y) const
3222
{
3223
const vec4F fresult(get_filtered_vec4F(x, y));
3224
3225
color_rgba result;
3226
3227
for (uint32_t i = 0; i < 4; i++)
3228
result[i] = (uint8_t)clamp<int>((int)(fresult[i] + .5f), 0, 255);
3229
3230
return result;
3231
}
3232
3233
private:
3234
uint32_t m_width, m_height, m_pitch; // all in pixels
3235
color_rgba_vec m_pixels;
3236
};
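// Illustrative sketch of basic image use; color_rgba's (r, g, b, a) constructor is
// assumed here.
//
//   image img(256, 256);
//   img.set_all(color_rgba(0, 0, 0, 255));
//   img.fill_box(16, 16, 64, 64, color_rgba(255, 0, 0, 255));
//
//   color_rgba block[4 * 4];
//   img.extract_block_clamped(block, 252, 252, 4, 4); // clamps at the image borders
//
//   color_rgba filtered = img.get_filtered(10.5f, 20.5f); // bilinear, pixel centers at .5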
3237
3238
// Float images
3239
3240
typedef basisu::vector<vec4F> vec4F_vec;
3241
3242
class imagef
3243
{
3244
public:
3245
imagef() :
3246
m_width(0), m_height(0), m_pitch(0)
3247
{
3248
}
3249
3250
imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) :
3251
m_width(0), m_height(0), m_pitch(0)
3252
{
3253
resize(w, h, p);
3254
}
3255
3256
imagef(const imagef &other) :
3257
m_width(0), m_height(0), m_pitch(0)
3258
{
3259
*this = other;
3260
}
3261
3262
imagef(imagef&& other) :
3263
m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),
3264
m_pixels(std::move(other.m_pixels))
3265
{
3266
other.m_width = 0;
3267
other.m_height = 0;
3268
other.m_pitch = 0;
3269
}
3270
3271
imagef& operator= (imagef&& rhs)
3272
{
3273
if (this != &rhs)
3274
{
3275
m_width = rhs.m_width;
3276
m_height = rhs.m_height;
3277
m_pitch = rhs.m_pitch;
3278
m_pixels = std::move(rhs.m_pixels);
3279
3280
rhs.m_width = 0;
3281
rhs.m_height = 0;
3282
rhs.m_pitch = 0;
3283
}
3284
return *this;
3285
}
3286
3287
imagef &swap(imagef &other)
3288
{
3289
std::swap(m_width, other.m_width);
3290
std::swap(m_height, other.m_height);
3291
std::swap(m_pitch, other.m_pitch);
3292
m_pixels.swap(other.m_pixels);
3293
return *this;
3294
}
3295
3296
imagef &operator= (const imagef &rhs)
3297
{
3298
if (this != &rhs)
3299
{
3300
m_width = rhs.m_width;
3301
m_height = rhs.m_height;
3302
m_pitch = rhs.m_pitch;
3303
m_pixels = rhs.m_pixels;
3304
}
3305
return *this;
3306
}
3307
3308
imagef &clear()
3309
{
3310
m_width = 0;
3311
m_height = 0;
3312
m_pitch = 0;
3313
clear_vector(m_pixels);
3314
return *this;
3315
}
3316
3317
imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0))
3318
{
3319
const uint32_t width = src.get_width();
3320
const uint32_t height = src.get_height();
3321
3322
resize(width, height);
3323
3324
for (int y = 0; y < (int)height; y++)
3325
{
3326
for (uint32_t x = 0; x < width; x++)
3327
{
3328
const color_rgba &src_pixel = src(x, y);
3329
(*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]);
3330
}
3331
}
3332
3333
return *this;
3334
}
3335
3336
imagef& match_dimensions(const imagef& other)
3337
{
3338
resize(other.get_width(), other.get_height());
3339
return *this;
3340
}
3341
3342
imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
3343
{
3344
return resize(other.get_width(), other.get_height(), p, background);
3345
}
3346
3347
imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
3348
{
3349
return crop(w, h, p, background);
3350
}
3351
3352
imagef &set_all(const vec4F &c)
3353
{
3354
for (uint32_t i = 0; i < m_pixels.size(); i++)
3355
m_pixels[i] = c;
3356
return *this;
3357
}
3358
3359
imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c)
3360
{
3361
for (uint32_t iy = 0; iy < h; iy++)
3362
for (uint32_t ix = 0; ix < w; ix++)
3363
set_clipped(x + ix, y + iy, c);
3364
return *this;
3365
}
3366
3367
imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1))
3368
{
3369
if (p == UINT32_MAX)
3370
p = w;
3371
3372
if ((w == m_width) && (m_height == h) && (m_pitch == p))
3373
return *this;
3374
3375
if ((!w) || (!h) || (!p))
3376
{
3377
clear();
3378
return *this;
3379
}
3380
3381
vec4F_vec cur_state;
3382
cur_state.swap(m_pixels);
3383
3384
m_pixels.resize(p * h);
3385
3386
for (uint32_t y = 0; y < h; y++)
3387
{
3388
for (uint32_t x = 0; x < w; x++)
3389
{
3390
if ((x < m_width) && (y < m_height))
3391
m_pixels[x + y * p] = cur_state[x + y * m_pitch];
3392
else
3393
m_pixels[x + y * p] = background;
3394
}
3395
}
3396
3397
m_width = w;
3398
m_height = h;
3399
m_pitch = p;
3400
3401
return *this;
3402
}
3403
3404
imagef& crop_dup_borders(uint32_t w, uint32_t h)
3405
{
3406
const uint32_t orig_w = m_width, orig_h = m_height;
3407
3408
crop(w, h);
3409
3410
if (orig_w && orig_h)
3411
{
3412
if (m_width > orig_w)
3413
{
3414
for (uint32_t x = orig_w; x < m_width; x++)
3415
for (uint32_t y = 0; y < m_height; y++)
3416
set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
3417
}
3418
3419
if (m_height > orig_h)
3420
{
3421
for (uint32_t y = orig_h; y < m_height; y++)
3422
for (uint32_t x = 0; x < m_width; x++)
3423
set_clipped(x, y, get_clamped(minimum(x, orig_w - 1U), minimum(y, orig_h - 1U)));
3424
}
3425
}
3426
return *this;
3427
}
3428
3429
inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3430
inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; }
3431
3432
inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3433
inline vec4F &get_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
3434
3435
inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const
3436
{
3437
x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3438
y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3439
return m_pixels[x + y * m_pitch];
3440
}
3441
3442
inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v)
3443
{
3444
x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1);
3445
y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1);
3446
return m_pixels[x + y * m_pitch];
3447
}
3448
3449
inline imagef &set_clipped(int x, int y, const vec4F &c)
3450
{
3451
if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height))
3452
(*this)(x, y) = c;
3453
return *this;
3454
}
3455
3456
// Very straightforward blit with full clipping. Not fast, but it works.
3457
imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y)
3458
{
3459
for (int y = 0; y < src_h; y++)
3460
{
3461
const int sy = src_y + y;
3462
if (sy < 0)
3463
continue;
3464
else if (sy >= (int)src.get_height())
3465
break;
3466
3467
for (int x = 0; x < src_w; x++)
3468
{
3469
const int sx = src_x + x;
3470
if (sx < 0)
3471
continue;
3472
else if (sx >= (int)src.get_width())
3473
break;
3474
3475
set_clipped(dst_x + x, dst_y + y, src(sx, sy));
3476
}
3477
}
3478
3479
return *this;
3480
}
3481
3482
const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const
3483
{
3484
for (uint32_t y = 0; y < h; y++)
3485
for (uint32_t x = 0; x < w; x++)
3486
*pDst++ = get_clamped(src_x + x, src_y + y);
3487
return *this;
3488
}
3489
3490
imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h)
3491
{
3492
for (uint32_t y = 0; y < h; y++)
3493
for (uint32_t x = 0; x < w; x++)
3494
set_clipped(dst_x + x, dst_y + y, *pSrc++);
3495
return *this;
3496
}
3497
3498
inline bool is_valid() const { return m_width > 0; }
3499
3500
inline uint32_t get_width() const { return m_width; }
3501
inline uint32_t get_height() const { return m_height; }
3502
inline uint32_t get_pitch() const { return m_pitch; }
3503
inline uint64_t get_total_pixels() const { return (uint64_t)m_width * m_height; }
3504
3505
inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
3506
inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
3507
inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); }
3508
3509
inline const vec4F_vec &get_pixels() const { return m_pixels; }
3510
inline vec4F_vec &get_pixels() { return m_pixels; }
3511
3512
inline const vec4F *get_ptr() const { return &m_pixels[0]; }
3513
inline vec4F *get_ptr() { return &m_pixels[0]; }
3514
3515
bool clean_astc_hdr_pixels(float highest_mag)
3516
{
3517
bool status = true;
3518
bool nan_msg = false;
3519
bool inf_msg = false;
3520
bool neg_zero_msg = false;
3521
bool neg_msg = false;
3522
bool clamp_msg = false;
3523
3524
for (uint32_t iy = 0; iy < m_height; iy++)
3525
{
3526
for (uint32_t ix = 0; ix < m_width; ix++)
3527
{
3528
vec4F& c = (*this)(ix, iy);
3529
3530
for (uint32_t s = 0; s < 4; s++)
3531
{
3532
float &p = c[s];
3533
union { float f; uint32_t u; } x; x.f = p;
3534
3535
if ((std::isnan(p)) || (std::isinf(p)) || (x.u == 0x80000000))
3536
{
3537
if (std::isnan(p))
3538
{
3539
if (!nan_msg)
3540
{
3541
fprintf(stderr, "One or more input pixels was NaN, setting to 0.\n");
3542
nan_msg = true;
3543
}
3544
}
3545
3546
if (std::isinf(p))
3547
{
3548
if (!inf_msg)
3549
{
3550
fprintf(stderr, "One or more input pixels was INF, setting to 0.\n");
3551
inf_msg = true;
3552
}
3553
}
3554
3555
if (x.u == 0x80000000)
3556
{
3557
if (!neg_zero_msg)
3558
{
3559
fprintf(stderr, "One or more input pixels was -0, setting them to 0.\n");
3560
neg_zero_msg = true;
3561
}
3562
}
3563
3564
p = 0.0f;
3565
status = false;
3566
}
3567
else
3568
{
3569
//const float o = p;
3570
if (p < 0.0f)
3571
{
3572
p = 0.0f;
3573
3574
if (!neg_msg)
3575
{
3576
fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
3577
neg_msg = true;
3578
}
3579
3580
status = false;
3581
}
3582
3583
if (p > highest_mag)
3584
{
3585
p = highest_mag;
3586
3587
if (!clamp_msg)
3588
{
3589
fprintf(stderr, "One or more input pixels had to be clamped to %f.\n", highest_mag);
3590
clamp_msg = true;
3591
}
3592
3593
status = false;
3594
}
3595
}
3596
}
3597
}
3598
}
3599
3600
return status;
3601
}
3602
3603
imagef& flip_y()
3604
{
3605
for (uint32_t y = 0; y < m_height / 2; ++y)
3606
for (uint32_t x = 0; x < m_width; ++x)
3607
std::swap((*this)(x, y), (*this)(x, m_height - 1 - y));
3608
3609
return *this;
3610
}
3611
3612
bool has_alpha(uint32_t channel = 3) const
3613
{
3614
for (uint32_t y = 0; y < m_height; ++y)
3615
for (uint32_t x = 0; x < m_width; ++x)
3616
if ((*this)(x, y)[channel] != 1.0f)
3617
return true;
3618
3619
return false;
3620
}
3621
3622
vec4F get_filtered_vec4F(float x, float y) const
3623
{
3624
x -= .5f;
3625
y -= .5f;
3626
3627
int ix = (int)floorf(x);
3628
int iy = (int)floorf(y);
3629
float wx = x - ix;
3630
float wy = y - iy;
3631
3632
vec4F a(get_clamped(ix, iy));
3633
vec4F b(get_clamped(ix + 1, iy));
3634
vec4F c(get_clamped(ix, iy + 1));
3635
vec4F d(get_clamped(ix + 1, iy + 1));
3636
3637
vec4F result;
3638
3639
for (uint32_t i = 0; i < 4; i++)
3640
{
3641
const float top = lerp<float>((float)a[i], (float)b[i], wx);
3642
const float bot = lerp<float>((float)c[i], (float)d[i], wx);
3643
const float m = lerp<float>((float)top, (float)bot, wy);
3644
3645
result[i] = m;
3646
}
3647
3648
return result;
3649
}
3650
3651
private:
3652
uint32_t m_width, m_height, m_pitch; // all in pixels
3653
vec4F_vec m_pixels;
3654
};
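// Illustrative sketch: converting an 8-bit LDR image to floats in [0,1], then
// sanitizing it for ASTC HDR encoding (NaN/Inf/-0/negative/over-range components are
// fixed up and reported to stderr). ldr_img is a placeholder; 65504 is the largest
// finite half-float magnitude.
//
//   imagef float_img;
//   float_img.set(ldr_img, vec4F(1.0f / 255.0f), vec4F(0.0f)); // per-component scale + bias
//
//   if (!float_img.clean_astc_hdr_pixels(65504.0f))
//       error_printf("Some input pixels required cleanup\n");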
3655
3656
// REC 709 coefficients
3657
const float REC_709_R = 0.212656f, REC_709_G = 0.715158f, REC_709_B = 0.072186f;
3658
3659
inline float get_luminance(const vec4F &c)
3660
{
3661
return c[0] * REC_709_R + c[1] * REC_709_G + c[2] * REC_709_B;
3662
}
3663
3664
float linear_to_srgb(float l);
3665
float srgb_to_linear(float s);
3666
3667
class fast_linear_to_srgb
3668
{
3669
public:
3670
fast_linear_to_srgb()
3671
{
3672
init();
3673
}
3674
3675
void init()
3676
{
3677
for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
3678
{
3679
float l = (float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1));
3680
m_linear_to_srgb_table[i] = (uint8_t)basisu::fast_floorf_int(255.0f * basisu::linear_to_srgb(l));
3681
}
3682
3683
float srgb_to_linear[256];
3684
for (int i = 0; i < 256; i++)
3685
srgb_to_linear[i] = basisu::srgb_to_linear((float)i / 255.0f);
3686
3687
for (int i = 0; i < 256; i++)
3688
m_srgb_to_linear_thresh[i] = (srgb_to_linear[i] + srgb_to_linear[basisu::minimum<int>(i + 1, 255)]) * .5f;
3689
}
3690
3691
inline uint8_t convert(float l) const
3692
{
3693
assert((l >= 0.0f) && (l <= 1.0f));
3694
int j = basisu::fast_roundf_int((LINEAR_TO_SRGB_TABLE_SIZE - 1) * l);
3695
3696
assert((j >= 0) && (j < LINEAR_TO_SRGB_TABLE_SIZE));
3697
int b = m_linear_to_srgb_table[j];
3698
3699
b += (l > m_srgb_to_linear_thresh[b]);
3700
3701
return (uint8_t)b;
3702
}
3703
3704
private:
3705
static constexpr int LINEAR_TO_SRGB_TABLE_SIZE = 2048;
3706
uint8_t m_linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];
3707
3708
float m_srgb_to_linear_thresh[256];
3709
};
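// Illustrative sketch: the global instance declared below converts a linear value in
// [0,1] to the nearest 8-bit sRGB code via the precomputed tables.
//
//   float linear = srgb_to_linear(128.0f / 255.0f);
//   uint8_t code = g_fast_linear_to_srgb.convert(linear); // expected to round-trip to 128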
3710
3711
extern fast_linear_to_srgb g_fast_linear_to_srgb;
3712
3713
// Image metrics
3714
3715
class image_metrics
3716
{
3717
public:
3718
// TODO: Add ssim
3719
double m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim;
3720
bool m_has_neg, m_hf_mag_overflow, m_any_abnormal;
3721
3722
image_metrics()
3723
{
3724
clear();
3725
}
3726
3727
void clear()
3728
{
3729
m_max = 0;
3730
m_mean = 0;
3731
m_mean_squared = 0;
3732
m_rms = 0;
3733
m_psnr = 0;
3734
m_ssim = 0;
3735
m_has_neg = false;
3736
m_hf_mag_overflow = false;
3737
m_any_abnormal = false;
3738
}
3739
3740
void print(const char *pPrefix = nullptr) { printf("%sMax: %3.3f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); }
3741
void print_hp(const char* pPrefix = nullptr) { printf("%sMax: %3.6f Mean: %3.6f RMS: %3.6f PSNR: %2.6f dB, Any Neg: %u, Half float overflow: %u, Any NaN/Inf: %u\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr, m_has_neg, m_hf_mag_overflow, m_any_abnormal); }
3742
3743
void calc(const imagef& a, const imagef& b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool log = false);
3744
void calc_half(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
3745
void calc_half2(const imagef& a, const imagef& b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error);
3746
void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
3747
};
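// Illustrative sketch: comparing an original image against a decoded one. With the
// defaults the per-component errors are averaged, and PSNR follows the usual 8-bit
// form PSNR = 20 * log10(255 / RMS). orig_img/decoded_img are placeholders.
//
//   image_metrics im;
//   im.calc(orig_img, decoded_img, 0, 3); // RGB only (first_chan = 0, total_chans = 3)
//   im.print("RGB  ");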
3748
3749
void print_image_metrics(const image& a, const image& b);
3750
3751
// Image saving/loading/resampling
3752
3753
bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
3754
bool load_png(const char* pFilename, image& img);
3755
inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); }
3756
3757
bool load_tga(const char* pFilename, image& img);
3758
inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); }
3759
3760
bool load_qoi(const char* pFilename, image& img);
3761
3762
bool load_jpg(const char *pFilename, image& img);
3763
bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img);
3764
inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
3765
3766
// Currently loads .PNG, .TGA, or .JPG
3767
bool load_image(const char* pFilename, image& img);
3768
inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }
3769
3770
bool is_image_filename_hdr(const char* pFilename);
3771
3772
// Supports .HDR and most (but not all) .EXR's (see TinyEXR).
3773
bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);
3774
3775
inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)
3776
{
3777
return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);
3778
}
3779
3780
enum class hdr_image_type
3781
{
3782
cHITRGBAHalfFloat = 0,
3783
cHITRGBAFloat = 1,
3784
cHITPNGImage = 2,
3785
cHITEXRImage = 3,
3786
cHITHDRImage = 4,
3787
cHITJPGImage = 5
3788
};
3789
3790
bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);
3791
3792
uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
3793
uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
3794
3795
struct rgbe_header_info
3796
{
3797
std::string m_program;
3798
3799
// Note no validation is done, either gamma or exposure may be 0.
3800
double m_gamma;
3801
bool m_has_gamma;
3802
3803
double m_exposure; // watts/steradian/m^2.
3804
bool m_has_exposure;
3805
3806
void clear()
3807
{
3808
m_program.clear();
3809
m_gamma = 1.0f;
3810
m_has_gamma = false;
3811
m_exposure = 1.0f;
3812
m_has_exposure = false;
3813
}
3814
};
3815
3816
bool read_rgbe(const uint8_vec& filedata, imagef& img, rgbe_header_info& hdr_info);
3817
bool read_rgbe(const char* pFilename, imagef& img, rgbe_header_info &hdr_info);
3818
3819
bool write_rgbe(uint8_vec& file_data, imagef& img, rgbe_header_info& hdr_info);
3820
bool write_rgbe(const char* pFilename, imagef& img, rgbe_header_info& hdr_info);
3821
3822
bool read_exr(const char* pFilename, imagef& img, int& n_chans);
3823
bool read_exr(const void* pMem, size_t mem_size, imagef& img);
3824
3825
enum
3826
{
3827
WRITE_EXR_LINEAR_HINT = 1, // hint for lossy comp. methods: exr_perceptual_treatment_t, logarithmic or linear, defaults to logarithmic
3828
WRITE_EXR_STORE_FLOATS = 2, // use 32-bit floats, otherwise it uses half floats
3829
WRITE_EXR_NO_COMPRESSION = 4 // no compression, otherwise it uses ZIP compression (16 scanlines per block)
3830
};
3831
3832
// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
3833
bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags);
3834
3835
enum
3836
{
3837
cImageSaveGrayscale = 1,
3838
cImageSaveIgnoreAlpha = 2
3839
};
3840
3841
bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0);
3842
inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); }
3843
3844
bool read_file_to_vec(const char* pFilename, uint8_vec& data);
3845
bool read_file_to_data(const char* pFilename, void *pData, size_t len);
3846
3847
bool write_data_to_file(const char* pFilename, const void* pData, size_t len);
3848
3849
inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); }
3850
3851
bool image_resample(const image &src, image &dst, bool srgb = false,
3852
const char *pFilter = "lanczos4", float filter_scale = 1.0f,
3853
bool wrapping = false,
3854
uint32_t first_comp = 0, uint32_t num_comps = 4);
3855
3856
bool image_resample(const imagef& src, imagef& dst,
3857
const char* pFilter = "lanczos4", float filter_scale = 1.0f,
3858
bool wrapping = false,
3859
uint32_t first_comp = 0, uint32_t num_comps = 4);
3860
3861
// Timing
3862
3863
typedef uint64_t timer_ticks;
3864
3865
class interval_timer
3866
{
3867
public:
3868
interval_timer();
3869
3870
void start();
3871
void stop();
3872
3873
double get_elapsed_secs() const;
3874
inline double get_elapsed_ms() const { return 1000.0 * get_elapsed_secs(); }
3875
3876
static void init();
3877
static inline timer_ticks get_ticks_per_sec() { return g_freq; }
3878
static timer_ticks get_ticks();
3879
static double ticks_to_secs(timer_ticks ticks);
3880
static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; }
3881
3882
private:
3883
static timer_ticks g_init_ticks, g_freq;
3884
static double g_timer_freq;
3885
3886
timer_ticks m_start_time, m_stop_time;
3887
3888
bool m_started, m_stopped;
3889
};
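// Illustrative sketch: timing a section of encoder work.
//
//   interval_timer tm;
//   tm.start();
//   // ... work to be timed ...
//   debug_printf("Elapsed: %3.3f secs\n", tm.get_elapsed_secs());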
3890
3891
inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }
3892
3893
inline FILE *fopen_safe(const char *pFilename, const char *pMode)
3894
{
3895
#ifdef _WIN32
3896
FILE *pFile = nullptr;
3897
fopen_s(&pFile, pFilename, pMode);
3898
return pFile;
3899
#else
3900
return fopen(pFilename, pMode);
3901
#endif
3902
}
3903
3904
void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1);
3905
3906
const uint32_t cPixelBlockWidth = 4;
3907
const uint32_t cPixelBlockHeight = 4;
3908
const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight;
3909
3910
struct pixel_block
3911
{
3912
color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
3913
3914
inline const color_rgba& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3915
inline color_rgba& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3916
3917
inline const color_rgba* get_ptr() const { return &m_pixels[0][0]; }
3918
inline color_rgba* get_ptr() { return &m_pixels[0][0]; }
3919
3920
inline void clear() { clear_obj(*this); }
3921
3922
inline bool operator== (const pixel_block& rhs) const
3923
{
3924
return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
3925
}
3926
};
3927
typedef basisu::vector<pixel_block> pixel_block_vec;
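// Illustrative sketch: gathering every 4x4 block of an image (edge blocks are clamped)
// into the pixel_block layout used by the block-based encoders. img is a placeholder.
//
//   pixel_block_vec blocks;
//   blocks.resize(img.get_total_blocks(4, 4));
//
//   const uint32_t blocks_x = img.get_block_width(4);
//   for (uint32_t by = 0; by < img.get_block_height(4); by++)
//       for (uint32_t bx = 0; bx < blocks_x; bx++)
//           img.extract_block_clamped(blocks[bx + by * blocks_x].get_ptr(), bx * 4, by * 4, 4, 4);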
3928
3929
struct pixel_block_hdr
3930
{
3931
vec4F m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x]
3932
3933
inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3934
inline vec4F& operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; }
3935
3936
inline const vec4F* get_ptr() const { return &m_pixels[0][0]; }
3937
inline vec4F* get_ptr() { return &m_pixels[0][0]; }
3938
3939
inline void clear() { clear_obj(*this); }
3940
3941
inline bool operator== (const pixel_block_hdr& rhs) const
3942
{
3943
return memcmp(m_pixels, rhs.m_pixels, sizeof(m_pixels)) == 0;
3944
}
3945
};
3946
typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;
3947
3948
void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false);
bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img);
// Intersection
enum eClear { cClear = 0 };
enum eInitExpand { cInitExpand = 0 };
enum eIdentity { cIdentity = 0 };

template<typename vector_type>
class ray
{
public:
    typedef vector_type vector_t;
    typedef typename vector_type::scalar_type scalar_type;

    inline ray() { }
    inline ray(eClear) { clear(); }
    inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { }

    inline void clear()
    {
        m_origin.clear();
        m_direction.clear();
    }

    inline const vector_type& get_origin(void) const { return m_origin; }
    inline void set_origin(const vector_type& origin) { m_origin = origin; }

    inline const vector_type& get_direction(void) const { return m_direction; }
    inline void set_direction(const vector_type& direction) { m_direction = direction; }

    inline void set_endpoints(const vector_type& start, const vector_type& end)
    {
        m_origin = start;

        m_direction = end - start;
        m_direction.normalize_in_place();
    }

    inline vector_type eval(scalar_type t) const
    {
        return m_origin + m_direction * t;
    }

private:
    vector_type m_origin;
    vector_type m_direction;
};

typedef ray<vec2F> ray2F;
typedef ray<vec3F> ray3F;
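// Example usage of ray3F (illustrative sketch only). set_endpoints() normalizes the
// direction, so eval(t) advances t world-space units along the ray:
//
//   ray3F r;
//   r.set_endpoints(vec3F(0.0f), vec3F(10.0f, 0.0f, 0.0f));
//   vec3F p = r.eval(2.5f);   // p == (2.5, 0, 0)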
template<typename T>
class vec_interval
{
public:
    enum { N = T::num_elements };
    typedef typename T::scalar_type scalar_type;

    inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; }
    inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; }

    inline vec_interval() { }
    inline vec_interval(eClear) { clear(); }
    inline vec_interval(eInitExpand) { init_expand(); }

    inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); }

    inline void init_expand()
    {
        m_bounds[0].set(1e+30f, 1e+30f, 1e+30f);
        m_bounds[1].set(-1e+30f, -1e+30f, -1e+30f);
    }

    inline vec_interval expand(const T& p)
    {
        for (uint32_t c = 0; c < N; c++)
        {
            if (p[c] < m_bounds[0][c])
                m_bounds[0][c] = p[c];

            if (p[c] > m_bounds[1][c])
                m_bounds[1][c] = p[c];
        }

        return *this;
    }

    inline const T& operator[] (uint32_t i) const { assert(i < 2); return m_bounds[i]; }
    inline T& operator[] (uint32_t i) { assert(i < 2); return m_bounds[i]; }

    const T& get_low() const { return m_bounds[0]; }
    T& get_low() { return m_bounds[0]; }

    const T& get_high() const { return m_bounds[1]; }
    T& get_high() { return m_bounds[1]; }

    scalar_type get_dim(uint32_t axis) const { return m_bounds[1][axis] - m_bounds[0][axis]; }

    bool contains(const T& p) const
    {
        const T& low = get_low(), high = get_high();

        for (uint32_t i = 0; i < N; i++)
        {
            if (p[i] < low[i])
                return false;

            if (p[i] > high[i])
                return false;
        }
        return true;
    }

private:
    T m_bounds[2];
};

typedef vec_interval<vec1F> vec_interval1F;
typedef vec_interval<vec2F> vec_interval2F;
typedef vec_interval<vec3F> vec_interval3F;
typedef vec_interval<vec4F> vec_interval4F;

typedef vec_interval1F aabb1F;
typedef vec_interval2F aabb2F;
typedef vec_interval3F aabb3F;
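// Example usage of aabb3F / vec_interval (illustrative sketch only). cInitExpand starts
// the box "inside out" so the first expand() calls establish real bounds:
//
//   aabb3F box(cInitExpand);
//   box.expand(vec3F(0.0f));
//   box.expand(vec3F(1.0f, 2.0f, 3.0f));
//   bool b = box.contains(vec3F(0.5f, 1.0f, 1.5f));   // true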
namespace intersection
{
    enum result
    {
        cBackfacing = -1,
        cFailure = 0,
        cSuccess,
        cParallel,
        cInside,
    };

    // Returns cInside, cSuccess, or cFailure.
    // Algorithm: Graphics Gems 1
    template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
    result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
    {
        enum
        {
            cNumDim = vector_type::num_elements,
            cRight = 0,
            cLeft = 1,
            cMiddle = 2
        };

        bool inside = true;
        int quadrant[cNumDim];
        scalar_type candidate_plane[cNumDim];

        for (int i = 0; i < cNumDim; i++)
        {
            if (ray.get_origin()[i] < box[0][i])
            {
                quadrant[i] = cLeft;
                candidate_plane[i] = box[0][i];
                inside = false;
            }
            else if (ray.get_origin()[i] > box[1][i])
            {
                quadrant[i] = cRight;
                candidate_plane[i] = box[1][i];
                inside = false;
            }
            else
            {
                quadrant[i] = cMiddle;
            }
        }

        if (inside)
        {
            coord = ray.get_origin();
            t = 0.0f;
            return cInside;
        }

        scalar_type max_t[cNumDim];
        for (int i = 0; i < cNumDim; i++)
        {
            if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f))
                max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i];
            else
                max_t[i] = -1.0f;
        }

        int which_plane = 0;
        for (int i = 1; i < cNumDim; i++)
            if (max_t[which_plane] < max_t[i])
                which_plane = i;

        if (max_t[which_plane] < 0.0f)
            return cFailure;

        for (int i = 0; i < cNumDim; i++)
        {
            if (i != which_plane)
            {
                coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i];

                if ((coord[i] < box[0][i]) || (coord[i] > box[1][i]))
                    return cFailure;
            }
            else
            {
                coord[i] = candidate_plane[i];
            }

            assert(coord[i] >= box[0][i] && coord[i] <= box[1][i]);
        }

        t = max_t[which_plane];
        return cSuccess;
    }

    template<typename vector_type, typename scalar_type, typename ray_type, typename aabb_type>
    result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box)
    {
        if (!box.contains(ray.get_origin()))
        {
            started_within = false;
            return ray_aabb(coord, t, ray, box);
        }

        started_within = true;

        typename vector_type::T diag_dist = box.diagonal_length() * 1.5f;
        ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction());

        result res(ray_aabb(coord, t, outside_ray, box));
        if (res != cSuccess)
            return res;

        t = basisu::maximum(0.0f, diag_dist - t);
        return cSuccess;
    }

} // namespace intersection
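// Example usage of intersection::ray_aabb() (illustrative sketch only): intersect a ray
// starting outside a unit box with that box.
//
//   aabb3F box(vec3F(0.0f), vec3F(1.0f));
//   ray3F r(vec3F(-1.0f, 0.5f, 0.5f), vec3F(1.0f, 0.0f, 0.0f));   // origin, unit direction
//   vec3F hit;
//   float t;
//   if (intersection::ray_aabb(hit, t, r, box) == intersection::cSuccess)
//   {
//       // hit == (0, 0.5, 0.5), t == 1.0 (ray parameter at the entry point)
//   }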
// This float->half conversion matches how "F32TO16" works on Intel GPUs.
// Input cannot be negative, Inf or NaN.
inline basist::half_float float_to_half_non_neg_no_nan_inf(float val)
{
    union { float f; int32_t i; uint32_t u; } fi = { val };
    const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF;
    int e = 0, m = 0;

    assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));

    // not zero or denormal
    if (flt_e != 0)
    {
        int new_exp = flt_e - 127;
        if (new_exp > 15)
            e = 31;
        else if (new_exp < -14)
            m = lrintf((1 << 24) * fabsf(fi.f));
        else
        {
            e = new_exp + 15;
            m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
        }
    }

    assert((0 <= m) && (m <= 1024));
    if (m == 1024)
    {
        e++;
        m = 0;
    }

    assert((e >= 0) && (e <= 31));
    assert((m >= 0) && (m <= 1023));

    basist::half_float result = (basist::half_float)((e << 10) | m);
    return result;
}
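// Example (illustrative): converting simple values with the helper above.
//
//   basist::half_float h = float_to_half_non_neg_no_nan_inf(1.0f);   // h == 0x3C00
//   basist::half_float q = float_to_half_non_neg_no_nan_inf(0.5f);   // q == 0x3800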
union fu32
{
    uint32_t u;
    float f;
};
// Supports positive and denormals only. No NaN or Inf.
BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
{
    assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));

    // add 112 to the exponent (112 + half float's exp bias of 15 = float32's bias of 127)
    static const fu32 K = { 0x77800000 };

    fu32 o;
    o.u = h << 13;
    o.f *= K.f;

    return o.f;
}
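// Example (illustrative): using the fast half->float helper above.
//
//   float f = fast_half_to_float_pos_not_inf_or_nan(0x3C00);   // f == 1.0f
//   float g = fast_half_to_float_pos_not_inf_or_nan(0x3800);   // g == 0.5f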
// Positive, negative, or denormals. No NaN or Inf. Clamped to MAX_HALF_FLOAT.
inline basist::half_float fast_float_to_half_trunc_no_nan_or_inf(float f)
{
    assert(!isnan(f) && !isinf(f));

    // Subtract 112 from the exponent, to change the bias from 127 to 15.
    static const fu32 g_f_to_h{ 0x7800000 };

    fu32 fu;

    fu.f = minimum<float>((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f;

    return (basist::half_float)(((fu.u >> (23 - 10)) & 0x7FFF) | ((f < 0.0f) ? 0x8000 : 0));
}
inline basist::half_float fast_float_to_half_trunc_no_clamp_neg_nan_or_inf(float f)
{
    assert(!isnan(f) && !isinf(f));
    assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));

    // Subtract 112 from the exponent, to change the bias from 127 to 15.
    static const fu32 g_f_to_h{ 0x7800000 };

    fu32 fu;

    fu.f = f * g_f_to_h.f;

    return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);
}
inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f)
{
    assert(!isnan(f) && !isinf(f));
    assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));

    // Subtract 112 from the exponent, to change the bias from 127 to 15.
    static const fu32 g_f_to_h{ 0x7800000 };

    fu32 fu;

    fu.f = f * g_f_to_h.f;

    uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);

    // round to nearest, ties to even
    uint32_t mant = fu.u & 8191; // examine lowest 13 bits
    uint32_t inc = (mant > 4096) | ((mant == 4096) & (h & 1));
    h += inc;

    if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS)
        h = basist::MAX_HALF_FLOAT_AS_INT_BITS;

    return (basist::half_float)h;
}
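// Summary of the fast float->half helpers above (illustrative note): the two "_trunc_"
// variants simply drop the low 13 mantissa bits, while
// fast_float_to_half_no_clamp_neg_nan_or_inf() rounds to nearest (ties to even) and clamps
// the result to MAX_HALF_FLOAT_AS_INT_BITS. For example:
//
//   fast_float_to_half_trunc_no_nan_or_inf(1.0f) == 0x3C00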
} // namespace basisu

#include "basisu_math.h"