Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
9905 views
1
// basisu_transcoder_internal.h - Universal texture format transcoder library.
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License");
7
// you may not use this file except in compliance with the License.
8
// You may obtain a copy of the License at
9
//
10
// http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
#pragma once
18
19
#ifdef _MSC_VER
20
#pragma warning (disable: 4127) // conditional expression is constant
21
#endif
22
23
// v1.50: Added UASTC HDR 4x4 support
24
// v1.60: Added RDO ASTC HDR 6x6 and intermediate support
25
#define BASISD_LIB_VERSION 160
26
#define BASISD_VERSION_STRING "01.60"
27
28
#ifdef _DEBUG
29
#define BASISD_BUILD_DEBUG
30
#else
31
#define BASISD_BUILD_RELEASE
32
#endif
33
34
#include "basisu.h"
35
36
#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))
37
38
// Declaration only: the flag itself is defined in another translation unit.
// NOTE(review): presumably toggles debug printf output - confirm against its definition.
namespace basisu
{
    extern bool g_debug_printf;
}
42
43
namespace basist
44
{
45
// Low-level block formats the transcoder can write directly. Every other supported texture
// format is built by combining these.
// Most callers never touch this enum; it matters only when driving the transcoder at the
// slice level.
enum class block_format
{
    // --- ETC / BC families ---
    cETC1,                          // ETC1S RGB
    cETC2_RGBA,                     // complete ETC2 EAC RGBA8 block
    cBC1,                           // DXT1 RGB
    cBC3,                           // a BC4 block followed by a four color BC1 block
    cBC4,                           // DXT5A (alpha block only)
    cBC5,                           // two BC4 blocks

    // --- PVRTC1 ---
    cPVRTC1_4_RGB,                  // PVRTC1 4bpp, opaque only
    cPVRTC1_4_RGBA,                 // PVRTC1 4bpp RGBA

    // --- BC7 ---
    cBC7,                           // full BC7 block, any mode
    cBC7_M5_COLOR,                  // RGB part of BC7 mode 5 (writes an opaque mode 5 block)
    cBC7_M5_ALPHA,                  // alpha part of BC7 mode 5; the cBC7_M5_COLOR output must already be in
                                    // the output buffer so the mode/rot fields etc. are set

    cETC2_EAC_A8,                   // alpha block of ETC2 EAC (the first 8 bytes of the 16-byte ETC2 EAC RGBA block)

    cASTC_4x4,                      // ASTC 4x4, color-only or color+alpha. The transcoder currently always assumes
                                    // sRGB is NOT enabled when writing ASTC data. Using an sRGB ASTC format costs
                                    // ~1 LSB of extra error because ASTC decoders scale 8-bit endpoints to 16 bits
                                    // differently in that mode.

    cATC_RGB,
    cATC_RGBA_INTERPOLATED_ALPHA,
    cFXT1_RGB,                      // opaque only; unusual 8x4 pixel block size

    cPVRTC2_4_RGB,
    cPVRTC2_4_RGBA,

    cETC2_EAC_R11,
    cETC2_EAC_RG11,

    cIndices,                       // internal use: writes the 16-bit endpoint and selector indices straight to
                                    // the output (an output block must be at least 32 bits)

    // --- Uncompressed 32bpp outputs ---
    cRGB32,                         // writes RGB components to 32bpp output pixels
    cRGBA32,                        // writes RGB255 components to 32bpp output pixels
    cA32,                           // writes the alpha component to 32bpp output pixels

    // --- Uncompressed 16bpp and half/shared-exponent outputs ---
    cRGB565,
    cBGR565,

    cRGBA4444_COLOR,
    cRGBA4444_ALPHA,
    cRGBA4444_COLOR_OPAQUE,
    cRGBA4444,
    cRGBA_HALF,
    cRGB_HALF,
    cRGB_9E5,

    // --- UASTC / HDR ---
    cUASTC_4x4,                     // LDR, universal
    cUASTC_HDR_4x4,                 // HDR; transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
    cBC6H,
    cASTC_HDR_4x4,
    cASTC_HDR_6x6,

    cTotalBlockFormats              // number of enumerators above; keep last
};
99
100
// Width of one block of the given format: 8 for FXT1, 6 for ASTC HDR 6x6,
// and 4 for every other format.
inline uint32_t get_block_width(block_format fmt)
{
    if (fmt == block_format::cFXT1_RGB)
        return 8;

    if (fmt == block_format::cASTC_HDR_6x6)
        return 6;

    return 4;
}
113
114
// Height of one block of the given format: 6 for ASTC HDR 6x6, 4 for every other format.
inline uint32_t get_block_height(block_format fmt)
{
    return (fmt == block_format::cASTC_HDR_6x6) ? 6 : 4;
}
125
126
// 5-bit color delta palette ranges.
// NOTE(review): meanings are inferred from the names only - confirm against the codec that uses them.
const int COLOR5_PAL0_PREV_HI = 9;
const int COLOR5_PAL0_DELTA_LO = -9;
const int COLOR5_PAL0_DELTA_HI = 31;

const int COLOR5_PAL1_PREV_HI = 21;
const int COLOR5_PAL1_DELTA_LO = -21;
const int COLOR5_PAL1_DELTA_HI = 21;

const int COLOR5_PAL2_PREV_HI = 31;
const int COLOR5_PAL2_DELTA_LO = -31;
const int COLOR5_PAL2_DELTA_HI = 9;

const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3;
const int COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3;

// Endpoint predictor symbol alphabet: 4^4 regular symbols plus one extra symbol.
// The last symbol index is the "repeat last" symbol.
const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1;
const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1;
const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3;
const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4;

const uint32_t NUM_ENDPOINT_PREDS = 3; // i.e. BASISU_ARRAY_SIZE(g_endpoint_preds)
const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1;
const uint32_t NO_ENDPOINT_PRED_INDEX = 3; // i.e. NUM_ENDPOINT_PREDS

// Selector history buffer sizing and RLE parameters.
const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64;
const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;
const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;
const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
143
144
uint16_t crc16(const void *r, size_t size, uint16_t crc);
145
146
class huffman_decoding_table
147
{
148
friend class bitwise_decoder;
149
150
public:
151
huffman_decoding_table()
152
{
153
}
154
155
void clear()
156
{
157
basisu::clear_vector(m_code_sizes);
158
basisu::clear_vector(m_lookup);
159
basisu::clear_vector(m_tree);
160
}
161
162
bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)
163
{
164
if (!total_syms)
165
{
166
clear();
167
return true;
168
}
169
170
m_code_sizes.resize(total_syms);
171
memcpy(&m_code_sizes[0], pCode_sizes, total_syms);
172
173
const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
174
175
m_lookup.resize(0);
176
m_lookup.resize(huffman_fast_lookup_size);
177
178
m_tree.resize(0);
179
m_tree.resize(total_syms * 2);
180
181
uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
182
basisu::clear_obj(syms_using_codesize);
183
for (uint32_t i = 0; i < total_syms; i++)
184
{
185
if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize)
186
return false;
187
syms_using_codesize[pCode_sizes[i]]++;
188
}
189
190
uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
191
next_code[0] = next_code[1] = 0;
192
193
uint32_t used_syms = 0, total = 0;
194
for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++)
195
{
196
used_syms += syms_using_codesize[i];
197
next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1));
198
}
199
200
if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U))
201
return false;
202
203
for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index)
204
{
205
uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index];
206
if (!code_size)
207
continue;
208
209
cur_code = next_code[code_size]++;
210
211
for (l = code_size; l > 0; l--, cur_code >>= 1)
212
rev_code = (rev_code << 1) | (cur_code & 1);
213
214
if (code_size <= fast_lookup_bits)
215
{
216
uint32_t k = (code_size << 16) | sym_index;
217
while (rev_code < huffman_fast_lookup_size)
218
{
219
if (m_lookup[rev_code] != 0)
220
{
221
// Supplied codesizes can't create a valid prefix code.
222
return false;
223
}
224
225
m_lookup[rev_code] = k;
226
rev_code += (1 << code_size);
227
}
228
continue;
229
}
230
231
int tree_cur;
232
if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))
233
{
234
const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);
235
if (m_lookup[idx] != 0)
236
{
237
// Supplied codesizes can't create a valid prefix code.
238
return false;
239
}
240
241
m_lookup[idx] = tree_next;
242
tree_cur = tree_next;
243
tree_next -= 2;
244
}
245
246
if (tree_cur >= 0)
247
{
248
// Supplied codesizes can't create a valid prefix code.
249
return false;
250
}
251
252
rev_code >>= (fast_lookup_bits - 1);
253
254
for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)
255
{
256
tree_cur -= ((rev_code >>= 1) & 1);
257
258
const int idx = -tree_cur - 1;
259
if (idx < 0)
260
return false;
261
else if (idx >= (int)m_tree.size())
262
m_tree.resize(idx + 1);
263
264
if (!m_tree[idx])
265
{
266
m_tree[idx] = (int16_t)tree_next;
267
tree_cur = tree_next;
268
tree_next -= 2;
269
}
270
else
271
{
272
tree_cur = m_tree[idx];
273
if (tree_cur >= 0)
274
{
275
// Supplied codesizes can't create a valid prefix code.
276
return false;
277
}
278
}
279
}
280
281
tree_cur -= ((rev_code >>= 1) & 1);
282
283
const int idx = -tree_cur - 1;
284
if (idx < 0)
285
return false;
286
else if (idx >= (int)m_tree.size())
287
m_tree.resize(idx + 1);
288
289
if (m_tree[idx] != 0)
290
{
291
// Supplied codesizes can't create a valid prefix code.
292
return false;
293
}
294
295
m_tree[idx] = (int16_t)sym_index;
296
}
297
298
return true;
299
}
300
301
const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
302
const basisu::int_vec &get_lookup() const { return m_lookup; }
303
const basisu::int16_vec &get_tree() const { return m_tree; }
304
305
bool is_valid() const { return m_code_sizes.size() > 0; }
306
307
private:
308
basisu::uint8_vec m_code_sizes;
309
basisu::int_vec m_lookup;
310
basisu::int16_vec m_tree;
311
};
312
313
class bitwise_decoder
314
{
315
public:
316
bitwise_decoder() :
317
m_buf_size(0),
318
m_pBuf(nullptr),
319
m_pBuf_start(nullptr),
320
m_pBuf_end(nullptr),
321
m_bit_buf(0),
322
m_bit_buf_size(0)
323
{
324
}
325
326
void clear()
327
{
328
m_buf_size = 0;
329
m_pBuf = nullptr;
330
m_pBuf_start = nullptr;
331
m_pBuf_end = nullptr;
332
m_bit_buf = 0;
333
m_bit_buf_size = 0;
334
}
335
336
bool init(const uint8_t *pBuf, uint32_t buf_size)
337
{
338
if ((!pBuf) && (buf_size))
339
return false;
340
341
m_buf_size = buf_size;
342
m_pBuf = pBuf;
343
m_pBuf_start = pBuf;
344
m_pBuf_end = pBuf + buf_size;
345
m_bit_buf = 0;
346
m_bit_buf_size = 0;
347
return true;
348
}
349
350
void stop()
351
{
352
}
353
354
inline uint32_t peek_bits(uint32_t num_bits)
355
{
356
if (!num_bits)
357
return 0;
358
359
assert(num_bits <= 25);
360
361
while (m_bit_buf_size < num_bits)
362
{
363
uint32_t c = 0;
364
if (m_pBuf < m_pBuf_end)
365
c = *m_pBuf++;
366
367
m_bit_buf |= (c << m_bit_buf_size);
368
m_bit_buf_size += 8;
369
assert(m_bit_buf_size <= 32);
370
}
371
372
return m_bit_buf & ((1 << num_bits) - 1);
373
}
374
375
void remove_bits(uint32_t num_bits)
376
{
377
assert(m_bit_buf_size >= num_bits);
378
379
m_bit_buf >>= num_bits;
380
m_bit_buf_size -= num_bits;
381
}
382
383
uint32_t get_bits(uint32_t num_bits)
384
{
385
if (num_bits > 25)
386
{
387
assert(num_bits <= 32);
388
389
const uint32_t bits0 = peek_bits(25);
390
m_bit_buf >>= 25;
391
m_bit_buf_size -= 25;
392
num_bits -= 25;
393
394
const uint32_t bits = peek_bits(num_bits);
395
m_bit_buf >>= num_bits;
396
m_bit_buf_size -= num_bits;
397
398
return bits0 | (bits << 25);
399
}
400
401
const uint32_t bits = peek_bits(num_bits);
402
403
m_bit_buf >>= num_bits;
404
m_bit_buf_size -= num_bits;
405
406
return bits;
407
}
408
409
uint32_t decode_truncated_binary(uint32_t n)
410
{
411
assert(n >= 2);
412
413
const uint32_t k = basisu::floor_log2i(n);
414
const uint32_t u = (1 << (k + 1)) - n;
415
416
uint32_t result = get_bits(k);
417
418
if (result >= u)
419
result = ((result << 1) | get_bits(1)) - u;
420
421
return result;
422
}
423
424
uint32_t decode_rice(uint32_t m)
425
{
426
assert(m);
427
428
uint32_t q = 0;
429
for (;;)
430
{
431
uint32_t k = peek_bits(16);
432
433
uint32_t l = 0;
434
while (k & 1)
435
{
436
l++;
437
k >>= 1;
438
}
439
440
q += l;
441
442
remove_bits(l);
443
444
if (l < 16)
445
break;
446
}
447
448
return (q << m) + (get_bits(m + 1) >> 1);
449
}
450
451
inline uint32_t decode_vlc(uint32_t chunk_bits)
452
{
453
assert(chunk_bits);
454
455
const uint32_t chunk_size = 1 << chunk_bits;
456
const uint32_t chunk_mask = chunk_size - 1;
457
458
uint32_t v = 0;
459
uint32_t ofs = 0;
460
461
for ( ; ; )
462
{
463
uint32_t s = get_bits(chunk_bits + 1);
464
v |= ((s & chunk_mask) << ofs);
465
ofs += chunk_bits;
466
467
if ((s & chunk_size) == 0)
468
break;
469
470
if (ofs >= 32)
471
{
472
assert(0);
473
break;
474
}
475
}
476
477
return v;
478
}
479
480
inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)
481
{
482
assert(ct.m_code_sizes.size());
483
484
const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
485
486
while (m_bit_buf_size < 16)
487
{
488
uint32_t c = 0;
489
if (m_pBuf < m_pBuf_end)
490
c = *m_pBuf++;
491
492
m_bit_buf |= (c << m_bit_buf_size);
493
m_bit_buf_size += 8;
494
assert(m_bit_buf_size <= 32);
495
}
496
497
int code_len;
498
499
int sym;
500
if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)
501
{
502
code_len = sym >> 16;
503
sym &= 0xFFFF;
504
}
505
else
506
{
507
code_len = fast_lookup_bits;
508
do
509
{
510
sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1
511
} while (sym < 0);
512
}
513
514
m_bit_buf >>= code_len;
515
m_bit_buf_size -= code_len;
516
517
return sym;
518
}
519
520
bool read_huffman_table(huffman_decoding_table &ct)
521
{
522
ct.clear();
523
524
const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2);
525
526
if (!total_used_syms)
527
return true;
528
if (total_used_syms > basisu::cHuffmanMaxSyms)
529
return false;
530
531
uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes];
532
basisu::clear_obj(code_length_code_sizes);
533
534
const uint32_t num_codelength_codes = get_bits(5);
535
if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes))
536
return false;
537
538
for (uint32_t i = 0; i < num_codelength_codes; i++)
539
code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3));
540
541
huffman_decoding_table code_length_table;
542
if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes))
543
return false;
544
545
if (!code_length_table.is_valid())
546
return false;
547
548
basisu::uint8_vec code_sizes(total_used_syms);
549
550
uint32_t cur = 0;
551
while (cur < total_used_syms)
552
{
553
int c = decode_huffman(code_length_table);
554
555
if (c <= 16)
556
code_sizes[cur++] = static_cast<uint8_t>(c);
557
else if (c == basisu::cHuffmanSmallZeroRunCode)
558
cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin;
559
else if (c == basisu::cHuffmanBigZeroRunCode)
560
cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin;
561
else
562
{
563
if (!cur)
564
return false;
565
566
uint32_t l;
567
if (c == basisu::cHuffmanSmallRepeatCode)
568
l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin;
569
else
570
l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin;
571
572
const uint8_t prev = code_sizes[cur - 1];
573
if (prev == 0)
574
return false;
575
do
576
{
577
if (cur >= total_used_syms)
578
return false;
579
code_sizes[cur++] = prev;
580
} while (--l > 0);
581
}
582
}
583
584
if (cur != total_used_syms)
585
return false;
586
587
return ct.init(total_used_syms, &code_sizes[0]);
588
}
589
590
size_t get_bits_remaining() const
591
{
592
size_t total_bytes_remaining = m_pBuf_end - m_pBuf;
593
return total_bytes_remaining * 8 + m_bit_buf_size;
594
}
595
596
private:
597
uint32_t m_buf_size;
598
const uint8_t *m_pBuf;
599
const uint8_t *m_pBuf_start;
600
const uint8_t *m_pBuf_end;
601
602
uint32_t m_bit_buf;
603
uint32_t m_bit_buf_size;
604
};
605
606
inline uint32_t basisd_rand(uint32_t seed)
607
{
608
if (!seed)
609
seed++;
610
uint32_t z = seed;
611
BASISD_znew;
612
return z;
613
}
614
615
// Returns random number in [0,limit). Max limit is 0xFFFF.
616
inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit)
617
{
618
seed = basisd_rand(seed);
619
return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16;
620
}
621
622
class approx_move_to_front
623
{
624
public:
625
approx_move_to_front(uint32_t n)
626
{
627
init(n);
628
}
629
630
void init(uint32_t n)
631
{
632
m_values.resize(n);
633
m_rover = n / 2;
634
}
635
636
const basisu::int_vec& get_values() const { return m_values; }
637
basisu::int_vec& get_values() { return m_values; }
638
639
uint32_t size() const { return (uint32_t)m_values.size(); }
640
641
const int& operator[] (uint32_t index) const { return m_values[index]; }
642
int operator[] (uint32_t index) { return m_values[index]; }
643
644
void add(int new_value)
645
{
646
m_values[m_rover++] = new_value;
647
if (m_rover == m_values.size())
648
m_rover = (uint32_t)m_values.size() / 2;
649
}
650
651
void use(uint32_t index)
652
{
653
if (index)
654
{
655
//std::swap(m_values[index / 2], m_values[index]);
656
int x = m_values[index / 2];
657
int y = m_values[index];
658
m_values[index / 2] = y;
659
m_values[index] = x;
660
}
661
}
662
663
// returns -1 if not found
664
int find(int value) const
665
{
666
for (uint32_t i = 0; i < m_values.size(); i++)
667
if (m_values[i] == value)
668
return i;
669
return -1;
670
}
671
672
void reset()
673
{
674
const uint32_t n = (uint32_t)m_values.size();
675
676
m_values.clear();
677
678
init(n);
679
}
680
681
private:
682
basisu::int_vec m_values;
683
uint32_t m_rover;
684
};
685
686
struct decoder_etc_block;
687
688
// Clamps a signed value to the range [0, 255].
inline uint8_t clamp255(int32_t i)
{
    if (i < 0)
        return 0;

    if (i > 255)
        return 255;

    return (uint8_t)i;
}
692
693
// Tag type: passing cNoClamp picks the color32 constructor overload that takes this tag.
enum eNoClamp
{
    cNoClamp = 0
};
697
698
struct color32
699
{
700
union
701
{
702
struct
703
{
704
uint8_t r;
705
uint8_t g;
706
uint8_t b;
707
uint8_t a;
708
};
709
710
uint8_t c[4];
711
712
uint32_t m;
713
};
714
715
color32() { }
716
717
color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
718
color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); }
719
720
void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); }
721
722
void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); }
723
void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
724
725
void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); }
726
727
uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; }
728
uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; }
729
730
bool operator== (const color32&rhs) const { return m == rhs.m; }
731
732
static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
733
static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
734
};
735
736
struct endpoint
737
{
738
color32 m_color5;
739
uint8_t m_inten5;
740
bool operator== (const endpoint& rhs) const
741
{
742
return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5);
743
}
744
bool operator!= (const endpoint& rhs) const { return !(*this == rhs); }
745
};
746
747
// Selectors of one 4x4 block, kept in two parallel encodings plus summary flags.
struct selector
{
    // Plain selectors: one byte per row, 2 bits per pixel
    uint8_t m_selectors[4];

    // The same selectors in ETC1 bit layout
    uint8_t m_bytes[4];

    // Filled in by init_flags()
    uint8_t m_lo_selector, m_hi_selector;
    uint8_t m_num_unique_selectors;

    // Equality is defined on the plain selectors only.
    bool operator== (const selector& rhs) const
    {
        for (uint32_t row = 0; row < 4; row++)
        {
            if (m_selectors[row] != rhs.m_selectors[row])
                return false;
        }

        return true;
    }

    bool operator!= (const selector& rhs) const
    {
        return !(*this == rhs);
    }

    // Recomputes the lowest and highest selector value in use and how many distinct values occur.
    void init_flags()
    {
        // Count how often each of the 4 selector values occurs in the block.
        uint32_t counts[4] = { 0, 0, 0, 0 };
        for (uint32_t y = 0; y < 4; y++)
            for (uint32_t x = 0; x < 4; x++)
                counts[get_selector(x, y)]++;

        m_lo_selector = 3;
        m_hi_selector = 0;
        m_num_unique_selectors = 0;

        for (uint32_t value = 0; value < 4; value++)
        {
            if (!counts[value])
                continue;

            m_num_unique_selectors++;

            if (value < m_lo_selector)
                m_lo_selector = static_cast<uint8_t>(value);

            if (value > m_hi_selector)
                m_hi_selector = static_cast<uint8_t>(value);
        }
    }

    // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
    inline uint32_t get_selector(uint32_t x, uint32_t y) const
    {
        assert((x < 4) && (y < 4));

        const uint32_t shift = x * 2;
        return (m_selectors[y] >> shift) & 3;
    }

    // Sets the selector of pixel (x, y) to val (all three 0-3), updating both encodings.
    void set_selector(uint32_t x, uint32_t y, uint32_t val)
    {
        static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };

        assert((x | y | val) < 4);

        // Plain encoding: replace the 2-bit field for column x in row y.
        m_selectors[y] &= ~(3 << (x * 2));
        m_selectors[y] |= (val << (x * 2));

        // ETC1 encoding: pixels are numbered column-major, one bit per pixel in each of two bit planes.
        const uint32_t etc1_bit_index = x * 4 + y;

        // Byte holding this pixel's low bit; the matching high bit lives two bytes earlier.
        uint8_t *pLsb_byte = &m_bytes[3 - (etc1_bit_index >> 3)];

        const uint32_t bit_in_byte = etc1_bit_index & 7;
        const uint32_t bit_mask = 1 << bit_in_byte;

        const uint32_t etc1_val = s_selector_index_to_etc1[val];

        const uint32_t low_bit = etc1_val & 1;
        const uint32_t high_bit = etc1_val >> 1;

        pLsb_byte[0] &= ~bit_mask;
        pLsb_byte[0] |= (low_bit << bit_in_byte);

        pLsb_byte[-2] &= ~bit_mask;
        pLsb_byte[-2] |= (high_bit << bit_in_byte);
    }
};
831
832
bool basis_block_format_is_uncompressed(block_format tex_type);
833
834
//------------------------------------
835
836
// Raw bit pattern of a 16-bit (half precision) float.
using half_float = uint16_t;

// Smallest positive subnormal half value.
const double MIN_DENORM_HALF_FLOAT = 0.000000059604645;

// Smallest positive normal half value.
const double MIN_HALF_FLOAT = 0.00006103515625;

// Largest normal half value.
const double MAX_HALF_FLOAT = 65504.0;

// Bit pattern of 65504.0 as a half float.
const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF;
842
843
// Extracts bits [low, high] (inclusive, bit 0 = least significant) of val, shifted down to bit 0.
// The range must contain between 1 and 32 bits.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
    const int num_bits = (high - low) + 1;
    assert((num_bits >= 1) && (num_bits <= 32));

    const uint32_t shifted = val >> low;

    // A full 32-bit range needs no mask (and a 32-bit shift to build one would be invalid).
    if (num_bits == 32)
        return shifted;

    return shifted & ((1u << num_bits) - 1);
}
854
855
// True when the 5-bit exponent field (bits 10-14) is all ones, i.e. v is an infinity or a NaN.
inline bool is_half_inf_or_nan(half_float v)
{
    const uint32_t exp_field = get_bits(v, 10, 14);
    return exp_field == 31;
}
859
860
// True when the 5-bit exponent field is zero. Note that this also holds for +0 and -0,
// not only for subnormal values.
inline bool is_half_denorm(half_float v)
{
    return ((v >> 10) & 31) == 0;
}
865
866
// Unbiased exponent of v: the exponent field minus 15, or -14 when the field is zero.
inline int get_half_exp(half_float v)
{
    const int exp_field = ((v >> 10) & 31);

    if (exp_field == 0)
        return -14;

    return exp_field - 15;
}
871
872
// The 10 mantissa bits of v, with the implicit leading one (0x400) added unless the
// exponent field is zero.
inline int get_half_mantissa(half_float v)
{
    const int fraction = v & 0x3FF;

    return is_half_denorm(v) ? fraction : (fraction | 0x400);
}
878
879
// get_half_mantissa(v) as a float, scaled down by 1024.
inline float get_half_mantissaf(half_float v)
{
    const float mantissa = (float)get_half_mantissa(v);
    return mantissa / 1024.0f;
}
883
884
// Returns 0 when all bits of v are zero, otherwise -1 if the sign bit is set and 1 if it is not.
// The bit pattern 0x8000 (negative zero) therefore yields -1.
inline int get_half_sign(half_float v)
{
    if (!v)
        return 0;

    return (v & 0x8000) ? -1 : 1;
}
888
889
// True when the sign bit (bit 15) of v is set.
inline bool half_is_signed(half_float v)
{
    return (v & 0x8000) == 0x8000;
}
893
894
#if 0
895
int hexp = get_half_exp(Cf);
896
float hman = get_half_mantissaf(Cf);
897
int hsign = get_half_sign(Cf);
898
float k = powf(2.0f, hexp) * hman * hsign;
899
if (is_half_inf_or_nan(Cf))
900
k = std::numeric_limits<float>::quiet_NaN();
901
#endif
902
903
half_float float_to_half(float val);
904
905
// Converts a half float bit pattern to the float with the same value.
// Signed zeros, subnormals, infinities and NaNs are all handled; for a NaN the 10 mantissa
// bits are carried over into the top of the float mantissa.
inline float half_to_float(half_float hval)
{
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

    const uint32_t s = ((uint32_t)hval >> 15) & 1;
    uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
    uint32_t m = (uint32_t)hval & 0x3FF;

    uint32_t bits;

    if ((e == 0) && (m == 0))
    {
        // +- 0
        bits = s << 31;
    }
    else if (e == 31)
    {
        // +/- INF when m is zero, +/- NaN otherwise
        bits = (s << 31) | 0x7f800000 | (m << 13);
    }
    else
    {
        if (e == 0)
        {
            // Subnormal half: shift the mantissa up until the implicit one appears, lowering the
            // exponent once per shift. Working on the float-biased exponent (starting from
            // 1 + (127 - 15)) keeps the value positive, as at most 10 shifts are needed.
            e = 1 + (127 - 15);
            while (!(m & 0x00000400))
            {
                m <<= 1;
                --e;
            }

            m &= ~0x00000400u;
        }
        else
        {
            // Normal half: only the exponent bias changes.
            e = e + (127 - 15);
        }

        m = m << 13;

        assert(s <= 1);
        assert(m <= 0x7FFFFF);
        assert(e <= 255);

        bits = m | (e << 23) | (s << 31);
    }

    // Copy the bit pattern instead of reading an inactive union member, which is
    // undefined behavior in C++.
    float result;
    memcpy(&result, &bits, sizeof(result));
    return result;
}
960
961
// Originally from bc6h_enc.h
962
963
void bc6h_enc_init();
964
965
// Largest 16-bit "blog16" value.
const uint32_t MAX_BLOG16_VAL = 0xFFFF;

// BC6H internals

// Number of BC6H modes and the index of the last one.
const uint32_t NUM_BC6H_MODES = 14;
const uint32_t BC6H_LAST_MODE_INDEX = 13;

// First single-subset mode. The MS docs call this "mode 11" (they number modes from 1):
// 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights.
const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10;

const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
972
973
extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
974
975
// Describes one field in a BC6H mode's bit layout.
struct bc6h_bit_layout
{
    // Component: R=0, G=1, B=2, D=3 (D = partition index)
    int8_t m_comp;

    // Endpoint index 0-3: 0-1 = low/high of subset 1, 2-3 = low/high of subset 2;
    // -1 = partition index (d)
    int8_t m_index;

    int8_t m_last_bit;

    // May be -1 for a single bit field; may be greater than m_last_bit when the bits are reversed
    int8_t m_first_bit;
};
982
983
const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
984
extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
985
986
extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
987
988
extern const uint8_t g_bc6h_weight3[8];
989
extern const uint8_t g_bc6h_weight4[16];
990
991
extern const int8_t g_bc6h_mode_lookup[32];
992
993
// Converts b16 to half float
994
inline half_float bc6h_blog16_to_half(uint32_t comp)
995
{
996
assert(comp <= 0xFFFF);
997
998
// scale the magnitude by 31/64
999
comp = (comp * 31u) >> 6u;
1000
return (half_float)comp;
1001
}
1002
1003
const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
1004
1005
// Inverts bc6h_blog16_to_half().
1006
// Returns the nearest blog16 given a half value.
1007
inline uint32_t bc6h_half_to_blog16(half_float h)
1008
{
1009
assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
1010
return (h * 64 + 30) / 31;
1011
}
1012
1013
// Suboptimal, but very close.
1014
inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
1015
{
1016
assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
1017
return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
1018
}
1019
1020
// Raw storage for one packed BC6H block (16 bytes).
struct bc6h_block
{
    uint8_t m_bytes[16];
};
1024
1025
void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1026
void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1027
void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1028
void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1029
void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
1030
void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
1031
bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
1032
1033
struct bc6h_logical_block
1034
{
1035
uint32_t m_mode;
1036
uint32_t m_partition_pattern; // must be 0 if 1 subset
1037
uint32_t m_endpoints[3][4]; // [comp][subset*2+lh_index] - must be already properly packed
1038
uint8_t m_weights[16]; // weights must be of the proper size, taking into account skipped MSB's which must be 0
1039
1040
void clear()
1041
{
1042
basisu::clear_obj(*this);
1043
}
1044
};
1045
1046
void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk);
1047
1048
namespace bc7_mode_5_encoder
1049
{
1050
void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode);
1051
}
1052
1053
} // namespace basist
1054
1055
1056
1057
1058