// Source: GitHub repository godotengine/godot
// Path: blob/master/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
// basisu_transcoder.cpp
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15
16
#include "basisu_transcoder.h"
#include "basisu_containers_impl.h"

// Must be defined before including basisu_astc_helpers.h.
#define BASISU_ASTC_HELPERS_IMPLEMENTATION
#include "basisu_astc_helpers.h"

#include "basisu_astc_hdr_core.h"

#include <limits.h>

#if defined(_MSC_VER)
	#include <intrin.h> // For __popcnt intrinsic
#endif

#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
//	#define BASISD_IS_BIG_ENDIAN (1)
//#else
	#define BASISD_IS_BIG_ENDIAN (0)
//#endif
#endif

#ifndef BASISD_USE_UNALIGNED_WORD_READS
	#ifdef __EMSCRIPTEN__
		// Can't use unaligned loads/stores with WebAssembly.
		#define BASISD_USE_UNALIGNED_WORD_READS (0)
	#elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)
		#define BASISD_USE_UNALIGNED_WORD_READS (1)
	#else
		#define BASISD_USE_UNALIGNED_WORD_READS (0)
	#endif
#endif

// Using unaligned loads and stores causes errors when using UBSan. Jam it off.
#if defined(__has_feature)
	#if __has_feature(undefined_behavior_sanitizer)
		#undef BASISD_USE_UNALIGNED_WORD_READS
		#define BASISD_USE_UNALIGNED_WORD_READS 0
	#endif
#endif

#define BASISD_SUPPORTED_BASIS_VERSION (0x13)

// Both KTX2 switches must already be defined by the time this point is reached.
#ifndef BASISD_SUPPORT_KTX2
	#error Must have defined BASISD_SUPPORT_KTX2
#endif

#ifndef BASISD_SUPPORT_KTX2_ZSTD
	#error Must have defined BASISD_SUPPORT_KTX2_ZSTD
#endif

// Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data.
#ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
	#define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0
#endif

#ifndef BASISD_SUPPORT_DXT1
	#define BASISD_SUPPORT_DXT1 1
#endif

#ifndef BASISD_SUPPORT_DXT5A
	#define BASISD_SUPPORT_DXT5A 1
#endif

// Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript)
#if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7
	#ifndef BASISD_SUPPORT_BC7_MODE5
		#define BASISD_SUPPORT_BC7_MODE5 0
	#endif
#endif // !BASISD_SUPPORT_BC7

// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory than BC1.
#ifndef BASISD_SUPPORT_BC7_MODE5
	#define BASISD_SUPPORT_BC7_MODE5 1
#endif

#ifndef BASISD_SUPPORT_PVRTC1
	#define BASISD_SUPPORT_PVRTC1 1
#endif

#ifndef BASISD_SUPPORT_ETC2_EAC_A8
	#define BASISD_SUPPORT_ETC2_EAC_A8 1
#endif

// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files.
#ifndef BASISD_SUPPORT_UASTC
	#define BASISD_SUPPORT_UASTC 1
#endif

#ifndef BASISD_SUPPORT_ASTC
	#define BASISD_SUPPORT_ASTC 1
#endif

// Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support.
#ifndef BASISD_SUPPORT_ATC
	#define BASISD_SUPPORT_ATC 1
#endif

// Support for ETC2 EAC R11 and ETC2 EAC RG11
#ifndef BASISD_SUPPORT_ETC2_EAC_RG11
	#define BASISD_SUPPORT_ETC2_EAC_RG11 1
#endif

// If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large.
// This impacts grayscale and grayscale+alpha textures the most.
#ifndef BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
	#ifdef __EMSCRIPTEN__
		// Let's assume size matters more than quality when compiling with emscripten.
		#define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 0
	#else
		// Compiling native, so an extra 64K lookup table is probably acceptable.
		#define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 1
	#endif
#endif

#ifndef BASISD_SUPPORT_FXT1
	#define BASISD_SUPPORT_FXT1 1
#endif

#ifndef BASISD_SUPPORT_PVRTC2
	#define BASISD_SUPPORT_PVRTC2 1
#endif

// Dependency checks between the optional formats.
#if BASISD_SUPPORT_PVRTC2
	#if !BASISD_SUPPORT_ATC
		#error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1
	#endif
#endif

#if BASISD_SUPPORT_ATC
	#if !BASISD_SUPPORT_DXT5A
		#error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1
	#endif
#endif

#ifndef BASISD_SUPPORT_UASTC_HDR
	#define BASISD_SUPPORT_UASTC_HDR 1
#endif

// Table-regeneration switches; all are left disabled (0) here.
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES			0
#define BASISD_WRITE_NEW_DXT1_TABLES				0
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES		0
#define BASISD_WRITE_NEW_ASTC_TABLES				0
#define BASISD_WRITE_NEW_ATC_TABLES					0
#define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES		0

#ifndef BASISD_ENABLE_DEBUG_FLAGS
	#define BASISD_ENABLE_DEBUG_FLAGS	0
#endif

// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header.
#if BASISD_SUPPORT_KTX2
	// If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
	#if BASISD_SUPPORT_KTX2_ZSTD
		// We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError()
		#include <zstd.h>
	#endif
#endif

#if BASISD_SUPPORT_UASTC_HDR
using namespace basist::astc_6x6_hdr;
#endif
179
180
namespace basisu
{
	// Runtime switch consulted by debug_printf() and debug_puts().
	// Namespace-scope object, so it starts out false (zero-initialized).
	bool g_debug_printf;

	// Turns the diagnostic output of debug_printf()/debug_puts() on or off.
	void enable_debug_printf(bool enabled)
	{
		g_debug_printf = enabled;
	}

	// printf-style diagnostic output to stdout.
	// Emits nothing unless output was enabled via enable_debug_printf(), or
	// BASISU_FORCE_DEVEL_MESSAGES is non-zero (which force-enables the switch).
	void debug_printf(const char* pFmt, ...)
	{
#if BASISU_FORCE_DEVEL_MESSAGES
		g_debug_printf = true;
#endif
		if (!g_debug_printf)
			return;

		va_list args;
		va_start(args, pFmt);
		vprintf(pFmt, args);
		va_end(args);
	}

	// Writes the string p to stdout verbatim (no trailing newline is appended),
	// subject to the same enable switch as debug_printf().
	void debug_puts(const char* p)
	{
#if BASISU_FORCE_DEVEL_MESSAGES
		g_debug_printf = true;
#endif
		if (g_debug_printf)
		{
			// Deliberately not puts(p): puts() would append a newline.
			printf("%s", p);
		}
	}
} // namespace basisu
215
216
namespace basist
217
{
218
#if BASISD_ENABLE_DEBUG_FLAGS
	// Current debug flag bits; only exists when debug flags are compiled in.
	static uint32_t g_debug_flags = 0;
#endif

	// Returns the current debug flag bits, or 0 when BASISD_ENABLE_DEBUG_FLAGS is off.
	uint32_t get_debug_flags()
	{
#if BASISD_ENABLE_DEBUG_FLAGS
		return g_debug_flags;
#else
		return 0;
#endif
	}
230
231
void set_debug_flags(uint32_t f)
232
{
233
BASISU_NOTE_UNUSED(f);
234
#if BASISD_ENABLE_DEBUG_FLAGS
235
g_debug_flags = f;
236
#endif
237
}
238
239
inline uint16_t byteswap_uint16(uint16_t v)
240
{
241
return static_cast<uint16_t>((v >> 8) | (v << 8));
242
}
243
244
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; }
245
static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; }
246
static inline float saturate(float value) { return clampf(value, 0, 1.0f); }
247
248
static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
249
static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; }
250
static inline int lerp_8bit(int a, int b, int s) { assert(a >= 0 && a <= 255); assert(b >= 0 && b <= 255); assert(s >= 0 && s <= 255); return a + mul_8bit(b - a, s); }
251
252
struct vec2F
253
{
254
float c[2];
255
256
inline vec2F() {}
257
258
inline vec2F(float s) { c[0] = s; c[1] = s; }
259
inline vec2F(float x, float y) { c[0] = x; c[1] = y; }
260
261
inline void set(float x, float y) { c[0] = x; c[1] = y; }
262
263
inline float dot(const vec2F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]); }
264
265
inline float operator[] (uint32_t index) const { assert(index < 2); return c[index]; }
266
inline float& operator[] (uint32_t index) { assert(index < 2); return c[index]; }
267
268
inline vec2F& clamp(float l, float h)
269
{
270
c[0] = basisu::clamp(c[0], l, h);
271
c[1] = basisu::clamp(c[1], l, h);
272
return *this;
273
}
274
275
static vec2F lerp(const vec2F& a, const vec2F& b, float s)
276
{
277
vec2F res;
278
for (uint32_t i = 0; i < 2; i++)
279
res[i] = basisu::lerp(a[i], b[i], s);
280
return res;
281
}
282
};
283
284
struct vec3F
285
{
286
float c[3];
287
288
inline vec3F() {}
289
290
inline vec3F(float s) { c[0] = s; c[1] = s; c[2] = s; }
291
inline vec3F(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; }
292
293
inline void set(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; }
294
295
inline float dot(const vec3F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]) + (c[2] * o.c[2]); }
296
297
inline float operator[] (uint32_t index) const { assert(index < 3); return c[index]; }
298
inline float &operator[] (uint32_t index) { assert(index < 3); return c[index]; }
299
300
inline vec3F& clamp(float l, float h)
301
{
302
c[0] = basisu::clamp(c[0], l, h);
303
c[1] = basisu::clamp(c[1], l, h);
304
c[2] = basisu::clamp(c[2], l, h);
305
return *this;
306
}
307
308
static vec3F lerp(const vec3F& a, const vec3F& b, float s)
309
{
310
vec3F res;
311
for (uint32_t i = 0; i < 3; i++)
312
res[i] = basisu::lerp(a[i], b[i], s);
313
return res;
314
}
315
};
316
317
uint16_t crc16(const void* r, size_t size, uint16_t crc)
318
{
319
crc = ~crc;
320
321
const uint8_t* p = static_cast<const uint8_t*>(r);
322
for (; size; --size)
323
{
324
const uint16_t q = *p++ ^ (crc >> 8);
325
uint16_t k = (q >> 4) ^ q;
326
crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12);
327
}
328
329
return static_cast<uint16_t>(~crc);
330
}
331
332
struct vec4F
333
{
334
float c[4];
335
336
inline void set(float x, float y, float z, float w) { c[0] = x; c[1] = y; c[2] = z; c[3] = w; }
337
338
float operator[] (uint32_t index) const { assert(index < 4); return c[index]; }
339
float& operator[] (uint32_t index) { assert(index < 4); return c[index]; }
340
};
341
342
enum etc_constants
343
{
344
cETC1BytesPerBlock = 8U,
345
346
cETC1SelectorBits = 2U,
347
cETC1SelectorValues = 1U << cETC1SelectorBits,
348
cETC1SelectorMask = cETC1SelectorValues - 1U,
349
350
cETC1BlockShift = 2U,
351
cETC1BlockSize = 1U << cETC1BlockShift,
352
353
cETC1LSBSelectorIndicesBitOffset = 0,
354
cETC1MSBSelectorIndicesBitOffset = 16,
355
356
cETC1FlipBitOffset = 32,
357
cETC1DiffBitOffset = 33,
358
359
cETC1IntenModifierNumBits = 3,
360
cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
361
cETC1RightIntenModifierTableBitOffset = 34,
362
cETC1LeftIntenModifierTableBitOffset = 37,
363
364
// Base+Delta encoding (5 bit bases, 3 bit delta)
365
cETC1BaseColorCompNumBits = 5,
366
cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
367
368
cETC1DeltaColorCompNumBits = 3,
369
cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
370
cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
371
372
cETC1BaseColor5RBitOffset = 59,
373
cETC1BaseColor5GBitOffset = 51,
374
cETC1BaseColor5BBitOffset = 43,
375
376
cETC1DeltaColor3RBitOffset = 56,
377
cETC1DeltaColor3GBitOffset = 48,
378
cETC1DeltaColor3BBitOffset = 40,
379
380
// Absolute (non-delta) encoding (two 4-bit per component bases)
381
cETC1AbsColorCompNumBits = 4,
382
cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
383
384
cETC1AbsColor4R1BitOffset = 60,
385
cETC1AbsColor4G1BitOffset = 52,
386
cETC1AbsColor4B1BitOffset = 44,
387
388
cETC1AbsColor4R2BitOffset = 56,
389
cETC1AbsColor4G2BitOffset = 48,
390
cETC1AbsColor4B2BitOffset = 40,
391
392
cETC1ColorDeltaMin = -4,
393
cETC1ColorDeltaMax = 3,
394
395
// Delta3:
396
// 0 1 2 3 4 5 6 7
397
// 000 001 010 011 100 101 110 111
398
// 0 1 2 3 -4 -3 -2 -1
399
};
400
401
#define DECLARE_ETC1_INTEN_TABLE(name, N) \
402
static const int name[cETC1IntenModifierValues][cETC1SelectorValues] = \
403
{ \
404
{ N * -8, N * -2, N * 2, N * 8 },{ N * -17, N * -5, N * 5, N * 17 },{ N * -29, N * -9, N * 9, N * 29 },{ N * -42, N * -13, N * 13, N * 42 }, \
405
{ N * -60, N * -18, N * 18, N * 60 },{ N * -80, N * -24, N * 24, N * 80 },{ N * -106, N * -33, N * 33, N * 106 },{ N * -183, N * -47, N * 47, N * 183 } \
406
};
407
408
DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1);
409
DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16);
410
DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16);
411
412
//const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
413
const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
414
415
static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 };
416
417
struct decoder_etc_block
418
{
419
// big endian uint64:
420
// bit ofs: 56 48 40 32 24 16 8 0
421
// byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
422
union
423
{
424
uint64_t m_uint64;
425
426
uint32_t m_uint32[2];
427
428
uint8_t m_bytes[8];
429
430
struct
431
{
432
signed m_dred2 : 3;
433
uint32_t m_red1 : 5;
434
435
signed m_dgreen2 : 3;
436
uint32_t m_green1 : 5;
437
438
signed m_dblue2 : 3;
439
uint32_t m_blue1 : 5;
440
441
uint32_t m_flip : 1;
442
uint32_t m_diff : 1;
443
uint32_t m_cw2 : 3;
444
uint32_t m_cw1 : 3;
445
446
uint32_t m_selectors;
447
} m_differential;
448
};
449
450
inline void clear()
451
{
452
assert(sizeof(*this) == 8);
453
basisu::clear_obj(*this);
454
}
455
456
inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits)
457
{
458
assert((ofs + num) <= 64U);
459
assert(num && (num < 32U));
460
assert((ofs >> 3) == ((ofs + num - 1) >> 3));
461
assert(bits < (1U << num));
462
const uint32_t byte_ofs = 7 - (ofs >> 3);
463
const uint32_t byte_bit_ofs = ofs & 7;
464
const uint32_t mask = (1 << num) - 1;
465
m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
466
m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
467
}
468
469
inline void set_flip_bit(bool flip)
470
{
471
m_bytes[3] &= ~1;
472
m_bytes[3] |= static_cast<uint8_t>(flip);
473
}
474
475
inline void set_diff_bit(bool diff)
476
{
477
m_bytes[3] &= ~2;
478
m_bytes[3] |= (static_cast<uint32_t>(diff) << 1);
479
}
480
481
// Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
482
inline void set_inten_table(uint32_t subblock_id, uint32_t t)
483
{
484
assert(subblock_id < 2);
485
assert(t < 8);
486
const uint32_t ofs = subblock_id ? 2 : 5;
487
m_bytes[3] &= ~(7 << ofs);
488
m_bytes[3] |= (t << ofs);
489
}
490
491
// Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
492
inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
493
{
494
assert((x | y | val) < 4);
495
const uint32_t bit_index = x * 4 + y;
496
497
uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
498
499
const uint32_t byte_bit_ofs = bit_index & 7;
500
const uint32_t mask = 1 << byte_bit_ofs;
501
502
static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };
503
const uint32_t etc1_val = s_selector_index_to_etc1[val];
504
505
const uint32_t lsb = etc1_val & 1;
506
const uint32_t msb = etc1_val >> 1;
507
508
p[0] &= ~mask;
509
p[0] |= (lsb << byte_bit_ofs);
510
511
p[-2] &= ~mask;
512
p[-2] |= (msb << byte_bit_ofs);
513
}
514
515
// Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector())
516
inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const
517
{
518
assert((x | y) < 4);
519
520
const uint32_t bit_index = x * 4 + y;
521
const uint32_t byte_bit_ofs = bit_index & 7;
522
const uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
523
const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1;
524
const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1;
525
const uint32_t val = lsb | (msb << 1);
526
527
return val;
528
}
529
530
// Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
531
inline uint32_t get_selector(uint32_t x, uint32_t y) const
532
{
533
static const uint8_t s_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
534
return s_etc1_to_selector_index[get_raw_selector(x, y)];
535
}
536
537
inline void set_raw_selector_bits(uint32_t bits)
538
{
539
m_bytes[4] = static_cast<uint8_t>(bits);
540
m_bytes[5] = static_cast<uint8_t>(bits >> 8);
541
m_bytes[6] = static_cast<uint8_t>(bits >> 16);
542
m_bytes[7] = static_cast<uint8_t>(bits >> 24);
543
}
544
545
inline bool are_all_selectors_the_same() const
546
{
547
uint32_t v = *reinterpret_cast<const uint32_t*>(&m_bytes[4]);
548
549
if ((v == 0xFFFFFFFF) || (v == 0xFFFF) || (!v) || (v == 0xFFFF0000))
550
return true;
551
552
return false;
553
}
554
555
inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3)
556
{
557
m_bytes[4] = byte0;
558
m_bytes[5] = byte1;
559
m_bytes[6] = byte2;
560
m_bytes[7] = byte3;
561
}
562
563
inline uint32_t get_raw_selector_bits() const
564
{
565
return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24);
566
}
567
568
inline void set_base4_color(uint32_t idx, uint16_t c)
569
{
570
if (idx)
571
{
572
set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
573
set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
574
set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
575
}
576
else
577
{
578
set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
579
set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
580
set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
581
}
582
}
583
584
inline void set_base5_color(uint16_t c)
585
{
586
set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
587
set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
588
set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
589
}
590
591
void set_delta3_color(uint16_t c)
592
{
593
set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
594
set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
595
set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
596
}
597
598
void set_block_color4(const color32& c0_unscaled, const color32& c1_unscaled)
599
{
600
set_diff_bit(false);
601
602
set_base4_color(0, pack_color4(c0_unscaled, false));
603
set_base4_color(1, pack_color4(c1_unscaled, false));
604
}
605
606
void set_block_color5(const color32& c0_unscaled, const color32& c1_unscaled)
607
{
608
set_diff_bit(true);
609
610
set_base5_color(pack_color5(c0_unscaled, false));
611
612
int dr = c1_unscaled.r - c0_unscaled.r;
613
int dg = c1_unscaled.g - c0_unscaled.g;
614
int db = c1_unscaled.b - c0_unscaled.b;
615
616
set_delta3_color(pack_delta3(dr, dg, db));
617
}
618
619
bool set_block_color5_check(const color32& c0_unscaled, const color32& c1_unscaled)
620
{
621
set_diff_bit(true);
622
623
set_base5_color(pack_color5(c0_unscaled, false));
624
625
int dr = c1_unscaled.r - c0_unscaled.r;
626
int dg = c1_unscaled.g - c0_unscaled.g;
627
int db = c1_unscaled.b - c0_unscaled.b;
628
629
if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) ||
630
((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) ||
631
((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax)))
632
return false;
633
634
set_delta3_color(pack_delta3(dr, dg, db));
635
636
return true;
637
}
638
639
inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const
640
{
641
assert((ofs + num) <= 64U);
642
assert(num && (num <= 8U));
643
assert((ofs >> 3) == ((ofs + num - 1) >> 3));
644
const uint32_t byte_ofs = 7 - (ofs >> 3);
645
const uint32_t byte_bit_ofs = ofs & 7;
646
return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
647
}
648
649
inline uint16_t get_base5_color() const
650
{
651
const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
652
const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
653
const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
654
return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
655
}
656
657
inline uint16_t get_base4_color(uint32_t idx) const
658
{
659
uint32_t r, g, b;
660
if (idx)
661
{
662
r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
663
g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
664
b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
665
}
666
else
667
{
668
r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
669
g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
670
b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
671
}
672
return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
673
}
674
675
inline color32 get_base5_color_unscaled() const
676
{
677
return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255);
678
}
679
680
inline bool get_flip_bit() const
681
{
682
return (m_bytes[3] & 1) != 0;
683
}
684
685
inline bool get_diff_bit() const
686
{
687
return (m_bytes[3] & 2) != 0;
688
}
689
690
inline uint32_t get_inten_table(uint32_t subblock_id) const
691
{
692
assert(subblock_id < 2);
693
const uint32_t ofs = subblock_id ? 2 : 5;
694
return (m_bytes[3] >> ofs) & 7;
695
}
696
697
inline uint16_t get_delta3_color() const
698
{
699
const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
700
const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
701
const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
702
return static_cast<uint16_t>(b | (g << 3U) | (r << 6U));
703
}
704
705
void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const
706
{
707
color32 b;
708
709
if (get_diff_bit())
710
{
711
if (subblock_index)
712
unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255);
713
else
714
unpack_color5(b, get_base5_color(), true);
715
}
716
else
717
{
718
b = unpack_color4(get_base4_color(subblock_index), true, 255);
719
}
720
721
const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)];
722
723
pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
724
pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
725
pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
726
pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
727
}
728
729
static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U)
730
{
731
return pack_color4(color.r, color.g, color.b, scaled, bias);
732
}
733
734
static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
735
{
736
if (scaled)
737
{
738
r = (r * 15U + bias) / 255U;
739
g = (g * 15U + bias) / 255U;
740
b = (b * 15U + bias) / 255U;
741
}
742
743
r = basisu::minimum(r, 15U);
744
g = basisu::minimum(g, 15U);
745
b = basisu::minimum(b, 15U);
746
747
return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
748
}
749
750
static uint16_t pack_color5(const color32& color, bool scaled, uint32_t bias = 127U)
751
{
752
return pack_color5(color.r, color.g, color.b, scaled, bias);
753
}
754
755
static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
756
{
757
if (scaled)
758
{
759
r = (r * 31U + bias) / 255U;
760
g = (g * 31U + bias) / 255U;
761
b = (b * 31U + bias) / 255U;
762
}
763
764
r = basisu::minimum(r, 31U);
765
g = basisu::minimum(g, 31U);
766
b = basisu::minimum(b, 31U);
767
768
return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
769
}
770
771
uint16_t pack_delta3(const color32& color)
772
{
773
return pack_delta3(color.r, color.g, color.b);
774
}
775
776
uint16_t pack_delta3(int r, int g, int b)
777
{
778
assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
779
assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
780
assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
781
if (r < 0) r += 8;
782
if (g < 0) g += 8;
783
if (b < 0) b += 8;
784
return static_cast<uint16_t>(b | (g << 3) | (r << 6));
785
}
786
787
static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3)
788
{
789
r = (packed_delta3 >> 6) & 7;
790
g = (packed_delta3 >> 3) & 7;
791
b = packed_delta3 & 7;
792
if (r >= 4) r -= 8;
793
if (g >= 4) g -= 8;
794
if (b >= 4) b -= 8;
795
}
796
797
static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha)
798
{
799
uint32_t b = packed_color5 & 31U;
800
uint32_t g = (packed_color5 >> 5U) & 31U;
801
uint32_t r = (packed_color5 >> 10U) & 31U;
802
803
if (scaled)
804
{
805
b = (b << 3U) | (b >> 2U);
806
g = (g << 3U) | (g >> 2U);
807
r = (r << 3U) | (r >> 2U);
808
}
809
810
assert(alpha <= 255);
811
812
return color32(cNoClamp, r, g, b, alpha);
813
}
814
815
static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled)
816
{
817
color32 c(unpack_color5(packed_color5, scaled, 0));
818
r = c.r;
819
g = c.g;
820
b = c.b;
821
}
822
823
static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled)
824
{
825
result = unpack_color5(packed_color5, scaled, 255);
826
}
827
828
static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha)
829
{
830
int dr, dg, db;
831
unpack_delta3(dr, dg, db, packed_delta3);
832
833
int r = ((packed_color5 >> 10U) & 31U) + dr;
834
int g = ((packed_color5 >> 5U) & 31U) + dg;
835
int b = (packed_color5 & 31U) + db;
836
837
bool success = true;
838
if (static_cast<uint32_t>(r | g | b) > 31U)
839
{
840
success = false;
841
r = basisu::clamp<int>(r, 0, 31);
842
g = basisu::clamp<int>(g, 0, 31);
843
b = basisu::clamp<int>(b, 0, 31);
844
}
845
846
if (scaled)
847
{
848
b = (b << 3U) | (b >> 2U);
849
g = (g << 3U) | (g >> 2U);
850
r = (r << 3U) | (r >> 2U);
851
}
852
853
result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U));
854
return success;
855
}
856
857
static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha)
858
{
859
uint32_t b = packed_color4 & 15U;
860
uint32_t g = (packed_color4 >> 4U) & 15U;
861
uint32_t r = (packed_color4 >> 8U) & 15U;
862
863
if (scaled)
864
{
865
b = (b << 4U) | b;
866
g = (g << 4U) | g;
867
r = (r << 4U) | r;
868
}
869
870
return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U));
871
}
872
873
static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled)
874
{
875
color32 c(unpack_color4(packed_color4, scaled, 0));
876
r = c.r;
877
g = c.g;
878
b = c.b;
879
}
880
881
static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx)
882
{
883
assert(table_idx < cETC1IntenModifierValues);
884
const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
885
886
uint32_t r, g, b;
887
unpack_color5(r, g, b, packed_color5, true);
888
889
const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
890
891
const int y0 = pInten_modifer_table[0];
892
pDst[0].set(clamp255(ir + y0), clamp255(ig + y0), clamp255(ib + y0), 255);
893
894
const int y1 = pInten_modifer_table[1];
895
pDst[1].set(clamp255(ir + y1), clamp255(ig + y1), clamp255(ib + y1), 255);
896
897
const int y2 = pInten_modifer_table[2];
898
pDst[2].set(clamp255(ir + y2), clamp255(ig + y2), clamp255(ib + y2), 255);
899
900
const int y3 = pInten_modifer_table[3];
901
pDst[3].set(clamp255(ir + y3), clamp255(ig + y3), clamp255(ib + y3), 255);
902
}
903
904
static int clamp255(int x)
905
{
906
if (x & 0xFFFFFF00)
907
{
908
if (x < 0)
909
x = 0;
910
else if (x > 255)
911
x = 255;
912
}
913
914
return x;
915
}
916
917
static void get_block_colors5(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table)
918
{
919
color32 b(base_color5);
920
921
b.r = (b.r << 3) | (b.r >> 2);
922
b.g = (b.g << 3) | (b.g >> 2);
923
b.b = (b.b << 3) | (b.b >> 2);
924
925
const int* pInten_table = g_etc1_inten_tables[inten_table];
926
927
pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
928
pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
929
pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
930
pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
931
}
932
933
static void get_block_color5(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t& r, uint32_t &g, uint32_t &b)
934
{
935
assert(index < 4);
936
937
uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
938
uint32_t bg = (base_color5.g << 3) | (base_color5.g >> 2);
939
uint32_t bb = (base_color5.b << 3) | (base_color5.b >> 2);
940
941
const int* pInten_table = g_etc1_inten_tables[inten_table];
942
943
r = clamp255(br + pInten_table[index]);
944
g = clamp255(bg + pInten_table[index]);
945
b = clamp255(bb + pInten_table[index]);
946
}
947
948
static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r)
949
{
950
assert(index < 4);
951
952
uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
953
954
const int* pInten_table = g_etc1_inten_tables[inten_table];
955
956
r = clamp255(br + pInten_table[index]);
957
}
958
959
static void get_block_colors5_g(int* pBlock_colors, const color32& base_color5, uint32_t inten_table)
960
{
961
const int g = (base_color5.g << 3) | (base_color5.g >> 2);
962
963
const int* pInten_table = g_etc1_inten_tables[inten_table];
964
965
pBlock_colors[0] = clamp255(g + pInten_table[0]);
966
pBlock_colors[1] = clamp255(g + pInten_table[1]);
967
pBlock_colors[2] = clamp255(g + pInten_table[2]);
968
pBlock_colors[3] = clamp255(g + pInten_table[3]);
969
}
970
971
static void get_block_colors5_bounds(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
972
{
973
color32 b(base_color5);
974
975
b.r = (b.r << 3) | (b.r >> 2);
976
b.g = (b.g << 3) | (b.g >> 2);
977
b.b = (b.b << 3) | (b.b >> 2);
978
979
const int* pInten_table = g_etc1_inten_tables[inten_table];
980
981
pBlock_colors[0].set(clamp255(b.r + pInten_table[l]), clamp255(b.g + pInten_table[l]), clamp255(b.b + pInten_table[l]), 255);
982
pBlock_colors[1].set(clamp255(b.r + pInten_table[h]), clamp255(b.g + pInten_table[h]), clamp255(b.b + pInten_table[h]), 255);
983
}
984
985
static void get_block_colors5_bounds_g(uint32_t* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
986
{
987
color32 b(base_color5);
988
989
b.g = (b.g << 3) | (b.g >> 2);
990
991
const int* pInten_table = g_etc1_inten_tables[inten_table];
992
993
pBlock_colors[0] = clamp255(b.g + pInten_table[l]);
994
pBlock_colors[1] = clamp255(b.g + pInten_table[h]);
995
}
996
};
997
998
enum dxt_constants
999
{
1000
cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U,
1001
cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U,
1002
};
1003
1004
// g_etc1_x_selector_unpack[x][v], for x in 0..3 and byte value v in 0..255, holds the 2-bit value
//   ((v >> x) & 1) | (((v >> (x + 4)) & 1) << 1)
// i.e. bit x of v becomes the low bit and bit (x + 4) of v becomes the high bit.
// NOTE(review): the name suggests this pulls the selector for column x out of a packed ETC1
// selector byte - confirm against the call sites.
static const uint8_t g_etc1_x_selector_unpack[4][256] =
{
	// x = 0: low bit from bit 0, high bit from bit 4
	{
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
		0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
	},
	// x = 1: low bit from bit 1, high bit from bit 5
	{
		0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
		2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
		0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
		2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
		0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
		2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
		0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
		2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
	},
	// x = 2: low bit from bit 2, high bit from bit 6
	{
		0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
		0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
		2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
		0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
		0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
		2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
		2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
	},
	// x = 3: low bit from bit 3, high bit from bit 7
	{
		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
		2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
	}
};
1049
1050
struct dxt1_block
1051
{
1052
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
1053
1054
uint8_t m_low_color[cTotalEndpointBytes];
1055
uint8_t m_high_color[cTotalEndpointBytes];
1056
uint8_t m_selectors[cTotalSelectorBytes];
1057
1058
inline void clear() { basisu::clear_obj(*this); }
1059
1060
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
1061
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
1062
inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
1063
inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
1064
inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; }
1065
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
1066
1067
static uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U)
1068
{
1069
uint32_t r = color.r, g = color.g, b = color.b;
1070
if (scaled)
1071
{
1072
r = (r * 31U + bias) / 255U;
1073
g = (g * 63U + bias) / 255U;
1074
b = (b * 31U + bias) / 255U;
1075
}
1076
return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
1077
}
1078
1079
static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
1080
};
1081
1082
// Inclusive range [m_low, m_high] of selector indices.
struct dxt_selector_range
{
	uint32_t m_low;		// first selector index in the range
	uint32_t m_high;	// last selector index in the range (inclusive)
};
1087
1088
// One precomputed ETC1 -> DXT1 endpoint solution (5- or 6-bit component).
struct etc1_to_dxt1_56_solution
{
	uint8_t m_lo;	// low endpoint
	uint8_t m_hi;	// high endpoint
	uint16_t m_err;	// error of this solution
};
1094
1095
#if BASISD_SUPPORT_DXT1
1096
static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =
1097
{
1098
{ 0, 3 },
1099
1100
{ 1, 3 },
1101
{ 0, 2 },
1102
1103
{ 1, 2 },
1104
1105
{ 2, 3 },
1106
{ 0, 1 },
1107
};
1108
1109
const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES = sizeof(g_etc1_to_dxt1_selector_ranges) / sizeof(g_etc1_to_dxt1_selector_ranges[0]);
1110
1111
// 4x4 lookup of selector range indices; filled in at runtime (not initialized here).
static uint32_t g_etc1_to_dxt1_selector_range_index[4][4];

// Number of candidate ETC1 -> DXT1 selector mappings.
const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS = 10;

// g_etc1_to_dxt1_selector_mappings[m][s] is the DXT1 selector that ETC1 selector s maps to
// under mapping m. Every row is non-decreasing in s.
static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] =
{
	{ 0, 0, 1, 1 },	// 0
	{ 0, 0, 1, 2 },	// 1
	{ 0, 0, 1, 3 },	// 2
	{ 0, 0, 2, 3 },	// 3
	{ 0, 1, 1, 1 },	// 4
	{ 0, 1, 2, 2 },	// 5
	{ 0, 1, 2, 3 },	// 6
	{ 0, 2, 3, 3 },	// 7
	{ 1, 2, 2, 2 },	// 8
	{ 1, 2, 3, 3 },	// 9
};

// Per-mapping 256-entry byte tables; filled in at runtime (not initialized here).
static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
1130
1131
// Precomputed ETC1 -> DXT1 solutions for 6-bit endpoints, pulled in from a generated .inc file.
// There is one entry per (intensity table: 8) x (5-bit base value: 32) x (selector range) x
// (selector mapping) combination.
// NOTE(review): create_etc1_to_dxt1_6_conversion_table() emits entries with the intensity table
// as the outermost loop, then base value, selector range and selector mapping innermost -
// confirm the checked-in .inc was produced by it.
static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] =
{
	#include "basisu_transcoder_tables_dxt1_6.inc"
};

// Same layout as g_etc1_to_dxt_6, but for 5-bit endpoints.
static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] =
{
	#include "basisu_transcoder_tables_dxt1_5.inc"
};
1138
#endif // BASISD_SUPPORT_DXT1
1139
1140
#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
1141
// First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
//
// One table entry: the (hi, lo) endpoint pair chosen for a given 8-bit target value.
struct bc1_match_entry
{
	uint8_t m_hi;
	uint8_t m_lo;
};

// Lookup tables indexed by 8-bit target value, for 5-bit and 6-bit endpoint components.
// "equals" means the hi and lo endpoints are allowed to be identical.

// Selector 1
static bc1_match_entry g_bc1_match5_equals_1[256];
static bc1_match_entry g_bc1_match6_equals_1[256];

// Selector 0
static bc1_match_entry g_bc1_match5_equals_0[256];
static bc1_match_entry g_bc1_match6_equals_0[256];
1149
1150
// Fills a 256-entry table mapping each 8-bit target value to the (hi, lo) endpoint pair that
// reproduces it with the lowest error.
//
// pTable   Receives 256 entries, indexed by target value.
// pExpand  Maps an endpoint index to its expanded 8-bit value; indexed by both lo and hi.
// size0    Number of lo endpoint candidates (lo iterates 0..size0-1).
// size1    Number of hi endpoint candidates (hi iterates 0..size1-1).
// sel      Which selector the table is built for: 1 (interpolated, (2*hi + lo) / 3) or 0 (hi only).
//
// An entry is only written when a candidate's error is below 256; ties keep the first
// candidate found (lo outer loop, hi inner loop).
static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel)
{
	for (int target = 0; target < 256; target++)
	{
		int best_err = 256;

		for (int lo = 0; lo < size0; lo++)
		{
			for (int hi = 0; hi < size1; hi++)
			{
				const int lo_expanded = pExpand[lo];
				const int hi_expanded = pExpand[hi];

				int err;
				if (sel == 1)
				{
					// Selector 1: interpolated value, plus a small penalty that grows with the
					// distance between the two endpoints.
					err = basisu::iabs(((hi_expanded * 2 + lo_expanded) / 3) - target);
					err += (basisu::iabs(hi_expanded - lo_expanded) * 3) / 100;
				}
				else
				{
					assert(sel == 0);

					// Selector 0: only the hi endpoint contributes.
					err = basisu::iabs(hi_expanded - target);
				}

				if (err >= best_err)
					continue;

				pTable[target].m_hi = static_cast<uint8_t>(hi);
				pTable[target].m_lo = static_cast<uint8_t>(lo);
				best_err = err;
			}
		}
	}
}
1188
#endif
1189
1190
#if BASISD_WRITE_NEW_DXT1_TABLES
1191
static void create_etc1_to_dxt1_5_conversion_table()
1192
{
1193
FILE* pFile = nullptr;
1194
fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w");
1195
1196
uint32_t n = 0;
1197
1198
for (int inten = 0; inten < 8; inten++)
1199
{
1200
for (uint32_t g = 0; g < 32; g++)
1201
{
1202
color32 block_colors[4];
1203
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1204
1205
for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1206
{
1207
const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1208
const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1209
1210
for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1211
{
1212
uint32_t best_lo = 0;
1213
uint32_t best_hi = 0;
1214
uint64_t best_err = UINT64_MAX;
1215
1216
for (uint32_t hi = 0; hi <= 31; hi++)
1217
{
1218
for (uint32_t lo = 0; lo <= 31; lo++)
1219
{
1220
//if (lo == hi) continue;
1221
1222
uint32_t colors[4];
1223
1224
colors[0] = (lo << 3) | (lo >> 2);
1225
colors[3] = (hi << 3) | (hi >> 2);
1226
1227
colors[1] = (colors[0] * 2 + colors[3]) / 3;
1228
colors[2] = (colors[3] * 2 + colors[0]) / 3;
1229
1230
uint64_t total_err = 0;
1231
1232
for (uint32_t s = low_selector; s <= high_selector; s++)
1233
{
1234
int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1235
1236
total_err += err * err;
1237
}
1238
1239
if (total_err < best_err)
1240
{
1241
best_err = total_err;
1242
best_lo = lo;
1243
best_hi = hi;
1244
}
1245
}
1246
}
1247
1248
assert(best_err <= 0xFFFF);
1249
1250
//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1251
//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1252
//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1253
1254
//assert(best_lo != best_hi);
1255
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1256
n++;
1257
if ((n & 31) == 31)
1258
fprintf(pFile, "\n");
1259
} // m
1260
} // sr
1261
} // g
1262
} // inten
1263
1264
fclose(pFile);
1265
}
1266
1267
static void create_etc1_to_dxt1_6_conversion_table()
1268
{
1269
FILE* pFile = nullptr;
1270
fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w");
1271
1272
uint32_t n = 0;
1273
1274
for (int inten = 0; inten < 8; inten++)
1275
{
1276
for (uint32_t g = 0; g < 32; g++)
1277
{
1278
color32 block_colors[4];
1279
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1280
1281
for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1282
{
1283
const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1284
const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1285
1286
for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1287
{
1288
uint32_t best_lo = 0;
1289
uint32_t best_hi = 0;
1290
uint64_t best_err = UINT64_MAX;
1291
1292
for (uint32_t hi = 0; hi <= 63; hi++)
1293
{
1294
for (uint32_t lo = 0; lo <= 63; lo++)
1295
{
1296
//if (lo == hi) continue;
1297
1298
uint32_t colors[4];
1299
1300
colors[0] = (lo << 2) | (lo >> 4);
1301
colors[3] = (hi << 2) | (hi >> 4);
1302
1303
colors[1] = (colors[0] * 2 + colors[3]) / 3;
1304
colors[2] = (colors[3] * 2 + colors[0]) / 3;
1305
1306
uint64_t total_err = 0;
1307
1308
for (uint32_t s = low_selector; s <= high_selector; s++)
1309
{
1310
int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1311
1312
total_err += err * err;
1313
}
1314
1315
if (total_err < best_err)
1316
{
1317
best_err = total_err;
1318
best_lo = lo;
1319
best_hi = hi;
1320
}
1321
}
1322
}
1323
1324
assert(best_err <= 0xFFFF);
1325
1326
//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1327
//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1328
//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1329
1330
//assert(best_lo != best_hi);
1331
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1332
n++;
1333
if ((n & 31) == 31)
1334
fprintf(pFile, "\n");
1335
1336
} // m
1337
} // sr
1338
} // g
1339
} // inten
1340
1341
fclose(pFile);
1342
}
1343
#endif
1344
1345
#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1346
// EAC modifier table: 16 tables of 8 signed modifiers each, indexed [table][selector].
// In every row, entry s + 4 equals -(entry s) - 1.
static const int8_t g_eac_modifier_table[16][8] =
{
	{ -3, -6,  -9, -15, 2, 5, 8, 14 },	// 0
	{ -3, -7, -10, -13, 2, 6, 9, 12 },	// 1
	{ -2, -5,  -8, -13, 1, 4, 7, 12 },	// 2
	{ -2, -4,  -6, -13, 1, 3, 5, 12 },	// 3
	{ -3, -6,  -8, -12, 2, 5, 7, 11 },	// 4
	{ -3, -7,  -9, -11, 2, 6, 8, 10 },	// 5
	{ -4, -7,  -8, -11, 3, 6, 7, 10 },	// 6
	{ -3, -5,  -8, -11, 2, 4, 7, 10 },	// 7

	{ -2, -6,  -8, -10, 1, 5, 7,  9 },	// 8
	{ -2, -5,  -8, -10, 1, 4, 7,  9 },	// 9
	{ -2, -4,  -8, -10, 1, 3, 7,  9 },	// 10
	{ -2, -5,  -7, -10, 1, 4, 6,  9 },	// 11
	{ -3, -4,  -7, -10, 2, 3, 6,  9 },	// 12
	{ -1, -2,  -3, -10, 0, 1, 2,  9 },	// 13
	{ -4, -6,  -8,  -9, 3, 5, 7,  8 },	// 14
	{ -3, -5,  -7,  -9, 2, 4, 6,  8 }	// 15
};
1366
1367
// Used by ETC2 EAC A8 and ETC2 EAC R11/RG11.
//
// Holds a base value, a 4-bit table index, a 4-bit multiplier and sixteen 3-bit selectors.
// The selectors occupy 48 bits stored most-significant byte first in m_selectors; the texel
// at (x, y) has index y + x * 4, and index 0 sits in the topmost 3 bits.
struct eac_block
{
	uint16_t m_base : 8;

	uint16_t m_table : 4;
	uint16_t m_multiplier : 4;

	uint8_t m_selectors[6];

	// Returns the 3-bit selector of texel (x, y); x and y must both be < 4.
	uint32_t get_selector(uint32_t x, uint32_t y) const
	{
		assert((x < 4) && (y < 4));

		const uint32_t shift = 45 - (y + x * 4) * 3;

		return static_cast<uint32_t>((get_selector_bits() >> shift) & 7);
	}

	// Replaces the selector of texel (x, y) with s; x and y must be < 4 and s must be < 8.
	void set_selector(uint32_t x, uint32_t y, uint32_t s)
	{
		assert((x < 4) && (y < 4) && (s < 8));

		const uint32_t shift = 45 - (y + x * 4) * 3;

		uint64_t bits = get_selector_bits();
		bits = (bits & ~(7ULL << shift)) | (static_cast<uint64_t>(s) << shift);

		set_selector_bits(bits);
	}

	// Returns all 48 selector bits as one integer, with m_selectors[0] as the most significant byte.
	uint64_t get_selector_bits() const
	{
		uint64_t bits = 0;
		for (uint32_t i = 0; i < 6; i++)
			bits = (bits << 8) | m_selectors[i];
		return bits;
	}

	// Stores the low 48 bits of pixels into m_selectors, most significant byte first.
	void set_selector_bits(uint64_t pixels)
	{
		for (uint32_t i = 0; i < 6; i++)
			m_selectors[i] = (uint8_t)(pixels >> (40 - i * 8));
	}
};
1420
1421
#endif // BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1422
1423
#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1424
static const dxt_selector_range s_etc2_eac_selector_ranges[] =
1425
{
1426
{ 0, 3 },
1427
1428
{ 1, 3 },
1429
{ 0, 2 },
1430
1431
{ 1, 2 },
1432
};
1433
1434
const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES = sizeof(s_etc2_eac_selector_ranges) / sizeof(s_etc2_eac_selector_ranges[0]);
1435
1436
// One precomputed conversion from an ETC1 grayscale base value + intensity table (restricted
// to a selector range) to EAC block parameters.
struct etc1_g_to_eac_conversion
{
	uint8_t m_base;			// EAC base value
	uint8_t m_table_mul;	// table*16+mul (table index in the high nibble, multiplier in the low nibble)
	uint16_t m_trans;		// translates ETC1 selectors to ETC2_EAC_A8: 3 bits per ETC1 selector s, at bit s*3
};
1442
#endif // BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1443
1444
#if BASISD_SUPPORT_ETC2_EAC_A8
1445
1446
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1447
struct pack_eac_a8_results
1448
{
1449
uint32_t m_base;
1450
uint32_t m_table;
1451
uint32_t m_multiplier;
1452
basisu::vector<uint8_t> m_selectors;
1453
basisu::vector<uint8_t> m_selectors_temp;
1454
};
1455
1456
// Brute-force search for the EAC base / multiplier / table combination that best reproduces
// a set of 8-bit values.
//
// results     Receives the winning base, multiplier, table and per-pixel selectors. Both
//             selector vectors are resized to num_pixels.
// pPixels     The values to approximate.
// num_pixels  Number of entries in pPixels.
//
// Tries every base (0..255), multiplier (1..15) and table (0..15). Returns the lowest sum of
// squared per-pixel errors found; ties keep the first combination encountered.
static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels)
{
	results.m_selectors.resize(num_pixels);
	results.m_selectors_temp.resize(num_pixels);

	uint64_t lowest_total = UINT64_MAX;

	for (uint32_t base_color = 0; base_color < 256; base_color++)
	{
		for (uint32_t multiplier = 1; multiplier < 16; multiplier++)
		{
			for (uint32_t table = 0; table < 16; table++)
			{
				uint64_t trial_total = 0;

				for (uint32_t px = 0; px < num_pixels; px++)
				{
					const int target = pPixels[px];

					// Pick the selector whose decoded value lies closest to the target.
					uint32_t closest_dist = UINT32_MAX;
					uint32_t closest_sel = 0;
					for (uint32_t sel = 0; sel < 8; sel++)
					{
						int decoded = (int)multiplier * g_eac_modifier_table[table][sel] + (int)base_color;
						decoded = (decoded < 0) ? 0 : ((decoded > 255) ? 255 : decoded);

						const uint32_t dist = abs(target - decoded);
						if (dist < closest_dist)
						{
							closest_dist = dist;
							closest_sel = sel;
						}
					}

					results.m_selectors_temp[px] = static_cast<uint8_t>(closest_sel);

					// Give up on this candidate as soon as it can no longer beat the best so far.
					trial_total += closest_dist * closest_dist;
					if (trial_total >= lowest_total)
						break;
				}

				if (trial_total >= lowest_total)
					continue;

				lowest_total = trial_total;
				results.m_base = base_color;
				results.m_multiplier = multiplier;
				results.m_table = table;
				results.m_selectors.swap(results.m_selectors_temp);
			} // table
		} // multiplier
	} // base_color

	return lowest_total;
}
1517
#endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1518
1519
// Precomputed ETC1 -> ETC2 EAC A8 conversion table, indexed [base + inten * 32][selector range],
// where base is the 5-bit ETC1 grayscale base value (0..31), inten is the ETC1 intensity table
// (0..7) and the second index selects an entry of s_etc2_eac_selector_ranges.
// Each entry is { m_base, m_table_mul, m_trans }. create_etc2_eac_a8_conversion_table() writes
// m_table_mul as table * 16 + multiplier and m_trans as 3 bits per ETC1 selector s at bit s * 3.
// The table is const unless BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES is set, in which case that
// generator writes into it.
static
1520
#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1521
const
1522
#endif
1523
etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
1524
{
1525
{ { 0,1,3328 },{ 0,1,3328 },{ 0,1,256 },{ 0,1,256 } },
1526
{ { 0,226,3936 },{ 0,226,3936 },{ 0,81,488 },{ 0,81,488 } },
1527
{ { 6,178,4012 },{ 6,178,4008 },{ 0,146,501 },{ 0,130,496 } },
1528
{ { 14,178,4012 },{ 14,178,4008 },{ 8,146,501 },{ 6,82,496 } },
1529
{ { 23,178,4012 },{ 23,178,4008 },{ 17,146,501 },{ 3,228,496 } },
1530
{ { 31,178,4012 },{ 31,178,4008 },{ 25,146,501 },{ 11,228,496 } },
1531
{ { 39,178,4012 },{ 39,178,4008 },{ 33,146,501 },{ 19,228,496 } },
1532
{ { 47,178,4012 },{ 47,178,4008 },{ 41,146,501 },{ 27,228,496 } },
1533
{ { 56,178,4012 },{ 56,178,4008 },{ 50,146,501 },{ 36,228,496 } },
1534
{ { 64,178,4012 },{ 64,178,4008 },{ 58,146,501 },{ 44,228,496 } },
1535
{ { 72,178,4012 },{ 72,178,4008 },{ 66,146,501 },{ 52,228,496 } },
1536
{ { 80,178,4012 },{ 80,178,4008 },{ 74,146,501 },{ 60,228,496 } },
1537
{ { 89,178,4012 },{ 89,178,4008 },{ 83,146,501 },{ 69,228,496 } },
1538
{ { 97,178,4012 },{ 97,178,4008 },{ 91,146,501 },{ 77,228,496 } },
1539
{ { 105,178,4012 },{ 105,178,4008 },{ 99,146,501 },{ 85,228,496 } },
1540
{ { 113,178,4012 },{ 113,178,4008 },{ 107,146,501 },{ 93,228,496 } },
1541
{ { 122,178,4012 },{ 122,178,4008 },{ 116,146,501 },{ 102,228,496 } },
1542
{ { 130,178,4012 },{ 130,178,4008 },{ 124,146,501 },{ 110,228,496 } },
1543
{ { 138,178,4012 },{ 138,178,4008 },{ 132,146,501 },{ 118,228,496 } },
1544
{ { 146,178,4012 },{ 146,178,4008 },{ 140,146,501 },{ 126,228,496 } },
1545
{ { 155,178,4012 },{ 155,178,4008 },{ 149,146,501 },{ 135,228,496 } },
1546
{ { 163,178,4012 },{ 163,178,4008 },{ 157,146,501 },{ 143,228,496 } },
1547
{ { 171,178,4012 },{ 171,178,4008 },{ 165,146,501 },{ 151,228,496 } },
1548
{ { 179,178,4012 },{ 179,178,4008 },{ 173,146,501 },{ 159,228,496 } },
1549
{ { 188,178,4012 },{ 188,178,4008 },{ 182,146,501 },{ 168,228,496 } },
1550
{ { 196,178,4012 },{ 196,178,4008 },{ 190,146,501 },{ 176,228,496 } },
1551
{ { 204,178,4012 },{ 204,178,4008 },{ 198,146,501 },{ 184,228,496 } },
1552
{ { 212,178,4012 },{ 212,178,4008 },{ 206,146,501 },{ 192,228,496 } },
1553
{ { 221,178,4012 },{ 221,178,4008 },{ 215,146,501 },{ 201,228,496 } },
1554
{ { 229,178,4012 },{ 229,178,4008 },{ 223,146,501 },{ 209,228,496 } },
1555
{ { 235,66,4012 },{ 221,100,4008 },{ 231,146,501 },{ 217,228,496 } },
1556
{ { 211,102,4085 },{ 118,31,4080 },{ 211,102,501 },{ 118,31,496 } },
1557
{ { 1,2,3328 },{ 1,2,3328 },{ 0,1,320 },{ 0,1,320 } },
1558
{ { 7,162,3905 },{ 7,162,3904 },{ 1,17,480 },{ 1,17,480 } },
1559
{ { 15,162,3906 },{ 15,162,3904 },{ 1,117,352 },{ 1,117,352 } },
1560
{ { 23,162,3906 },{ 23,162,3904 },{ 5,34,500 },{ 4,53,424 } },
1561
{ { 32,162,3906 },{ 32,162,3904 },{ 14,34,500 },{ 3,69,424 } },
1562
{ { 40,162,3906 },{ 40,162,3904 },{ 22,34,500 },{ 1,133,496 } },
1563
{ { 48,162,3906 },{ 48,162,3904 },{ 30,34,500 },{ 4,85,496 } },
1564
{ { 56,162,3906 },{ 56,162,3904 },{ 38,34,500 },{ 12,85,496 } },
1565
{ { 65,162,3906 },{ 65,162,3904 },{ 47,34,500 },{ 1,106,424 } },
1566
{ { 73,162,3906 },{ 73,162,3904 },{ 55,34,500 },{ 9,106,424 } },
1567
{ { 81,162,3906 },{ 81,162,3904 },{ 63,34,500 },{ 7,234,496 } },
1568
{ { 89,162,3906 },{ 89,162,3904 },{ 71,34,500 },{ 15,234,496 } },
1569
{ { 98,162,3906 },{ 98,162,3904 },{ 80,34,500 },{ 24,234,496 } },
1570
{ { 106,162,3906 },{ 106,162,3904 },{ 88,34,500 },{ 32,234,496 } },
1571
{ { 114,162,3906 },{ 114,162,3904 },{ 96,34,500 },{ 40,234,496 } },
1572
{ { 122,162,3906 },{ 122,162,3904 },{ 104,34,500 },{ 48,234,496 } },
1573
{ { 131,162,3906 },{ 131,162,3904 },{ 113,34,500 },{ 57,234,496 } },
1574
{ { 139,162,3906 },{ 139,162,3904 },{ 121,34,500 },{ 65,234,496 } },
1575
{ { 147,162,3906 },{ 147,162,3904 },{ 129,34,500 },{ 73,234,496 } },
1576
{ { 155,162,3906 },{ 155,162,3904 },{ 137,34,500 },{ 81,234,496 } },
1577
{ { 164,162,3906 },{ 164,162,3904 },{ 146,34,500 },{ 90,234,496 } },
1578
{ { 172,162,3906 },{ 172,162,3904 },{ 154,34,500 },{ 98,234,496 } },
1579
{ { 180,162,3906 },{ 180,162,3904 },{ 162,34,500 },{ 106,234,496 } },
1580
{ { 188,162,3906 },{ 188,162,3904 },{ 170,34,500 },{ 114,234,496 } },
1581
{ { 197,162,3906 },{ 197,162,3904 },{ 179,34,500 },{ 123,234,496 } },
1582
{ { 205,162,3906 },{ 205,162,3904 },{ 187,34,500 },{ 131,234,496 } },
1583
{ { 213,162,3906 },{ 213,162,3904 },{ 195,34,500 },{ 139,234,496 } },
1584
{ { 221,162,3906 },{ 221,162,3904 },{ 203,34,500 },{ 147,234,496 } },
1585
{ { 230,162,3906 },{ 230,162,3904 },{ 212,34,500 },{ 156,234,496 } },
1586
{ { 238,162,3906 },{ 174,106,4008 },{ 220,34,500 },{ 164,234,496 } },
1587
{ { 240,178,4001 },{ 182,106,4008 },{ 228,34,500 },{ 172,234,496 } },
1588
{ { 166,108,4085 },{ 115,31,4080 },{ 166,108,501 },{ 115,31,496 } },
1589
{ { 1,68,3328 },{ 1,68,3328 },{ 0,17,384 },{ 0,17,384 } },
1590
{ { 1,148,3904 },{ 1,148,3904 },{ 1,2,384 },{ 1,2,384 } },
1591
{ { 21,18,3851 },{ 21,18,3848 },{ 1,50,488 },{ 1,50,488 } },
1592
{ { 27,195,3851 },{ 29,18,3848 },{ 0,67,488 },{ 0,67,488 } },
1593
{ { 34,195,3907 },{ 38,18,3848 },{ 20,66,482 },{ 0,3,496 } },
1594
{ { 42,195,3907 },{ 46,18,3848 },{ 28,66,482 },{ 2,6,424 } },
1595
{ { 50,195,3907 },{ 54,18,3848 },{ 36,66,482 },{ 4,22,424 } },
1596
{ { 58,195,3907 },{ 62,18,3848 },{ 44,66,482 },{ 3,73,424 } },
1597
{ { 67,195,3907 },{ 71,18,3848 },{ 53,66,482 },{ 3,22,496 } },
1598
{ { 75,195,3907 },{ 79,18,3848 },{ 61,66,482 },{ 2,137,496 } },
1599
{ { 83,195,3907 },{ 87,18,3848 },{ 69,66,482 },{ 1,89,496 } },
1600
{ { 91,195,3907 },{ 95,18,3848 },{ 77,66,482 },{ 9,89,496 } },
1601
{ { 100,195,3907 },{ 104,18,3848 },{ 86,66,482 },{ 18,89,496 } },
1602
{ { 108,195,3907 },{ 112,18,3848 },{ 94,66,482 },{ 26,89,496 } },
1603
{ { 116,195,3907 },{ 120,18,3848 },{ 102,66,482 },{ 34,89,496 } },
1604
{ { 124,195,3907 },{ 128,18,3848 },{ 110,66,482 },{ 42,89,496 } },
1605
{ { 133,195,3907 },{ 137,18,3848 },{ 119,66,482 },{ 51,89,496 } },
1606
{ { 141,195,3907 },{ 145,18,3848 },{ 127,66,482 },{ 59,89,496 } },
1607
{ { 149,195,3907 },{ 153,18,3848 },{ 135,66,482 },{ 67,89,496 } },
1608
{ { 157,195,3907 },{ 161,18,3848 },{ 143,66,482 },{ 75,89,496 } },
1609
{ { 166,195,3907 },{ 170,18,3848 },{ 152,66,482 },{ 84,89,496 } },
1610
{ { 174,195,3907 },{ 178,18,3848 },{ 160,66,482 },{ 92,89,496 } },
1611
{ { 182,195,3907 },{ 186,18,3848 },{ 168,66,482 },{ 100,89,496 } },
1612
{ { 190,195,3907 },{ 194,18,3848 },{ 176,66,482 },{ 108,89,496 } },
1613
{ { 199,195,3907 },{ 203,18,3848 },{ 185,66,482 },{ 117,89,496 } },
1614
{ { 207,195,3907 },{ 211,18,3848 },{ 193,66,482 },{ 125,89,496 } },
1615
{ { 215,195,3907 },{ 219,18,3848 },{ 201,66,482 },{ 133,89,496 } },
1616
{ { 223,195,3907 },{ 227,18,3848 },{ 209,66,482 },{ 141,89,496 } },
1617
{ { 231,195,3907 },{ 168,89,4008 },{ 218,66,482 },{ 150,89,496 } },
1618
{ { 236,18,3907 },{ 176,89,4008 },{ 226,66,482 },{ 158,89,496 } },
1619
{ { 158,90,4085 },{ 103,31,4080 },{ 158,90,501 },{ 103,31,496 } },
1620
{ { 166,90,4085 },{ 111,31,4080 },{ 166,90,501 },{ 111,31,496 } },
1621
{ { 0,70,3328 },{ 0,70,3328 },{ 0,45,256 },{ 0,45,256 } },
1622
{ { 0,117,3904 },{ 0,117,3904 },{ 0,35,384 },{ 0,35,384 } },
1623
{ { 13,165,3905 },{ 13,165,3904 },{ 3,221,416 },{ 3,221,416 } },
1624
{ { 21,165,3906 },{ 21,165,3904 },{ 11,221,416 },{ 11,221,416 } },
1625
{ { 30,165,3906 },{ 30,165,3904 },{ 7,61,352 },{ 7,61,352 } },
1626
{ { 38,165,3906 },{ 38,165,3904 },{ 2,125,352 },{ 2,125,352 } },
1627
{ { 46,165,3906 },{ 46,165,3904 },{ 2,37,500 },{ 10,125,352 } },
1628
{ { 54,165,3906 },{ 54,165,3904 },{ 10,37,500 },{ 5,61,424 } },
1629
{ { 63,165,3906 },{ 63,165,3904 },{ 19,37,500 },{ 1,189,424 } },
1630
{ { 4,254,4012 },{ 71,165,3904 },{ 27,37,500 },{ 9,189,424 } },
1631
{ { 12,254,4012 },{ 79,165,3904 },{ 35,37,500 },{ 4,77,424 } },
1632
{ { 20,254,4012 },{ 87,165,3904 },{ 43,37,500 },{ 12,77,424 } },
1633
{ { 29,254,4012 },{ 96,165,3904 },{ 52,37,500 },{ 8,93,424 } },
1634
{ { 37,254,4012 },{ 104,165,3904 },{ 60,37,500 },{ 3,141,496 } },
1635
{ { 45,254,4012 },{ 112,165,3904 },{ 68,37,500 },{ 11,141,496 } },
1636
{ { 53,254,4012 },{ 120,165,3904 },{ 76,37,500 },{ 6,93,496 } },
1637
{ { 62,254,4012 },{ 129,165,3904 },{ 85,37,500 },{ 15,93,496 } },
1638
{ { 70,254,4012 },{ 137,165,3904 },{ 93,37,500 },{ 23,93,496 } },
1639
{ { 78,254,4012 },{ 145,165,3904 },{ 101,37,500 },{ 31,93,496 } },
1640
{ { 86,254,4012 },{ 153,165,3904 },{ 109,37,500 },{ 39,93,496 } },
1641
{ { 95,254,4012 },{ 162,165,3904 },{ 118,37,500 },{ 48,93,496 } },
1642
{ { 103,254,4012 },{ 170,165,3904 },{ 126,37,500 },{ 56,93,496 } },
1643
{ { 111,254,4012 },{ 178,165,3904 },{ 134,37,500 },{ 64,93,496 } },
1644
{ { 119,254,4012 },{ 186,165,3904 },{ 142,37,500 },{ 72,93,496 } },
1645
{ { 128,254,4012 },{ 195,165,3904 },{ 151,37,500 },{ 81,93,496 } },
1646
{ { 136,254,4012 },{ 203,165,3904 },{ 159,37,500 },{ 89,93,496 } },
1647
{ { 212,165,3906 },{ 136,77,4008 },{ 167,37,500 },{ 97,93,496 } },
1648
{ { 220,165,3394 },{ 131,93,4008 },{ 175,37,500 },{ 105,93,496 } },
1649
{ { 214,181,4001 },{ 140,93,4008 },{ 184,37,500 },{ 114,93,496 } },
1650
{ { 222,181,4001 },{ 148,93,4008 },{ 192,37,500 },{ 122,93,496 } },
1651
{ { 114,95,4085 },{ 99,31,4080 },{ 114,95,501 },{ 99,31,496 } },
1652
{ { 122,95,4085 },{ 107,31,4080 },{ 122,95,501 },{ 107,31,496 } },
1653
{ { 0,102,3840 },{ 0,102,3840 },{ 0,18,384 },{ 0,18,384 } },
1654
{ { 5,167,3904 },{ 5,167,3904 },{ 0,13,256 },{ 0,13,256 } },
1655
{ { 4,54,3968 },{ 4,54,3968 },{ 1,67,448 },{ 1,67,448 } },
1656
{ { 30,198,3850 },{ 30,198,3848 },{ 0,3,480 },{ 0,3,480 } },
1657
{ { 39,198,3850 },{ 39,198,3848 },{ 3,52,488 },{ 3,52,488 } },
1658
{ { 47,198,3851 },{ 47,198,3848 },{ 3,4,488 },{ 3,4,488 } },
1659
{ { 55,198,3851 },{ 55,198,3848 },{ 1,70,488 },{ 1,70,488 } },
1660
{ { 54,167,3906 },{ 63,198,3848 },{ 3,22,488 },{ 3,22,488 } },
1661
{ { 62,167,3906 },{ 72,198,3848 },{ 24,118,488 },{ 0,6,496 } },
1662
{ { 70,167,3906 },{ 80,198,3848 },{ 32,118,488 },{ 2,89,488 } },
1663
{ { 78,167,3906 },{ 88,198,3848 },{ 40,118,488 },{ 1,73,496 } },
1664
{ { 86,167,3906 },{ 96,198,3848 },{ 48,118,488 },{ 0,28,424 } },
1665
{ { 95,167,3906 },{ 105,198,3848 },{ 57,118,488 },{ 9,28,424 } },
1666
{ { 103,167,3906 },{ 113,198,3848 },{ 65,118,488 },{ 5,108,496 } },
1667
{ { 111,167,3906 },{ 121,198,3848 },{ 73,118,488 },{ 13,108,496 } },
1668
{ { 119,167,3906 },{ 129,198,3848 },{ 81,118,488 },{ 21,108,496 } },
1669
{ { 128,167,3906 },{ 138,198,3848 },{ 90,118,488 },{ 6,28,496 } },
1670
{ { 136,167,3906 },{ 146,198,3848 },{ 98,118,488 },{ 14,28,496 } },
1671
{ { 144,167,3906 },{ 154,198,3848 },{ 106,118,488 },{ 22,28,496 } },
1672
{ { 152,167,3906 },{ 162,198,3848 },{ 114,118,488 },{ 30,28,496 } },
1673
{ { 161,167,3906 },{ 171,198,3848 },{ 123,118,488 },{ 39,28,496 } },
1674
{ { 169,167,3906 },{ 179,198,3848 },{ 131,118,488 },{ 47,28,496 } },
1675
{ { 177,167,3906 },{ 187,198,3848 },{ 139,118,488 },{ 55,28,496 } },
1676
{ { 185,167,3906 },{ 195,198,3848 },{ 147,118,488 },{ 63,28,496 } },
1677
{ { 194,167,3906 },{ 120,12,4008 },{ 156,118,488 },{ 72,28,496 } },
1678
{ { 206,198,3907 },{ 116,28,4008 },{ 164,118,488 },{ 80,28,496 } },
1679
{ { 214,198,3907 },{ 124,28,4008 },{ 172,118,488 },{ 88,28,496 } },
1680
{ { 222,198,3395 },{ 132,28,4008 },{ 180,118,488 },{ 96,28,496 } },
1681
{ { 207,134,4001 },{ 141,28,4008 },{ 189,118,488 },{ 105,28,496 } },
1682
{ { 95,30,4085 },{ 86,31,4080 },{ 95,30,501 },{ 86,31,496 } },
1683
{ { 103,30,4085 },{ 94,31,4080 },{ 103,30,501 },{ 94,31,496 } },
1684
{ { 111,30,4085 },{ 102,31,4080 },{ 111,30,501 },{ 102,31,496 } },
1685
{ { 0,104,3840 },{ 0,104,3840 },{ 0,18,448 },{ 0,18,448 } },
1686
{ { 4,39,3904 },{ 4,39,3904 },{ 0,4,384 },{ 0,4,384 } },
1687
{ { 0,56,3968 },{ 0,56,3968 },{ 0,84,448 },{ 0,84,448 } },
1688
{ { 6,110,3328 },{ 6,110,3328 },{ 0,20,448 },{ 0,20,448 } },
1689
{ { 41,200,3850 },{ 41,200,3848 },{ 1,4,480 },{ 1,4,480 } },
1690
{ { 49,200,3850 },{ 49,200,3848 },{ 1,8,416 },{ 1,8,416 } },
1691
{ { 57,200,3851 },{ 57,200,3848 },{ 1,38,488 },{ 1,38,488 } },
1692
{ { 65,200,3851 },{ 65,200,3848 },{ 1,120,488 },{ 1,120,488 } },
1693
{ { 74,200,3851 },{ 74,200,3848 },{ 2,72,488 },{ 2,72,488 } },
1694
{ { 69,6,3907 },{ 82,200,3848 },{ 2,24,488 },{ 2,24,488 } },
1695
{ { 77,6,3907 },{ 90,200,3848 },{ 26,120,488 },{ 10,24,488 } },
1696
{ { 97,63,3330 },{ 98,200,3848 },{ 34,120,488 },{ 2,8,496 } },
1697
{ { 106,63,3330 },{ 107,200,3848 },{ 43,120,488 },{ 3,92,488 } },
1698
{ { 114,63,3330 },{ 115,200,3848 },{ 51,120,488 },{ 11,92,488 } },
1699
{ { 122,63,3330 },{ 123,200,3848 },{ 59,120,488 },{ 7,76,496 } },
1700
{ { 130,63,3330 },{ 131,200,3848 },{ 67,120,488 },{ 15,76,496 } },
1701
{ { 139,63,3330 },{ 140,200,3848 },{ 76,120,488 },{ 24,76,496 } },
1702
{ { 147,63,3330 },{ 148,200,3848 },{ 84,120,488 },{ 32,76,496 } },
1703
{ { 155,63,3330 },{ 156,200,3848 },{ 92,120,488 },{ 40,76,496 } },
1704
{ { 163,63,3330 },{ 164,200,3848 },{ 100,120,488 },{ 48,76,496 } },
1705
{ { 172,63,3330 },{ 173,200,3848 },{ 109,120,488 },{ 57,76,496 } },
1706
{ { 184,6,3851 },{ 181,200,3848 },{ 117,120,488 },{ 65,76,496 } },
1707
{ { 192,6,3851 },{ 133,28,3936 },{ 125,120,488 },{ 73,76,496 } },
1708
{ { 189,200,3907 },{ 141,28,3936 },{ 133,120,488 },{ 81,76,496 } },
1709
{ { 198,200,3907 },{ 138,108,4000 },{ 142,120,488 },{ 90,76,496 } },
1710
{ { 206,200,3907 },{ 146,108,4000 },{ 150,120,488 },{ 98,76,496 } },
1711
{ { 214,200,3395 },{ 154,108,4000 },{ 158,120,488 },{ 106,76,496 } },
1712
{ { 190,136,4001 },{ 162,108,4000 },{ 166,120,488 },{ 114,76,496 } },
1713
{ { 123,30,4076 },{ 87,15,4080 },{ 123,30,492 },{ 87,15,496 } },
1714
{ { 117,110,4084 },{ 80,31,4080 },{ 117,110,500 },{ 80,31,496 } },
1715
{ { 125,110,4084 },{ 88,31,4080 },{ 125,110,500 },{ 88,31,496 } },
1716
{ { 133,110,4084 },{ 96,31,4080 },{ 133,110,500 },{ 96,31,496 } },
1717
{ { 9,56,3904 },{ 9,56,3904 },{ 0,67,448 },{ 0,67,448 } },
1718
{ { 1,8,3904 },{ 1,8,3904 },{ 1,84,448 },{ 1,84,448 } },
1719
{ { 1,124,3904 },{ 1,124,3904 },{ 0,39,384 },{ 0,39,384 } },
1720
{ { 9,124,3904 },{ 9,124,3904 },{ 1,4,448 },{ 1,4,448 } },
1721
{ { 6,76,3904 },{ 6,76,3904 },{ 0,70,448 },{ 0,70,448 } },
1722
{ { 62,6,3859 },{ 62,6,3856 },{ 2,38,480 },{ 2,38,480 } },
1723
{ { 70,6,3859 },{ 70,6,3856 },{ 5,43,416 },{ 5,43,416 } },
1724
{ { 78,6,3859 },{ 78,6,3856 },{ 2,11,416 },{ 2,11,416 } },
1725
{ { 87,6,3859 },{ 87,6,3856 },{ 0,171,488 },{ 0,171,488 } },
1726
{ { 67,8,3906 },{ 95,6,3856 },{ 8,171,488 },{ 8,171,488 } },
1727
{ { 75,8,3907 },{ 103,6,3856 },{ 5,123,488 },{ 5,123,488 } },
1728
{ { 83,8,3907 },{ 111,6,3856 },{ 2,75,488 },{ 2,75,488 } },
1729
{ { 92,8,3907 },{ 120,6,3856 },{ 0,27,488 },{ 0,27,488 } },
1730
{ { 100,8,3907 },{ 128,6,3856 },{ 8,27,488 },{ 8,27,488 } },
1731
{ { 120,106,3843 },{ 136,6,3856 },{ 100,6,387 },{ 16,27,488 } },
1732
{ { 128,106,3843 },{ 144,6,3856 },{ 108,6,387 },{ 2,11,496 } },
1733
{ { 137,106,3843 },{ 153,6,3856 },{ 117,6,387 },{ 11,11,496 } },
1734
{ { 145,106,3843 },{ 161,6,3856 },{ 125,6,387 },{ 19,11,496 } },
1735
{ { 163,8,3851 },{ 137,43,3904 },{ 133,6,387 },{ 27,11,496 } },
1736
{ { 171,8,3851 },{ 101,11,4000 },{ 141,6,387 },{ 35,11,496 } },
1737
{ { 180,8,3851 },{ 110,11,4000 },{ 150,6,387 },{ 44,11,496 } },
1738
{ { 188,8,3851 },{ 118,11,4000 },{ 158,6,387 },{ 52,11,496 } },
1739
{ { 172,72,3907 },{ 126,11,4000 },{ 166,6,387 },{ 60,11,496 } },
1740
{ { 174,6,3971 },{ 134,11,4000 },{ 174,6,387 },{ 68,11,496 } },
1741
{ { 183,6,3971 },{ 143,11,4000 },{ 183,6,387 },{ 77,11,496 } },
1742
{ { 191,6,3971 },{ 151,11,4000 },{ 191,6,387 },{ 85,11,496 } },
1743
{ { 199,6,3971 },{ 159,11,4000 },{ 199,6,387 },{ 93,11,496 } },
1744
{ { 92,12,4084 },{ 69,15,4080 },{ 92,12,500 },{ 69,15,496 } },
1745
{ { 101,12,4084 },{ 78,15,4080 },{ 101,12,500 },{ 78,15,496 } },
1746
{ { 109,12,4084 },{ 86,15,4080 },{ 109,12,500 },{ 86,15,496 } },
1747
{ { 117,12,4084 },{ 79,31,4080 },{ 117,12,500 },{ 79,31,496 } },
1748
{ { 125,12,4084 },{ 87,31,4080 },{ 125,12,500 },{ 87,31,496 } },
1749
{ { 71,8,3602 },{ 71,8,3600 },{ 2,21,384 },{ 2,21,384 } },
1750
{ { 79,8,3611 },{ 79,8,3608 },{ 0,69,448 },{ 0,69,448 } },
1751
{ { 87,8,3611 },{ 87,8,3608 },{ 0,23,384 },{ 0,23,384 } },
1752
{ { 95,8,3611 },{ 95,8,3608 },{ 1,5,448 },{ 1,5,448 } },
1753
{ { 104,8,3611 },{ 104,8,3608 },{ 0,88,448 },{ 0,88,448 } },
1754
{ { 112,8,3611 },{ 112,8,3608 },{ 0,72,448 },{ 0,72,448 } },
1755
{ { 120,8,3611 },{ 121,8,3608 },{ 36,21,458 },{ 36,21,456 } },
1756
{ { 133,47,3091 },{ 129,8,3608 },{ 44,21,458 },{ 44,21,456 } },
1757
{ { 142,47,3091 },{ 138,8,3608 },{ 53,21,459 },{ 53,21,456 } },
1758
{ { 98,12,3850 },{ 98,12,3848 },{ 61,21,459 },{ 61,21,456 } },
1759
{ { 106,12,3850 },{ 106,12,3848 },{ 10,92,480 },{ 69,21,456 } },
1760
{ { 114,12,3851 },{ 114,12,3848 },{ 18,92,480 },{ 77,21,456 } },
1761
{ { 87,12,3906 },{ 87,12,3904 },{ 3,44,488 },{ 86,21,456 } },
1762
{ { 95,12,3906 },{ 95,12,3904 },{ 11,44,488 },{ 94,21,456 } },
1763
{ { 103,12,3906 },{ 103,12,3904 },{ 19,44,488 },{ 102,21,456 } },
1764
{ { 111,12,3907 },{ 111,12,3904 },{ 27,44,489 },{ 110,21,456 } },
1765
{ { 120,12,3907 },{ 120,12,3904 },{ 36,44,489 },{ 119,21,456 } },
1766
{ { 128,12,3907 },{ 128,12,3904 },{ 44,44,489 },{ 127,21,456 } },
1767
{ { 136,12,3907 },{ 136,12,3904 },{ 52,44,489 },{ 135,21,456 } },
1768
{ { 144,12,3907 },{ 144,12,3904 },{ 60,44,489 },{ 143,21,456 } },
1769
{ { 153,12,3907 },{ 153,12,3904 },{ 69,44,490 },{ 152,21,456 } },
1770
{ { 161,12,3395 },{ 149,188,3968 },{ 77,44,490 },{ 160,21,456 } },
1771
{ { 169,12,3395 },{ 198,21,3928 },{ 85,44,490 },{ 168,21,456 } },
1772
{ { 113,95,4001 },{ 201,69,3992 },{ 125,8,483 },{ 176,21,456 } },
1773
{ { 122,95,4001 },{ 200,21,3984 },{ 134,8,483 },{ 185,21,456 } },
1774
{ { 142,8,4067 },{ 208,21,3984 },{ 142,8,483 },{ 193,21,456 } },
1775
{ { 151,8,4067 },{ 47,15,4080 },{ 151,8,483 },{ 47,15,496 } },
1776
{ { 159,8,4067 },{ 55,15,4080 },{ 159,8,483 },{ 55,15,496 } },
1777
{ { 168,8,4067 },{ 64,15,4080 },{ 168,8,483 },{ 64,15,496 } },
1778
{ { 160,40,4075 },{ 72,15,4080 },{ 160,40,491 },{ 72,15,496 } },
1779
{ { 168,40,4075 },{ 80,15,4080 },{ 168,40,491 },{ 80,15,496 } },
1780
{ { 144,8,4082 },{ 88,15,4080 },{ 144,8,498 },{ 88,15,496 } }
1781
};
1782
#endif // BASISD_SUPPORT_ETC2_EAC_A8
1783
1784
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1785
static void create_etc2_eac_a8_conversion_table()
1786
{
1787
FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w");
1788
1789
for (uint32_t inten = 0; inten < 8; inten++)
1790
{
1791
for (uint32_t base = 0; base < 32; base++)
1792
{
1793
color32 block_colors[4];
1794
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1795
1796
fprintf(pFile, "{");
1797
1798
for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1799
{
1800
const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1801
const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1802
1803
// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1804
// Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors.
1805
1806
uint8_t pixels[4];
1807
uint32_t num_pixels = 0;
1808
for (uint32_t s = low_selector; s <= high_selector; s++)
1809
pixels[num_pixels++] = block_colors[s].g;
1810
1811
pack_eac_a8_results pack_results;
1812
pack_eac_a8_exhaustive(pack_results, pixels, num_pixels);
1813
1814
etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range];
1815
1816
c.m_base = pack_results.m_base;
1817
c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier;
1818
c.m_trans = 0;
1819
1820
for (uint32_t s = 0; s < 4; s++)
1821
{
1822
if ((s < low_selector) || (s > high_selector))
1823
continue;
1824
1825
uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1826
1827
c.m_trans |= (etc2_selector << (s * 3));
1828
}
1829
1830
fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1831
if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1832
fprintf(pFile, ",");
1833
}
1834
1835
fprintf(pFile, "},\n");
1836
}
1837
}
1838
1839
fclose(pFile);
1840
}
1841
#endif
1842
1843
#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1844
struct pack_eac_r11_results
1845
{
1846
uint32_t m_base;
1847
uint32_t m_table;
1848
uint32_t m_multiplier;
1849
basisu::vector<uint8_t> m_selectors;
1850
basisu::vector<uint8_t> m_selectors_temp;
1851
};
1852
1853
// Exhaustively searches every EAC R11 base (0..255), multiplier (0..15) and modifier table (0..15)
// for the combination that reproduces the given 8-bit pixels with the lowest summed squared error,
// measured in the 11-bit domain.
//   results    - receives the best base/table/multiplier and one selector per pixel.
//   pPixels    - num_pixels 8-bit input values.
//   num_pixels - number of entries in pPixels.
// Returns the summed squared error of the best combination.
static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels)
{
	results.m_selectors.resize(num_pixels);
	results.m_selectors_temp.resize(num_pixels);

	uint64_t lowest_total = UINT64_MAX;

	for (uint32_t base_codeword = 0; base_codeword < 256; base_codeword++)
	{
		// Center of the reconstruction before any modifier is added.
		const int center = (int)base_codeword * 8 + 4;

		for (uint32_t mul = 0; mul < 16; mul++)
		{
			// A multiplier of 0 scales the modifier by 1 rather than by 0.
			const int scale = (int)(mul ? (mul * 8) : 1);

			for (uint32_t tbl = 0; tbl < 16; tbl++)
			{
				uint64_t trial_total = 0;

				for (uint32_t px = 0; px < num_pixels; px++)
				{
					// Convert 8-bit input to 11-bits
					const int target = (pPixels[px] * 2047 + 128) / 255;

					uint32_t closest_dist = UINT32_MAX;
					uint32_t closest_sel = 0;

					for (uint32_t sel = 0; sel < 8; sel++)
					{
						int recon = scale * g_eac_modifier_table[tbl][sel] + center;
						recon = (recon < 0) ? 0 : ((recon > 2047) ? 2047 : recon);

						const uint32_t dist = abs(target - recon);
						if (dist < closest_dist)
						{
							closest_dist = dist;
							closest_sel = sel;
						}
					}

					results.m_selectors_temp[px] = static_cast<uint8_t>(closest_sel);

					// Give up on this candidate as soon as it can no longer beat the best one.
					trial_total += closest_dist * closest_dist;
					if (trial_total >= lowest_total)
						break;
				}

				if (trial_total < lowest_total)
				{
					lowest_total = trial_total;
					results.m_base = base_codeword;
					results.m_multiplier = mul;
					results.m_table = tbl;
					results.m_selectors.swap(results.m_selectors_temp);
				}

			} // tbl

		} // mul

	} // base_codeword

	return lowest_total;
}
1915
1916
static void create_etc2_eac_r11_conversion_table()
1917
{
1918
FILE* pFile = nullptr;
1919
fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w");
1920
1921
for (uint32_t inten = 0; inten < 8; inten++)
1922
{
1923
for (uint32_t base = 0; base < 32; base++)
1924
{
1925
color32 block_colors[4];
1926
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1927
1928
fprintf(pFile, "{");
1929
1930
for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1931
{
1932
const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1933
const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1934
1935
// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1936
// Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors.
1937
1938
uint8_t pixels[4];
1939
uint32_t num_pixels = 0;
1940
for (uint32_t s = low_selector; s <= high_selector; s++)
1941
pixels[num_pixels++] = block_colors[s].g;
1942
1943
pack_eac_r11_results pack_results;
1944
pack_eac_r11_exhaustive(pack_results, pixels, num_pixels);
1945
1946
etc1_g_to_eac_conversion c;
1947
1948
c.m_base = (uint8_t)pack_results.m_base;
1949
c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier);
1950
c.m_trans = 0;
1951
1952
for (uint32_t s = 0; s < 4; s++)
1953
{
1954
if ((s < low_selector) || (s > high_selector))
1955
continue;
1956
1957
uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1958
1959
c.m_trans |= (etc2_selector << (s * 3));
1960
}
1961
1962
fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1963
if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1964
fprintf(pFile, ",");
1965
}
1966
1967
fprintf(pFile, "},\n");
1968
}
1969
}
1970
1971
fclose(pFile);
1972
}
1973
#endif // BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1974
1975
// Forward declarations of the table generators and per-format init routines that
// basisu_transcoder_init() calls. Each group is only declared when its feature macro is enabled.

// ASTC
#if BASISD_WRITE_NEW_ASTC_TABLES
static void create_etc1_to_astc_conversion_table_0_47();
static void create_etc1_to_astc_conversion_table_0_255();
#endif

#if BASISD_SUPPORT_ASTC
static void transcoder_init_astc();
#endif

// BC7 mode 5
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
static void create_etc1_to_bc7_m5_color_conversion_table();
static void create_etc1_to_bc7_m5_alpha_conversion_table();
#endif

#if BASISD_SUPPORT_BC7_MODE5
static void transcoder_init_bc7_mode5();

namespace bc7_mode_5_encoder
{
	void encode_bc7_mode5_init();
}
#endif

// ATC
#if BASISD_WRITE_NEW_ATC_TABLES
static void create_etc1s_to_atc_conversion_tables();
#endif

#if BASISD_SUPPORT_ATC
static void transcoder_init_atc();
#endif

// PVRTC2
#if BASISD_SUPPORT_PVRTC2
static void transcoder_init_pvrtc2();
#endif

// UASTC LDR
#if BASISD_SUPPORT_UASTC
void uastc_init();
#endif

// UASTC HDR
#if BASISD_SUPPORT_UASTC_HDR
namespace astc_6x6_hdr
{
	static void init_quantize_tables();
	static void fast_encode_bc6h_init();
}
#endif
2023
2024
static bool g_transcoder_initialized;
2025
2026
// Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz.
2027
// If this is too slow, these computed tables can easilky be moved to be compiled in.
2028
void basisu_transcoder_init()
2029
{
2030
if (g_transcoder_initialized)
2031
{
2032
BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
2033
return;
2034
}
2035
2036
BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
2037
2038
#if BASISD_SUPPORT_UASTC
2039
uastc_init();
2040
#endif
2041
2042
#if BASISD_SUPPORT_UASTC_HDR
2043
// TODO: Examine this, optimize for startup time/mem utilization.
2044
astc_helpers::init_tables(false);
2045
2046
astc_hdr_core_init();
2047
#endif
2048
2049
#if BASISD_SUPPORT_ASTC
2050
transcoder_init_astc();
2051
#endif
2052
2053
#if BASISD_WRITE_NEW_ASTC_TABLES
2054
create_etc1_to_astc_conversion_table_0_47();
2055
create_etc1_to_astc_conversion_table_0_255();
2056
exit(0);
2057
#endif
2058
2059
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
2060
create_etc1_to_bc7_m5_color_conversion_table();
2061
create_etc1_to_bc7_m5_alpha_conversion_table();
2062
exit(0);
2063
#endif
2064
2065
#if BASISD_WRITE_NEW_DXT1_TABLES
2066
create_etc1_to_dxt1_5_conversion_table();
2067
create_etc1_to_dxt1_6_conversion_table();
2068
exit(0);
2069
#endif
2070
2071
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
2072
create_etc2_eac_a8_conversion_table();
2073
exit(0);
2074
#endif
2075
2076
#if BASISD_WRITE_NEW_ATC_TABLES
2077
create_etc1s_to_atc_conversion_tables();
2078
exit(0);
2079
#endif
2080
2081
#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
2082
create_etc2_eac_r11_conversion_table();
2083
exit(0);
2084
#endif
2085
2086
#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
2087
uint8_t bc1_expand5[32];
2088
for (int i = 0; i < 32; i++)
2089
bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2));
2090
prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 32, 1);
2091
prepare_bc1_single_color_table(g_bc1_match5_equals_0, bc1_expand5, 1, 32, 0);
2092
2093
uint8_t bc1_expand6[64];
2094
for (int i = 0; i < 64; i++)
2095
bc1_expand6[i] = static_cast<uint8_t>((i << 2) | (i >> 4));
2096
prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1);
2097
prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0);
2098
2099
#if 0
2100
for (uint32_t i = 0; i < 256; i++)
2101
{
2102
printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi);
2103
}
2104
exit(0);
2105
#endif
2106
2107
#endif
2108
2109
#if BASISD_SUPPORT_DXT1
2110
for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++)
2111
{
2112
uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low;
2113
uint32_t h = g_etc1_to_dxt1_selector_ranges[i].m_high;
2114
g_etc1_to_dxt1_selector_range_index[l][h] = i;
2115
}
2116
2117
for (uint32_t sm = 0; sm < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; sm++)
2118
{
2119
uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1[4];
2120
uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1_inv[4];
2121
2122
for (uint32_t j = 0; j < 4; j++)
2123
{
2124
static const uint8_t s_linear_dxt1_to_dxt1[4] = { 0, 2, 3, 1 };
2125
static const uint8_t s_dxt1_inverted_xlat[4] = { 1, 0, 3, 2 };
2126
2127
etc1_to_dxt1_selector_mappings_raw_dxt1[j] = (uint8_t)s_linear_dxt1_to_dxt1[g_etc1_to_dxt1_selector_mappings[sm][j]];
2128
etc1_to_dxt1_selector_mappings_raw_dxt1_inv[j] = (uint8_t)s_dxt1_inverted_xlat[etc1_to_dxt1_selector_mappings_raw_dxt1[j]];
2129
}
2130
2131
for (uint32_t i = 0; i < 256; i++)
2132
{
2133
uint32_t k = 0, k_inv = 0;
2134
for (uint32_t s = 0; s < 4; s++)
2135
{
2136
k |= (etc1_to_dxt1_selector_mappings_raw_dxt1[(i >> (s * 2)) & 3] << (s * 2));
2137
k_inv |= (etc1_to_dxt1_selector_mappings_raw_dxt1_inv[(i >> (s * 2)) & 3] << (s * 2));
2138
}
2139
g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[sm][i] = (uint8_t)k;
2140
g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[sm][i] = (uint8_t)k_inv;
2141
}
2142
}
2143
#endif
2144
2145
#if BASISD_SUPPORT_BC7_MODE5
2146
transcoder_init_bc7_mode5();
2147
#endif
2148
2149
#if BASISD_SUPPORT_ATC
2150
transcoder_init_atc();
2151
#endif
2152
2153
#if BASISD_SUPPORT_PVRTC2
2154
transcoder_init_pvrtc2();
2155
#endif
2156
2157
#if BASISD_SUPPORT_UASTC_HDR
2158
bc6h_enc_init();
2159
astc_6x6_hdr::init_quantize_tables();
2160
fast_encode_bc6h_init();
2161
#endif
2162
2163
#if BASISD_SUPPORT_BC7_MODE5
2164
bc7_mode_5_encoder::encode_bc7_mode5_init();
2165
#endif
2166
2167
g_transcoder_initialized = true;
2168
}
2169
2170
#if BASISD_SUPPORT_DXT1
2171
// Converts one ETC1S block, described by an endpoint entry and a selector entry, into a DXT1/BC1 block.
//   pDst_block            - receives the two 16-bit colors and the 4 selector bytes.
//   pEndpoints            - supplies the 5-bit base color (m_color5) and intensity table (m_inten5).
//   pSelector             - supplies the per-pixel selectors and their low/high range.
//   use_threecolor_blocks - when false, the output is forced to have low color > high color so the
//                           block never lands in 3-color (punchthrough) mode.
// Three paths: solid-color blocks, two-selector blocks using the extreme intensity table, and the
// general table-driven path. The body is compiled out while the DXT1 tables are being regenerated.
static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
{
#if !BASISD_WRITE_NEW_DXT1_TABLES
	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten_table = pEndpoints->m_inten5;

	const uint32_t low_selector = pSelector->m_lo_selector;
	const uint32_t high_selector = pSelector->m_hi_selector;

	// ---- Path 1: only one selector is used, so the whole block is a single color. ----
	if (low_selector == high_selector)
	{
		uint32_t r, g, b;
		decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

		uint32_t sel_bits = 0xAA;
		uint32_t color_hi = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi;
		uint32_t color_lo = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo;

		if ((color_lo == color_hi) && (!use_threecolor_blocks))
		{
			// This is an annoying edge case that impacts BC3.
			// This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
			if (color_lo > 0)
			{
				// Make l > h
				color_lo--;
				sel_bits = 0;
			}
			else
			{
				// l = h = 0
				assert(color_lo == color_hi && color_hi == 0);

				color_hi = 1;
				color_lo = 0;
				sel_bits = 0x55;
			}

			assert(color_hi > color_lo);
		}

		if (color_hi < color_lo)
		{
			std::swap(color_hi, color_lo);
			sel_bits ^= 0x55;
		}

		pDst_block->set_low_color(static_cast<uint16_t>(color_hi));
		pDst_block->set_high_color(static_cast<uint16_t>(color_lo));

		for (uint32_t row = 0; row < 4; row++)
			pDst_block->m_selectors[row] = static_cast<uint8_t>(sel_bits);

		return;
	}

	// ---- Path 2: intensity table 7+ with exactly the two extreme selectors (0 and 3) in use. ----
	if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (low_selector == 0) && (high_selector == 3))
	{
		color32 block_colors[4];

		decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);

		const color32& dark = block_colors[0];
		const color32& bright = block_colors[3];

		uint32_t max16 = (g_bc1_match5_equals_0[dark.r].m_hi << 11) | (g_bc1_match6_equals_0[dark.g].m_hi << 5) | g_bc1_match5_equals_0[dark.b].m_hi;
		uint32_t min16 = (g_bc1_match5_equals_0[bright.r].m_hi << 11) | (g_bc1_match6_equals_0[bright.g].m_hi << 5) | g_bc1_match5_equals_0[bright.b].m_hi;

		// DXT1 selector written for ETC1S selector 0 (sel_for_lo) and for selector 3 (sel_for_hi).
		uint32_t sel_for_lo = 0, sel_for_hi = 1;

		if (min16 == max16)
		{
			// Make l > h
			if (min16 > 0)
			{
				min16--;

				sel_for_lo = 0;
				sel_for_hi = 0;
			}
			else
			{
				// l = h = 0
				assert(min16 == max16 && max16 == 0);

				max16 = 1;
				min16 = 0;

				sel_for_lo = 1;
				sel_for_hi = 1;
			}

			assert(max16 > min16);
		}

		if (max16 < min16)
		{
			std::swap(max16, min16);
			sel_for_lo = 1;
			sel_for_hi = 0;
		}

		pDst_block->set_low_color((uint16_t)max16);
		pDst_block->set_high_color((uint16_t)min16);

		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				const uint32_t etc_sel = pSelector->get_selector(x, y);
				pDst_block->set_selector(x, y, (etc_sel == 3) ? sel_for_hi : sel_for_lo);
			}
		}

		return;
	}

	// ---- Path 3: general case, driven by the precomputed per-channel solution tables. ----
	const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector];

	//[32][8][RANGES][MAPPING]
	const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
	const etc1_to_dxt1_56_solution* pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
	const etc1_to_dxt1_56_solution* pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];

	// Pick the selector mapping with the lowest summed R+G+B error (first one wins on ties).
	uint32_t best_err = UINT_MAX;
	uint32_t best_mapping = 0;

	assert(NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS == 10);
#define DO_ITER(m) \
	{ \
		uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; \
		if (total_err < best_err) \
		{ \
			best_err = total_err; \
			best_mapping = m; \
		} \
	}
	DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
	DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
#undef DO_ITER

	const etc1_to_dxt1_56_solution& sol_r = pTable_r[best_mapping];
	const etc1_to_dxt1_56_solution& sol_g = pTable_g[best_mapping];
	const etc1_to_dxt1_56_solution& sol_b = pTable_b[best_mapping];

	uint32_t l = dxt1_block::pack_unscaled_color(sol_r.m_lo, sol_g.m_lo, sol_b.m_lo);
	uint32_t h = dxt1_block::pack_unscaled_color(sol_r.m_hi, sol_g.m_hi, sol_b.m_hi);

	const uint8_t* pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[best_mapping][0];

	if (l < h)
	{
		// Endpoints get swapped, so use the inverted selector translation.
		std::swap(l, h);
		pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0];
	}

	pDst_block->set_low_color(static_cast<uint16_t>(l));
	pDst_block->set_high_color(static_cast<uint16_t>(h));

	if (l == h)
	{
		uint8_t solid_sels = 0;

		if (!use_threecolor_blocks)
		{
			// This is an annoying edge case that impacts BC3.

			// Make l > h
			if (h > 0)
			{
				h--;
			}
			else
			{
				// l = h = 0
				assert(l == h && h == 0);

				h = 0;
				l = 1;
				solid_sels = 0x55;
			}

			assert(l > h);
			pDst_block->set_low_color(static_cast<uint16_t>(l));
			pDst_block->set_high_color(static_cast<uint16_t>(h));
		}

		for (uint32_t row = 0; row < 4; row++)
			pDst_block->m_selectors[row] = solid_sels;

		return;
	}

	// Translate each row of four packed ETC1S selectors with a single table lookup.
	for (uint32_t row = 0; row < 4; row++)
		pDst_block->m_selectors[row] = pSelectors_xlat_256[pSelector->m_selectors[row]];
#endif
}
2362
2363
#if BASISD_ENABLE_DEBUG_FLAGS
2364
// Debug variant of convert_etc1s_to_dxt1(): performs the normal conversion, then applies one
// visualization override chosen by g_debug_flags.
//   cDebugFlagVisBC1Sels      - replaces the endpoints with (31,63,31) and (0,0,0), keeping the selectors.
//   cDebugFlagVisBC1Endpoints - replaces the selectors: 0 for the top two rows, 1 for the bottom two.
// If both flags are set, only the selector visualization is applied.
static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
{
	convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks);

	if (g_debug_flags & cDebugFlagVisBC1Sels)
	{
		const uint32_t max_color = dxt1_block::pack_unscaled_color(31, 63, 31);
		const uint32_t min_color = dxt1_block::pack_unscaled_color(0, 0, 0);
		pDst_block->set_low_color(static_cast<uint16_t>(max_color));
		pDst_block->set_high_color(static_cast<uint16_t>(min_color));
		return;
	}

	if (g_debug_flags & cDebugFlagVisBC1Endpoints)
	{
		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				pDst_block->set_selector(x, y, (y >= 2) ? 1 : 0);
			}
		}
	}
}
2382
#endif
2383
#endif
2384
2385
#if BASISD_SUPPORT_FXT1
2386
// One 128-bit FXT1 block, viewed as two 64-bit halves.
// The first half holds 32 two-bit texel selectors (also addressable as 8 bytes via m_sels).
// The second half holds four 5:5:5 colors followed by the alpha, glsb and mode fields.
struct fxt1_block
{
	// Selector half.
	union
	{
		struct
		{
			uint64_t m_t00 : 2, m_t01 : 2, m_t02 : 2, m_t03 : 2;
			uint64_t m_t04 : 2, m_t05 : 2, m_t06 : 2, m_t07 : 2;
			uint64_t m_t08 : 2, m_t09 : 2, m_t10 : 2, m_t11 : 2;
			uint64_t m_t12 : 2, m_t13 : 2, m_t14 : 2, m_t15 : 2;
			uint64_t m_t16 : 2, m_t17 : 2, m_t18 : 2, m_t19 : 2;
			uint64_t m_t20 : 2, m_t21 : 2, m_t22 : 2, m_t23 : 2;
			uint64_t m_t24 : 2, m_t25 : 2, m_t26 : 2, m_t27 : 2;
			uint64_t m_t28 : 2, m_t29 : 2, m_t30 : 2, m_t31 : 2;
		} m_lo;
		uint64_t m_lo_bits;
		uint8_t m_sels[8];
	};

	// Color/mode half.
	union
	{
		struct
		{
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
			// Alternate color ordering: color 1 before color 0, color 3 before color 2.
			uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
			uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
			uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
			uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
#else
			uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
			uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
			uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
			uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
#endif
			uint64_t m_alpha : 1;
			uint64_t m_glsb : 2;
			uint64_t m_mode : 1;
		} m_hi;
		uint64_t m_hi_bits;
	};
};
2466
2467
// Translates one byte of four packed 2-bit DXT1 selectors into the FXT1 selector encoding.
// Each 2-bit selector is remapped 0->0, 1->3, 2->1, 3->2; the table handles two selectors (one nibble) at a time.
// Only the low 8 bits of sels are used; higher bits are ignored, so the 16-entry table is never indexed out of range.
static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels)
{
	static const uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 };

	const uint32_t lo_nibble = s_conv_table[sels & 15];
	const uint32_t hi_nibble = s_conv_table[(sels >> 4) & 15];

	return static_cast<uint8_t>(lo_nibble | (hi_nibble << 4));
}
2472
2473
static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock)
2474
{
2475
fxt1_block* pBlock = static_cast<fxt1_block*>(pDst);
2476
2477
// CC_MIXED is basically DXT1 with different encoding tricks.
2478
// So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless.
2479
// (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.)
2480
dxt1_block blk;
2481
convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false);
2482
2483
const uint32_t l = blk.get_low_color();
2484
const uint32_t h = blk.get_high_color();
2485
2486
color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255);
2487
color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255);
2488
2489
uint32_t g0 = color0.g & 1;
2490
uint32_t g1 = color1.g & 1;
2491
2492
color0.g >>= 1;
2493
color1.g >>= 1;
2494
2495
blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]);
2496
blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]);
2497
blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]);
2498
blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]);
2499
2500
if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1))
2501
{
2502
std::swap(color0, color1);
2503
std::swap(g0, g1);
2504
2505
blk.m_selectors[0] ^= 0xFF;
2506
blk.m_selectors[1] ^= 0xFF;
2507
blk.m_selectors[2] ^= 0xFF;
2508
blk.m_selectors[3] ^= 0xFF;
2509
}
2510
2511
if (fxt1_subblock == 0)
2512
{
2513
pBlock->m_hi.m_mode = 1;
2514
pBlock->m_hi.m_alpha = 0;
2515
pBlock->m_hi.m_glsb = g1 | (g1 << 1);
2516
pBlock->m_hi.m_r0 = color0.r;
2517
pBlock->m_hi.m_g0 = color0.g;
2518
pBlock->m_hi.m_b0 = color0.b;
2519
pBlock->m_hi.m_r1 = color1.r;
2520
pBlock->m_hi.m_g1 = color1.g;
2521
pBlock->m_hi.m_b1 = color1.b;
2522
pBlock->m_hi.m_r2 = color0.r;
2523
pBlock->m_hi.m_g2 = color0.g;
2524
pBlock->m_hi.m_b2 = color0.b;
2525
pBlock->m_hi.m_r3 = color1.r;
2526
pBlock->m_hi.m_g3 = color1.g;
2527
pBlock->m_hi.m_b3 = color1.b;
2528
pBlock->m_sels[0] = blk.m_selectors[0];
2529
pBlock->m_sels[1] = blk.m_selectors[1];
2530
pBlock->m_sels[2] = blk.m_selectors[2];
2531
pBlock->m_sels[3] = blk.m_selectors[3];
2532
2533
static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 };
2534
pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6];
2535
pBlock->m_sels[5] = s_border_dup[blk.m_selectors[1] >> 6];
2536
pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6];
2537
pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6];
2538
}
2539
else
2540
{
2541
pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1);
2542
pBlock->m_hi.m_r2 = color0.r;
2543
pBlock->m_hi.m_g2 = color0.g;
2544
pBlock->m_hi.m_b2 = color0.b;
2545
pBlock->m_hi.m_r3 = color1.r;
2546
pBlock->m_hi.m_g3 = color1.g;
2547
pBlock->m_hi.m_b3 = color1.b;
2548
pBlock->m_sels[4] = blk.m_selectors[0];
2549
pBlock->m_sels[5] = blk.m_selectors[1];
2550
pBlock->m_sels[6] = blk.m_selectors[2];
2551
pBlock->m_sels[7] = blk.m_selectors[3];
2552
}
2553
}
2554
#endif // BASISD_SUPPORT_FXT1
2555
#if BASISD_SUPPORT_DXT5A
2556
static dxt_selector_range s_dxt5a_selector_ranges[] =
2557
{
2558
{ 0, 3 },
2559
2560
{ 1, 3 },
2561
{ 0, 2 },
2562
2563
{ 1, 2 },
2564
};
2565
2566
const uint32_t NUM_DXT5A_SELECTOR_RANGES = sizeof(s_dxt5a_selector_ranges) / sizeof(s_dxt5a_selector_ranges[0]);
2567
2568
// One entry of the g_etc1_g_to_dxt5a conversion table.
struct etc1_g_to_dxt5a_conversion
{
	uint8_t m_lo;     // first 8-bit endpoint value
	uint8_t m_hi;     // second 8-bit endpoint value
	uint16_t m_trans; // packed selector translation (NOTE(review): presumably 3 bits per ETC1 selector - confirm against the user of this table)
};
2573
2574
// Precomputed ETC1S -> DXT5A conversions, read by convert_etc1s_to_dxt5a().
// First index: base_color.r + inten_table * 32 (32 * 8 rows).
// Second index: position of the block's (low, high) selector range in s_dxt5a_selector_ranges.
// Each entry is { m_lo, m_hi, m_trans }: the two DXT5A endpoint values plus a packed table that
// maps ETC1S selector s to the 3-bit DXT5A selector stored at bit s * 3.
static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] =
2575
{
2576
{ { 8, 0, 393 },{ 8, 0, 392 },{ 2, 0, 9 },{ 2, 0, 8 }, }, { { 6, 16, 710 },{ 16, 6, 328 },{ 0, 10, 96 },{ 10, 6, 8 }, },
2577
{ { 28, 5, 1327 },{ 24, 14, 328 },{ 8, 18, 96 },{ 18, 14, 8 }, }, { { 36, 13, 1327 },{ 32, 22, 328 },{ 16, 26, 96 },{ 26, 22, 8 }, },
2578
{ { 45, 22, 1327 },{ 41, 31, 328 },{ 25, 35, 96 },{ 35, 31, 8 }, }, { { 53, 30, 1327 },{ 49, 39, 328 },{ 33, 43, 96 },{ 43, 39, 8 }, },
2579
{ { 61, 38, 1327 },{ 57, 47, 328 },{ 41, 51, 96 },{ 51, 47, 8 }, }, { { 69, 46, 1327 },{ 65, 55, 328 },{ 49, 59, 96 },{ 59, 55, 8 }, },
2580
{ { 78, 55, 1327 },{ 74, 64, 328 },{ 58, 68, 96 },{ 68, 64, 8 }, }, { { 86, 63, 1327 },{ 82, 72, 328 },{ 66, 76, 96 },{ 76, 72, 8 }, },
2581
{ { 94, 71, 1327 },{ 90, 80, 328 },{ 74, 84, 96 },{ 84, 80, 8 }, }, { { 102, 79, 1327 },{ 98, 88, 328 },{ 82, 92, 96 },{ 92, 88, 8 }, },
2582
{ { 111, 88, 1327 },{ 107, 97, 328 },{ 91, 101, 96 },{ 101, 97, 8 }, }, { { 119, 96, 1327 },{ 115, 105, 328 },{ 99, 109, 96 },{ 109, 105, 8 }, },
2583
{ { 127, 104, 1327 },{ 123, 113, 328 },{ 107, 117, 96 },{ 117, 113, 8 }, }, { { 135, 112, 1327 },{ 131, 121, 328 },{ 115, 125, 96 },{ 125, 121, 8 }, },
2584
{ { 144, 121, 1327 },{ 140, 130, 328 },{ 124, 134, 96 },{ 134, 130, 8 }, }, { { 152, 129, 1327 },{ 148, 138, 328 },{ 132, 142, 96 },{ 142, 138, 8 }, },
2585
{ { 160, 137, 1327 },{ 156, 146, 328 },{ 140, 150, 96 },{ 150, 146, 8 }, }, { { 168, 145, 1327 },{ 164, 154, 328 },{ 148, 158, 96 },{ 158, 154, 8 }, },
2586
{ { 177, 154, 1327 },{ 173, 163, 328 },{ 157, 167, 96 },{ 167, 163, 8 }, }, { { 185, 162, 1327 },{ 181, 171, 328 },{ 165, 175, 96 },{ 175, 171, 8 }, },
2587
{ { 193, 170, 1327 },{ 189, 179, 328 },{ 173, 183, 96 },{ 183, 179, 8 }, }, { { 201, 178, 1327 },{ 197, 187, 328 },{ 181, 191, 96 },{ 191, 187, 8 }, },
2588
{ { 210, 187, 1327 },{ 206, 196, 328 },{ 190, 200, 96 },{ 200, 196, 8 }, }, { { 218, 195, 1327 },{ 214, 204, 328 },{ 198, 208, 96 },{ 208, 204, 8 }, },
2589
{ { 226, 203, 1327 },{ 222, 212, 328 },{ 206, 216, 96 },{ 216, 212, 8 }, }, { { 234, 211, 1327 },{ 230, 220, 328 },{ 214, 224, 96 },{ 224, 220, 8 }, },
2590
{ { 243, 220, 1327 },{ 239, 229, 328 },{ 223, 233, 96 },{ 233, 229, 8 }, }, { { 251, 228, 1327 },{ 247, 237, 328 },{ 231, 241, 96 },{ 241, 237, 8 }, },
2591
{ { 239, 249, 3680 },{ 245, 249, 3648 },{ 239, 249, 96 },{ 249, 245, 8 }, }, { { 247, 253, 4040 },{ 255, 253, 8 },{ 247, 253, 456 },{ 255, 253, 8 }, },
2592
{ { 5, 17, 566 },{ 5, 17, 560 },{ 5, 0, 9 },{ 5, 0, 8 }, }, { { 25, 0, 313 },{ 25, 3, 328 },{ 13, 0, 49 },{ 13, 3, 8 }, },
2593
{ { 39, 0, 1329 },{ 33, 11, 328 },{ 11, 21, 70 },{ 21, 11, 8 }, }, { { 47, 7, 1329 },{ 41, 19, 328 },{ 29, 7, 33 },{ 29, 19, 8 }, },
2594
{ { 50, 11, 239 },{ 50, 28, 328 },{ 38, 16, 33 },{ 38, 28, 8 }, }, { { 92, 13, 2423 },{ 58, 36, 328 },{ 46, 24, 33 },{ 46, 36, 8 }, },
2595
{ { 100, 21, 2423 },{ 66, 44, 328 },{ 54, 32, 33 },{ 54, 44, 8 }, }, { { 86, 7, 1253 },{ 74, 52, 328 },{ 62, 40, 33 },{ 62, 52, 8 }, },
2596
{ { 95, 16, 1253 },{ 83, 61, 328 },{ 71, 49, 33 },{ 71, 61, 8 }, }, { { 103, 24, 1253 },{ 91, 69, 328 },{ 79, 57, 33 },{ 79, 69, 8 }, },
2597
{ { 111, 32, 1253 },{ 99, 77, 328 },{ 87, 65, 33 },{ 87, 77, 8 }, }, { { 119, 40, 1253 },{ 107, 85, 328 },{ 95, 73, 33 },{ 95, 85, 8 }, },
2598
{ { 128, 49, 1253 },{ 116, 94, 328 },{ 104, 82, 33 },{ 104, 94, 8 }, }, { { 136, 57, 1253 },{ 124, 102, 328 },{ 112, 90, 33 },{ 112, 102, 8 }, },
2599
{ { 144, 65, 1253 },{ 132, 110, 328 },{ 120, 98, 33 },{ 120, 110, 8 }, }, { { 152, 73, 1253 },{ 140, 118, 328 },{ 128, 106, 33 },{ 128, 118, 8 }, },
2600
{ { 161, 82, 1253 },{ 149, 127, 328 },{ 137, 115, 33 },{ 137, 127, 8 }, }, { { 169, 90, 1253 },{ 157, 135, 328 },{ 145, 123, 33 },{ 145, 135, 8 }, },
2601
{ { 177, 98, 1253 },{ 165, 143, 328 },{ 153, 131, 33 },{ 153, 143, 8 }, }, { { 185, 106, 1253 },{ 173, 151, 328 },{ 161, 139, 33 },{ 161, 151, 8 }, },
2602
{ { 194, 115, 1253 },{ 182, 160, 328 },{ 170, 148, 33 },{ 170, 160, 8 }, }, { { 202, 123, 1253 },{ 190, 168, 328 },{ 178, 156, 33 },{ 178, 168, 8 }, },
2603
{ { 210, 131, 1253 },{ 198, 176, 328 },{ 186, 164, 33 },{ 186, 176, 8 }, }, { { 218, 139, 1253 },{ 206, 184, 328 },{ 194, 172, 33 },{ 194, 184, 8 }, },
2604
{ { 227, 148, 1253 },{ 215, 193, 328 },{ 203, 181, 33 },{ 203, 193, 8 }, }, { { 235, 156, 1253 },{ 223, 201, 328 },{ 211, 189, 33 },{ 211, 201, 8 }, },
2605
{ { 243, 164, 1253 },{ 231, 209, 328 },{ 219, 197, 33 },{ 219, 209, 8 }, }, { { 183, 239, 867 },{ 239, 217, 328 },{ 227, 205, 33 },{ 227, 217, 8 }, },
2606
{ { 254, 214, 1329 },{ 248, 226, 328 },{ 236, 214, 33 },{ 236, 226, 8 }, }, { { 222, 244, 3680 },{ 234, 244, 3648 },{ 244, 222, 33 },{ 244, 234, 8 }, },
2607
{ { 230, 252, 3680 },{ 242, 252, 3648 },{ 252, 230, 33 },{ 252, 242, 8 }, }, { { 238, 250, 4040 },{ 255, 250, 8 },{ 238, 250, 456 },{ 255, 250, 8 }, },
2608
{ { 9, 29, 566 },{ 9, 29, 560 },{ 9, 0, 9 },{ 9, 0, 8 }, }, { { 17, 37, 566 },{ 17, 37, 560 },{ 17, 0, 9 },{ 17, 0, 8 }, },
2609
{ { 45, 0, 313 },{ 45, 0, 312 },{ 25, 0, 49 },{ 25, 7, 8 }, }, { { 14, 63, 2758 },{ 5, 53, 784 },{ 15, 33, 70 },{ 33, 15, 8 }, },
2610
{ { 71, 6, 1329 },{ 72, 4, 1328 },{ 42, 4, 33 },{ 42, 24, 8 }, }, { { 70, 3, 239 },{ 70, 2, 232 },{ 50, 12, 33 },{ 50, 32, 8 }, },
2611
{ { 0, 98, 2842 },{ 78, 10, 232 },{ 58, 20, 33 },{ 58, 40, 8 }, }, { { 97, 27, 1329 },{ 86, 18, 232 },{ 66, 28, 33 },{ 66, 48, 8 }, },
2612
{ { 0, 94, 867 },{ 95, 27, 232 },{ 75, 37, 33 },{ 75, 57, 8 }, }, { { 8, 102, 867 },{ 103, 35, 232 },{ 83, 45, 33 },{ 83, 65, 8 }, },
2613
{ { 12, 112, 867 },{ 111, 43, 232 },{ 91, 53, 33 },{ 91, 73, 8 }, }, { { 139, 2, 1253 },{ 119, 51, 232 },{ 99, 61, 33 },{ 99, 81, 8 }, },
2614
{ { 148, 13, 1253 },{ 128, 60, 232 },{ 108, 70, 33 },{ 108, 90, 8 }, }, { { 156, 21, 1253 },{ 136, 68, 232 },{ 116, 78, 33 },{ 116, 98, 8 }, },
2615
{ { 164, 29, 1253 },{ 144, 76, 232 },{ 124, 86, 33 },{ 124, 106, 8 }, }, { { 172, 37, 1253 },{ 152, 84, 232 },{ 132, 94, 33 },{ 132, 114, 8 }, },
2616
{ { 181, 46, 1253 },{ 161, 93, 232 },{ 141, 103, 33 },{ 141, 123, 8 }, }, { { 189, 54, 1253 },{ 169, 101, 232 },{ 149, 111, 33 },{ 149, 131, 8 }, },
2617
{ { 197, 62, 1253 },{ 177, 109, 232 },{ 157, 119, 33 },{ 157, 139, 8 }, }, { { 205, 70, 1253 },{ 185, 117, 232 },{ 165, 127, 33 },{ 165, 147, 8 }, },
2618
{ { 214, 79, 1253 },{ 194, 126, 232 },{ 174, 136, 33 },{ 174, 156, 8 }, }, { { 222, 87, 1253 },{ 202, 134, 232 },{ 182, 144, 33 },{ 182, 164, 8 }, },
2619
{ { 230, 95, 1253 },{ 210, 142, 232 },{ 190, 152, 33 },{ 190, 172, 8 }, }, { { 238, 103, 1253 },{ 218, 150, 232 },{ 198, 160, 33 },{ 198, 180, 8 }, },
2620
{ { 247, 112, 1253 },{ 227, 159, 232 },{ 207, 169, 33 },{ 207, 189, 8 }, }, { { 255, 120, 1253 },{ 235, 167, 232 },{ 215, 177, 33 },{ 215, 197, 8 }, },
2621
{ { 146, 243, 867 },{ 243, 175, 232 },{ 223, 185, 33 },{ 223, 205, 8 }, }, { { 184, 231, 3682 },{ 203, 251, 784 },{ 231, 193, 33 },{ 231, 213, 8 }, },
2622
{ { 193, 240, 3682 },{ 222, 240, 3648 },{ 240, 202, 33 },{ 240, 222, 8 }, }, { { 255, 210, 169 },{ 230, 248, 3648 },{ 248, 210, 33 },{ 248, 230, 8 }, },
2623
{ { 218, 238, 4040 },{ 255, 238, 8 },{ 218, 238, 456 },{ 255, 238, 8 }, }, { { 226, 246, 4040 },{ 255, 246, 8 },{ 226, 246, 456 },{ 255, 246, 8 }, },
2624
{ { 13, 42, 566 },{ 13, 42, 560 },{ 13, 0, 9 },{ 13, 0, 8 }, }, { { 50, 0, 329 },{ 50, 0, 328 },{ 21, 0, 9 },{ 21, 0, 8 }, },
2625
{ { 29, 58, 566 },{ 67, 2, 1352 },{ 3, 29, 70 },{ 29, 3, 8 }, }, { { 10, 79, 2758 },{ 76, 11, 1352 },{ 11, 37, 70 },{ 37, 11, 8 }, },
2626
{ { 7, 75, 790 },{ 7, 75, 784 },{ 20, 46, 70 },{ 46, 20, 8 }, }, { { 15, 83, 790 },{ 97, 1, 1328 },{ 28, 54, 70 },{ 54, 28, 8 }, },
2627
{ { 101, 7, 1329 },{ 105, 9, 1328 },{ 62, 0, 39 },{ 62, 36, 8 }, }, { { 99, 1, 239 },{ 99, 3, 232 },{ 1, 71, 98 },{ 70, 44, 8 }, },
2628
{ { 107, 11, 239 },{ 108, 12, 232 },{ 10, 80, 98 },{ 79, 53, 8 }, }, { { 115, 19, 239 },{ 116, 20, 232 },{ 18, 88, 98 },{ 87, 61, 8 }, },
2629
{ { 123, 27, 239 },{ 124, 28, 232 },{ 26, 96, 98 },{ 95, 69, 8 }, }, { { 131, 35, 239 },{ 132, 36, 232 },{ 34, 104, 98 },{ 103, 77, 8 }, },
2630
{ { 140, 44, 239 },{ 141, 45, 232 },{ 43, 113, 98 },{ 112, 86, 8 }, }, { { 148, 52, 239 },{ 149, 53, 232 },{ 51, 121, 98 },{ 120, 94, 8 }, },
2631
{ { 156, 60, 239 },{ 157, 61, 232 },{ 59, 129, 98 },{ 128, 102, 8 }, }, { { 164, 68, 239 },{ 165, 69, 232 },{ 67, 137, 98 },{ 136, 110, 8 }, },
2632
{ { 173, 77, 239 },{ 174, 78, 232 },{ 76, 146, 98 },{ 145, 119, 8 }, }, { { 181, 85, 239 },{ 182, 86, 232 },{ 84, 154, 98 },{ 153, 127, 8 }, },
2633
{ { 189, 93, 239 },{ 190, 94, 232 },{ 92, 162, 98 },{ 161, 135, 8 }, }, { { 197, 101, 239 },{ 198, 102, 232 },{ 100, 170, 98 },{ 169, 143, 8 }, },
2634
{ { 206, 110, 239 },{ 207, 111, 232 },{ 109, 179, 98 },{ 178, 152, 8 }, }, { { 214, 118, 239 },{ 215, 119, 232 },{ 117, 187, 98 },{ 186, 160, 8 }, },
2635
{ { 222, 126, 239 },{ 223, 127, 232 },{ 125, 195, 98 },{ 194, 168, 8 }, }, { { 230, 134, 239 },{ 231, 135, 232 },{ 133, 203, 98 },{ 202, 176, 8 }, },
2636
{ { 239, 143, 239 },{ 240, 144, 232 },{ 142, 212, 98 },{ 211, 185, 8 }, }, { { 247, 151, 239 },{ 180, 248, 784 },{ 150, 220, 98 },{ 219, 193, 8 }, },
2637
{ { 159, 228, 3682 },{ 201, 227, 3648 },{ 158, 228, 98 },{ 227, 201, 8 }, }, { { 181, 249, 3928 },{ 209, 235, 3648 },{ 166, 236, 98 },{ 235, 209, 8 }, },
2638
{ { 255, 189, 169 },{ 218, 244, 3648 },{ 175, 245, 98 },{ 244, 218, 8 }, }, { { 197, 226, 4040 },{ 226, 252, 3648 },{ 183, 253, 98 },{ 252, 226, 8 }, },
2639
{ { 205, 234, 4040 },{ 255, 234, 8 },{ 205, 234, 456 },{ 255, 234, 8 }, }, { { 213, 242, 4040 },{ 255, 242, 8 },{ 213, 242, 456 },{ 255, 242, 8 }, },
2640
{ { 18, 60, 566 },{ 18, 60, 560 },{ 18, 0, 9 },{ 18, 0, 8 }, }, { { 26, 68, 566 },{ 26, 68, 560 },{ 26, 0, 9 },{ 26, 0, 8 }, },
2641
{ { 34, 76, 566 },{ 34, 76, 560 },{ 34, 0, 9 },{ 34, 0, 8 }, }, { { 5, 104, 2758 },{ 98, 5, 1352 },{ 42, 0, 57 },{ 42, 6, 8 }, },
2642
{ { 92, 0, 313 },{ 93, 1, 312 },{ 15, 51, 70 },{ 51, 15, 8 }, }, { { 3, 101, 790 },{ 3, 101, 784 },{ 0, 59, 88 },{ 59, 23, 8 }, },
2643
{ { 14, 107, 790 },{ 11, 109, 784 },{ 31, 67, 70 },{ 67, 31, 8 }, }, { { 19, 117, 790 },{ 19, 117, 784 },{ 39, 75, 70 },{ 75, 39, 8 }, },
2644
{ { 28, 126, 790 },{ 28, 126, 784 },{ 83, 5, 33 },{ 84, 48, 8 }, }, { { 132, 0, 239 },{ 36, 134, 784 },{ 91, 13, 33 },{ 92, 56, 8 }, },
2645
{ { 142, 4, 239 },{ 44, 142, 784 },{ 99, 21, 33 },{ 100, 64, 8 }, }, { { 150, 12, 239 },{ 52, 150, 784 },{ 107, 29, 33 },{ 108, 72, 8 }, },
2646
{ { 159, 21, 239 },{ 61, 159, 784 },{ 116, 38, 33 },{ 117, 81, 8 }, }, { { 167, 29, 239 },{ 69, 167, 784 },{ 124, 46, 33 },{ 125, 89, 8 }, },
2647
{ { 175, 37, 239 },{ 77, 175, 784 },{ 132, 54, 33 },{ 133, 97, 8 }, }, { { 183, 45, 239 },{ 85, 183, 784 },{ 140, 62, 33 },{ 141, 105, 8 }, },
2648
{ { 192, 54, 239 },{ 94, 192, 784 },{ 149, 71, 33 },{ 150, 114, 8 }, }, { { 200, 62, 239 },{ 102, 200, 784 },{ 157, 79, 33 },{ 158, 122, 8 }, },
2649
{ { 208, 70, 239 },{ 110, 208, 784 },{ 165, 87, 33 },{ 166, 130, 8 }, }, { { 216, 78, 239 },{ 118, 216, 784 },{ 173, 95, 33 },{ 174, 138, 8 }, },
2650
{ { 225, 87, 239 },{ 127, 225, 784 },{ 182, 104, 33 },{ 183, 147, 8 }, }, { { 233, 95, 239 },{ 135, 233, 784 },{ 190, 112, 33 },{ 191, 155, 8 }, },
2651
{ { 241, 103, 239 },{ 143, 241, 784 },{ 198, 120, 33 },{ 199, 163, 8 }, }, { { 111, 208, 3682 },{ 151, 249, 784 },{ 206, 128, 33 },{ 207, 171, 8 }, },
2652
{ { 120, 217, 3682 },{ 180, 216, 3648 },{ 215, 137, 33 },{ 216, 180, 8 }, }, { { 128, 225, 3682 },{ 188, 224, 3648 },{ 223, 145, 33 },{ 224, 188, 8 }, },
2653
{ { 155, 253, 3928 },{ 196, 232, 3648 },{ 231, 153, 33 },{ 232, 196, 8 }, }, { { 144, 241, 3682 },{ 204, 240, 3648 },{ 239, 161, 33 },{ 240, 204, 8 }, },
2654
{ { 153, 250, 3682 },{ 213, 249, 3648 },{ 248, 170, 33 },{ 249, 213, 8 }, }, { { 179, 221, 4040 },{ 255, 221, 8 },{ 179, 221, 456 },{ 255, 221, 8 }, },
2655
{ { 187, 229, 4040 },{ 255, 229, 8 },{ 187, 229, 456 },{ 255, 229, 8 }, }, { { 195, 237, 4040 },{ 255, 237, 8 },{ 195, 237, 456 },{ 255, 237, 8 }, },
2656
{ { 24, 80, 566 },{ 24, 80, 560 },{ 24, 0, 9 },{ 24, 0, 8 }, }, { { 32, 88, 566 },{ 32, 88, 560 },{ 32, 0, 9 },{ 32, 0, 8 }, },
2657
{ { 40, 96, 566 },{ 40, 96, 560 },{ 40, 0, 9 },{ 40, 0, 8 }, }, { { 48, 104, 566 },{ 48, 104, 560 },{ 48, 0, 9 },{ 48, 0, 8 }, },
2658
{ { 9, 138, 2758 },{ 130, 7, 1352 },{ 9, 57, 70 },{ 57, 9, 8 }, }, { { 119, 0, 313 },{ 120, 0, 312 },{ 17, 65, 70 },{ 65, 17, 8 }, },
2659
{ { 0, 128, 784 },{ 128, 6, 312 },{ 25, 73, 70 },{ 73, 25, 8 }, }, { { 6, 137, 790 },{ 5, 136, 784 },{ 33, 81, 70 },{ 81, 33, 8 }, },
2660
{ { 42, 171, 2758 },{ 14, 145, 784 },{ 42, 90, 70 },{ 90, 42, 8 }, }, { { 50, 179, 2758 },{ 22, 153, 784 },{ 50, 98, 70 },{ 98, 50, 8 }, },
2661
{ { 58, 187, 2758 },{ 30, 161, 784 },{ 58, 106, 70 },{ 106, 58, 8 }, }, { { 191, 18, 1329 },{ 38, 169, 784 },{ 112, 9, 33 },{ 114, 66, 8 }, },
2662
{ { 176, 0, 239 },{ 47, 178, 784 },{ 121, 18, 33 },{ 123, 75, 8 }, }, { { 187, 1, 239 },{ 55, 186, 784 },{ 129, 26, 33 },{ 131, 83, 8 }, },
2663
{ { 195, 10, 239 },{ 63, 194, 784 },{ 137, 34, 33 },{ 139, 91, 8 }, }, { { 203, 18, 239 },{ 71, 202, 784 },{ 145, 42, 33 },{ 147, 99, 8 }, },
2664
{ { 212, 27, 239 },{ 80, 211, 784 },{ 154, 51, 33 },{ 156, 108, 8 }, }, { { 220, 35, 239 },{ 88, 219, 784 },{ 162, 59, 33 },{ 164, 116, 8 }, },
2665
{ { 228, 43, 239 },{ 96, 227, 784 },{ 170, 67, 33 },{ 172, 124, 8 }, }, { { 236, 51, 239 },{ 104, 235, 784 },{ 178, 75, 33 },{ 180, 132, 8 }, },
2666
{ { 245, 60, 239 },{ 113, 244, 784 },{ 187, 84, 33 },{ 189, 141, 8 }, }, { { 91, 194, 3680 },{ 149, 197, 3648 },{ 195, 92, 33 },{ 197, 149, 8 }, },
2667
{ { 99, 202, 3680 },{ 157, 205, 3648 },{ 203, 100, 33 },{ 205, 157, 8 }, }, { { 107, 210, 3680 },{ 165, 213, 3648 },{ 211, 108, 33 },{ 213, 165, 8 }, },
2668
{ { 119, 249, 3928 },{ 174, 222, 3648 },{ 220, 117, 33 },{ 222, 174, 8 }, }, { { 127, 255, 856 },{ 182, 230, 3648 },{ 228, 125, 33 },{ 230, 182, 8 }, },
2669
{ { 255, 135, 169 },{ 190, 238, 3648 },{ 236, 133, 33 },{ 238, 190, 8 }, }, { { 140, 243, 3680 },{ 198, 246, 3648 },{ 244, 141, 33 },{ 246, 198, 8 }, },
2670
{ { 151, 207, 4040 },{ 255, 207, 8 },{ 151, 207, 456 },{ 255, 207, 8 }, }, { { 159, 215, 4040 },{ 255, 215, 8 },{ 159, 215, 456 },{ 255, 215, 8 }, },
2671
{ { 167, 223, 4040 },{ 255, 223, 8 },{ 167, 223, 456 },{ 255, 223, 8 }, }, { { 175, 231, 4040 },{ 255, 231, 8 },{ 175, 231, 456 },{ 255, 231, 8 }, },
2672
{ { 33, 106, 566 },{ 33, 106, 560 },{ 33, 0, 9 },{ 33, 0, 8 }, }, { { 41, 114, 566 },{ 41, 114, 560 },{ 41, 0, 9 },{ 41, 0, 8 }, },
2673
{ { 49, 122, 566 },{ 49, 122, 560 },{ 49, 0, 9 },{ 49, 0, 8 }, }, { { 57, 130, 566 },{ 57, 130, 560 },{ 57, 0, 9 },{ 57, 0, 8 }, },
2674
{ { 66, 139, 566 },{ 66, 139, 560 },{ 66, 0, 9 },{ 66, 0, 8 }, }, { { 74, 147, 566 },{ 170, 7, 1352 },{ 8, 74, 70 },{ 74, 8, 8 }, },
2675
{ { 152, 0, 313 },{ 178, 15, 1352 },{ 0, 82, 80 },{ 82, 16, 8 }, }, { { 162, 0, 313 },{ 186, 23, 1352 },{ 24, 90, 70 },{ 90, 24, 8 }, },
2676
{ { 0, 171, 784 },{ 195, 32, 1352 },{ 33, 99, 70 },{ 99, 33, 8 }, }, { { 6, 179, 790 },{ 203, 40, 1352 },{ 41, 107, 70 },{ 107, 41, 8 }, },
2677
{ { 15, 187, 790 },{ 211, 48, 1352 },{ 115, 0, 41 },{ 115, 49, 8 }, }, { { 61, 199, 710 },{ 219, 56, 1352 },{ 57, 123, 70 },{ 123, 57, 8 }, },
2678
{ { 70, 208, 710 },{ 228, 65, 1352 },{ 66, 132, 70 },{ 132, 66, 8 }, }, { { 78, 216, 710 },{ 236, 73, 1352 },{ 74, 140, 70 },{ 140, 74, 8 }, },
2679
{ { 86, 224, 710 },{ 244, 81, 1352 },{ 145, 7, 33 },{ 148, 82, 8 }, }, { { 222, 8, 233 },{ 252, 89, 1352 },{ 153, 15, 33 },{ 156, 90, 8 }, },
2680
{ { 235, 0, 239 },{ 241, 101, 328 },{ 166, 6, 39 },{ 165, 99, 8 }, }, { { 32, 170, 3680 },{ 249, 109, 328 },{ 0, 175, 98 },{ 173, 107, 8 }, },
2681
{ { 40, 178, 3680 },{ 115, 181, 3648 },{ 8, 183, 98 },{ 181, 115, 8 }, }, { { 48, 186, 3680 },{ 123, 189, 3648 },{ 16, 191, 98 },{ 189, 123, 8 }, },
2682
{ { 57, 195, 3680 },{ 132, 198, 3648 },{ 25, 200, 98 },{ 198, 132, 8 }, }, { { 67, 243, 3928 },{ 140, 206, 3648 },{ 33, 208, 98 },{ 206, 140, 8 }, },
2683
{ { 76, 251, 3928 },{ 148, 214, 3648 },{ 41, 216, 98 },{ 214, 148, 8 }, }, { { 86, 255, 856 },{ 156, 222, 3648 },{ 49, 224, 98 },{ 222, 156, 8 }, },
2684
{ { 255, 93, 169 },{ 165, 231, 3648 },{ 58, 233, 98 },{ 231, 165, 8 }, }, { { 98, 236, 3680 },{ 173, 239, 3648 },{ 66, 241, 98 },{ 239, 173, 8 }, },
2685
{ { 108, 181, 4040 },{ 181, 247, 3648 },{ 74, 249, 98 },{ 247, 181, 8 }, }, { { 116, 189, 4040 },{ 255, 189, 8 },{ 116, 189, 456 },{ 255, 189, 8 }, },
2686
{ { 125, 198, 4040 },{ 255, 198, 8 },{ 125, 198, 456 },{ 255, 198, 8 }, }, { { 133, 206, 4040 },{ 255, 206, 8 },{ 133, 206, 456 },{ 255, 206, 8 }, },
2687
{ { 141, 214, 4040 },{ 255, 214, 8 },{ 141, 214, 456 },{ 255, 214, 8 }, }, { { 149, 222, 4040 },{ 255, 222, 8 },{ 149, 222, 456 },{ 255, 222, 8 }, },
2688
{ { 47, 183, 566 },{ 47, 183, 560 },{ 47, 0, 9 },{ 47, 0, 8 }, }, { { 55, 191, 566 },{ 55, 191, 560 },{ 55, 0, 9 },{ 55, 0, 8 }, },
2689
{ { 63, 199, 566 },{ 63, 199, 560 },{ 63, 0, 9 },{ 63, 0, 8 }, }, { { 71, 207, 566 },{ 71, 207, 560 },{ 71, 0, 9 },{ 71, 0, 8 }, },
2690
{ { 80, 216, 566 },{ 80, 216, 560 },{ 80, 0, 9 },{ 80, 0, 8 }, }, { { 88, 224, 566 },{ 88, 224, 560 },{ 88, 0, 9 },{ 88, 0, 8 }, },
2691
{ { 3, 233, 710 },{ 3, 233, 704 },{ 2, 96, 70 },{ 96, 2, 8 }, }, { { 11, 241, 710 },{ 11, 241, 704 },{ 10, 104, 70 },{ 104, 10, 8 }, },
2692
{ { 20, 250, 710 },{ 20, 250, 704 },{ 19, 113, 70 },{ 113, 19, 8 }, }, { { 27, 121, 3654 },{ 27, 121, 3648 },{ 27, 121, 70 },{ 121, 27, 8 }, },
2693
{ { 35, 129, 3654 },{ 35, 129, 3648 },{ 35, 129, 70 },{ 129, 35, 8 }, }, { { 43, 137, 3654 },{ 43, 137, 3648 },{ 43, 137, 70 },{ 137, 43, 8 }, },
2694
{ { 52, 146, 3654 },{ 52, 146, 3648 },{ 52, 146, 70 },{ 146, 52, 8 }, }, { { 60, 154, 3654 },{ 60, 154, 3648 },{ 60, 154, 70 },{ 154, 60, 8 }, },
2695
{ { 68, 162, 3654 },{ 68, 162, 3648 },{ 68, 162, 70 },{ 162, 68, 8 }, }, { { 76, 170, 3654 },{ 76, 170, 3648 },{ 76, 170, 70 },{ 170, 76, 8 }, },
2696
{ { 85, 179, 3654 },{ 85, 179, 3648 },{ 85, 179, 70 },{ 179, 85, 8 }, }, { { 93, 187, 3654 },{ 93, 187, 3648 },{ 93, 187, 70 },{ 187, 93, 8 }, },
2697
{ { 101, 195, 3654 },{ 101, 195, 3648 },{ 101, 195, 70 },{ 195, 101, 8 }, }, { { 109, 203, 3654 },{ 109, 203, 3648 },{ 109, 203, 70 },{ 203, 109, 8 }, },
2698
{ { 118, 212, 3654 },{ 118, 212, 3648 },{ 118, 212, 70 },{ 212, 118, 8 }, }, { { 126, 220, 3654 },{ 126, 220, 3648 },{ 126, 220, 70 },{ 220, 126, 8 }, },
2699
{ { 134, 228, 3654 },{ 134, 228, 3648 },{ 134, 228, 70 },{ 228, 134, 8 }, }, { { 5, 236, 3680 },{ 142, 236, 3648 },{ 5, 236, 96 },{ 236, 142, 8 }, },
2700
{ { 14, 245, 3680 },{ 151, 245, 3648 },{ 14, 245, 96 },{ 245, 151, 8 }, }, { { 23, 159, 4040 },{ 159, 253, 3648 },{ 23, 159, 456 },{ 253, 159, 8 }, },
2701
{ { 31, 167, 4040 },{ 255, 167, 8 },{ 31, 167, 456 },{ 255, 167, 8 }, }, { { 39, 175, 4040 },{ 255, 175, 8 },{ 39, 175, 456 },{ 255, 175, 8 }, },
2702
{ { 48, 184, 4040 },{ 255, 184, 8 },{ 48, 184, 456 },{ 255, 184, 8 }, }, { { 56, 192, 4040 },{ 255, 192, 8 },{ 56, 192, 456 },{ 255, 192, 8 }, },
2703
{ { 64, 200, 4040 },{ 255, 200, 8 },{ 64, 200, 456 },{ 255, 200, 8 }, },{ { 72, 208, 4040 },{ 255, 208, 8 },{ 72, 208, 456 },{ 255, 208, 8 }, },
2704
2705
};
2706
2707
struct dxt5a_block
2708
{
2709
uint8_t m_endpoints[2];
2710
2711
enum { cTotalSelectorBytes = 6 };
2712
uint8_t m_selectors[cTotalSelectorBytes];
2713
2714
inline void clear()
2715
{
2716
basisu::clear_obj(*this);
2717
}
2718
2719
inline uint32_t get_low_alpha() const
2720
{
2721
return m_endpoints[0];
2722
}
2723
2724
inline uint32_t get_high_alpha() const
2725
{
2726
return m_endpoints[1];
2727
}
2728
2729
inline void set_low_alpha(uint32_t i)
2730
{
2731
assert(i <= UINT8_MAX);
2732
m_endpoints[0] = static_cast<uint8_t>(i);
2733
}
2734
2735
inline void set_high_alpha(uint32_t i)
2736
{
2737
assert(i <= UINT8_MAX);
2738
m_endpoints[1] = static_cast<uint8_t>(i);
2739
}
2740
2741
inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
2742
2743
uint32_t get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); }
2744
uint32_t get_selectors_as_word(uint32_t index) { assert(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); }
2745
2746
inline uint32_t get_selector(uint32_t x, uint32_t y) const
2747
{
2748
assert((x < 4U) && (y < 4U));
2749
2750
uint32_t selector_index = (y * 4) + x;
2751
uint32_t bit_index = selector_index * cDXT5SelectorBits;
2752
2753
uint32_t byte_index = bit_index >> 3;
2754
uint32_t bit_ofs = bit_index & 7;
2755
2756
uint32_t v = m_selectors[byte_index];
2757
if (byte_index < (cTotalSelectorBytes - 1))
2758
v |= (m_selectors[byte_index + 1] << 8);
2759
2760
return (v >> bit_ofs) & 7;
2761
}
2762
2763
inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
2764
{
2765
assert((x < 4U) && (y < 4U) && (val < 8U));
2766
2767
uint32_t selector_index = (y * 4) + x;
2768
uint32_t bit_index = selector_index * cDXT5SelectorBits;
2769
2770
uint32_t byte_index = bit_index >> 3;
2771
uint32_t bit_ofs = bit_index & 7;
2772
2773
uint32_t v = m_selectors[byte_index];
2774
if (byte_index < (cTotalSelectorBytes - 1))
2775
v |= (m_selectors[byte_index + 1] << 8);
2776
2777
v &= (~(7 << bit_ofs));
2778
v |= (val << bit_ofs);
2779
2780
m_selectors[byte_index] = static_cast<uint8_t>(v);
2781
if (byte_index < (cTotalSelectorBytes - 1))
2782
m_selectors[byte_index + 1] = static_cast<uint8_t>(v >> 8);
2783
}
2784
2785
enum { cMaxSelectorValues = 8 };
2786
2787
static uint32_t get_block_values6(color32* pDst, uint32_t l, uint32_t h)
2788
{
2789
pDst[0].a = static_cast<uint8_t>(l);
2790
pDst[1].a = static_cast<uint8_t>(h);
2791
pDst[2].a = static_cast<uint8_t>((l * 4 + h) / 5);
2792
pDst[3].a = static_cast<uint8_t>((l * 3 + h * 2) / 5);
2793
pDst[4].a = static_cast<uint8_t>((l * 2 + h * 3) / 5);
2794
pDst[5].a = static_cast<uint8_t>((l + h * 4) / 5);
2795
pDst[6].a = 0;
2796
pDst[7].a = 255;
2797
return 6;
2798
}
2799
2800
static uint32_t get_block_values8(color32* pDst, uint32_t l, uint32_t h)
2801
{
2802
pDst[0].a = static_cast<uint8_t>(l);
2803
pDst[1].a = static_cast<uint8_t>(h);
2804
pDst[2].a = static_cast<uint8_t>((l * 6 + h) / 7);
2805
pDst[3].a = static_cast<uint8_t>((l * 5 + h * 2) / 7);
2806
pDst[4].a = static_cast<uint8_t>((l * 4 + h * 3) / 7);
2807
pDst[5].a = static_cast<uint8_t>((l * 3 + h * 4) / 7);
2808
pDst[6].a = static_cast<uint8_t>((l * 2 + h * 5) / 7);
2809
pDst[7].a = static_cast<uint8_t>((l + h * 6) / 7);
2810
return 8;
2811
}
2812
2813
static uint32_t get_block_values(color32* pDst, uint32_t l, uint32_t h)
2814
{
2815
if (l > h)
2816
return get_block_values8(pDst, l, h);
2817
else
2818
return get_block_values6(pDst, l, h);
2819
}
2820
};
2821
2822
// Converts one ETC1S block (endpoint + selector pair) into a DXT5A block, using only the
// red channel of the ETC1S colors.
//   pDst_block - destination block; its endpoints and all selector bits are written.
//   pEndpoints - supplies the base color (m_color5) and intensity table index (m_inten5).
//   pSelector  - supplies the per-texel selectors plus the lowest/highest selector in use.
// Three paths are taken, in order:
//   1. Only one selector value is used: both endpoints get that value's red level and all
//      selector bytes are zeroed.
//   2. Exactly two distinct selector values are used: their red levels become the endpoints and
//      each texel gets DXT5A selector 0 or 1 (which select the stored endpoints directly).
//   3. Otherwise: endpoints and a selector translation come from g_etc1_g_to_dxt5a.
static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t low_selector = pSelector->m_lo_selector;
	const uint32_t high_selector = pSelector->m_hi_selector;

	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten_table = pEndpoints->m_inten5;

	if (low_selector == high_selector)
	{
		// Path 1: the whole block uses a single selector value.
		uint32_t r;
		decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);

		pDst_block->set_low_alpha(r);
		pDst_block->set_high_alpha(r);
		pDst_block->m_selectors[0] = 0;
		pDst_block->m_selectors[1] = 0;
		pDst_block->m_selectors[2] = 0;
		pDst_block->m_selectors[3] = 0;
		pDst_block->m_selectors[4] = 0;
		pDst_block->m_selectors[5] = 0;
		return;
	}
	else if (pSelector->m_num_unique_selectors == 2)
	{
		// Path 2: two distinct values, so the endpoints can represent them exactly.
		color32 block_colors[4];

		decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);

		const uint32_t r0 = block_colors[low_selector].r;
		const uint32_t r1 = block_colors[high_selector].r;

		pDst_block->set_low_alpha(r0);
		pDst_block->set_high_alpha(r1);

		// TODO: Optimize this
		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				uint32_t s = pSelector->get_selector(x, y);
				pDst_block->set_selector(x, y, (s == high_selector) ? 1 : 0);
			}
		}

		return;
	}

	// Path 3: find the table column for this block's (low, high) selector range.
	uint32_t selector_range_table = 0;
	for (selector_range_table = 0; selector_range_table < NUM_DXT5A_SELECTOR_RANGES; selector_range_table++)
		if ((low_selector == s_dxt5a_selector_ranges[selector_range_table].m_low) && (high_selector == s_dxt5a_selector_ranges[selector_range_table].m_high))
			break;
	// No matching range: fall back to column 0.
	if (selector_range_table >= NUM_DXT5A_SELECTOR_RANGES)
		selector_range_table = 0;

	const etc1_g_to_dxt5a_conversion* pTable_entry = &g_etc1_g_to_dxt5a[base_color.r + inten_table * 32][selector_range_table];

	pDst_block->set_low_alpha(pTable_entry->m_lo);
	pDst_block->set_high_alpha(pTable_entry->m_hi);

	// TODO: Optimize this (like ETC1->BC1)
	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			uint32_t s = pSelector->get_selector(x, y);

			// m_trans packs one 3-bit DXT5A selector per ETC1S selector value.
			uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;

			pDst_block->set_selector(x, y, ds);
		}
	}
}
2895
#endif //BASISD_SUPPORT_DXT5A
2896
2897
// PVRTC
2898
2899
#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC
2900
// Bit-spreading lookup: entry i has bit k of the 8-bit index moved to bit 2 * k, leaving the
// odd bits clear (e.g. [0x02] = 0x0004, [0xFF] = 0x5555).
static const uint16_t g_pvrtc_swizzle_table[256] =
2901
{
2902
0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
2903
0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
2904
0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
2905
0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
2906
0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
2907
0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
2908
0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
2909
0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
2910
};
2911
2912
// Note we can't use simple calculations to convert PVRTC1 encoded endpoint components to/from 8-bits, due to hardware approximations.
// Each table is indexed by an encoded component value and holds the matching 8-bit value.
// NOTE(review): the 32/16/8-entry tables are presumably for 5-, 4- and 3-bit components - confirm.
static const uint8_t g_pvrtc_5[32] = { 0,8,16,24,33,41,49,57,66,74,82,90,99,107,115,123,132,140,148,156,165,173,181,189,198,206,214,222,231,239,247,255 };
static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 };
static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 };
// Nine entries (indices 0..8); the last step (238 -> 255) is smaller than the others.
static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 };
2917
2918
// Maps an 8-bit value v to the largest index i with g_pvrtc_5[i] <= v (round down),
// e.g. [32] = 3 and [33] = 4 because g_pvrtc_5[4] == 33.
static const uint8_t g_pvrtc_5_floor[256] =
2919
{
2920
0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,
2921
3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,
2922
7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
2923
11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,
2924
15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,
2925
19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,
2926
23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,
2927
27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31
2928
};
2929
2930
// Maps an 8-bit value v to the smallest index i with g_pvrtc_5[i] >= v (round up),
// e.g. [33] = 4 and [34] = 5 because g_pvrtc_5[4] == 33.
static const uint8_t g_pvrtc_5_ceil[256] =
2931
{
2932
0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,
2933
4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,
2934
8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,
2935
12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,
2936
16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,
2937
20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,
2938
24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,
2939
28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31
2940
};
2941
2942
// Maps an 8-bit value v to the largest index i with g_pvrtc_4[i] <= v (round down),
// e.g. [32] = 1 and [33] = 2 because g_pvrtc_4[2] == 33.
static const uint8_t g_pvrtc_4_floor[256] =
2943
{
2944
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2945
1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2946
3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
2947
5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,
2948
7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,
2949
9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,
2950
11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,
2951
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15
2952
};
2953
2954
// Maps an 8-bit value v to the smallest index i with g_pvrtc_4[i] >= v (round up),
// e.g. [16] = 1 and [17] = 2 because g_pvrtc_4[1] == 16.
static const uint8_t g_pvrtc_4_ceil[256] =
2955
{
2956
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2957
2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2958
4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,
2959
6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,
2960
8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,
2961
10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,
2962
12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,
2963
14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
2964
};
2965
2966
// Maps an 8-bit value v to the largest index i with g_pvrtc_3[i] <= v (round down),
// e.g. [32] = 0 and [33] = 1 because g_pvrtc_3[1] == 33.
static const uint8_t g_pvrtc_3_floor[256] =
2967
{
2968
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2969
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2970
1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2971
2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2972
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,
2973
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
2974
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,
2975
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7
2976
};
2977
2978
// Maps an 8-bit value v to the smallest index i with g_pvrtc_3[i] >= v (round up),
// e.g. [33] = 1 and [34] = 2 because g_pvrtc_3[1] == 33.
static const uint8_t g_pvrtc_3_ceil[256] =
2979
{
2980
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2981
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2982
2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2983
3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2984
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
2985
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,
2986
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,
2987
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
2988
};
2989
2990
// 256-entry lookup indexed by an 8-bit alpha value; entries are in 0..8.
// Only the last entry (input 255) is 8. pvrtc4_block::set_endpoint_floor() treats a result of 8 as
// "store an opaque endpoint" and any smaller result as the 3-bit alpha of a translucent endpoint.
static const uint8_t g_pvrtc_alpha_floor[256] =
2991
{
2992
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2993
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2994
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2995
2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2996
3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2997
4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
2998
5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
2999
6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8
3000
};
3001
3002
// 256-entry lookup indexed by an 8-bit alpha value; entries are in 0..8.
// The highest inputs map to 8. pvrtc4_block::set_endpoint_ceil() treats a result of 8 as
// "store an opaque endpoint" and any smaller result as the 3-bit alpha of a translucent endpoint.
static const uint8_t g_pvrtc_alpha_ceil[256] =
3003
{
3004
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
3005
1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3006
2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3007
3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
3008
4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
3009
5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
3010
6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
3011
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
3012
};
3013
3014
struct pvrtc4_block
3015
{
3016
uint32_t m_modulation;
3017
uint32_t m_endpoints;
3018
3019
pvrtc4_block() : m_modulation(0), m_endpoints(0) { }
3020
3021
inline bool operator== (const pvrtc4_block& rhs) const
3022
{
3023
return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints);
3024
}
3025
3026
inline void clear()
3027
{
3028
m_modulation = 0;
3029
m_endpoints = 0;
3030
}
3031
3032
inline bool get_block_uses_transparent_modulation() const
3033
{
3034
return (m_endpoints & 1) != 0;
3035
}
3036
3037
inline void set_block_uses_transparent_modulation(bool m)
3038
{
3039
m_endpoints = (m_endpoints & ~1U) | static_cast<uint32_t>(m);
3040
}
3041
3042
inline bool is_endpoint_opaque(uint32_t endpoint_index) const
3043
{
3044
static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
3045
return (m_endpoints & s_bitmasks[basisu::open_range_check(endpoint_index, 2U)]) != 0;
3046
}
3047
3048
inline void set_endpoint_opaque(uint32_t endpoint_index, bool opaque)
3049
{
3050
assert(endpoint_index < 2);
3051
static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
3052
if (opaque)
3053
m_endpoints |= s_bitmasks[endpoint_index];
3054
else
3055
m_endpoints &= ~s_bitmasks[endpoint_index];
3056
}
3057
3058
inline color32 get_endpoint_5554(uint32_t endpoint_index) const
3059
{
3060
assert(endpoint_index < 2);
3061
static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
3062
uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
3063
3064
uint32_t r, g, b, a;
3065
if (packed & 0x8000)
3066
{
3067
// opaque 554 or 555
3068
r = (packed >> 10) & 31;
3069
g = (packed >> 5) & 31;
3070
b = packed & 31;
3071
3072
if (!endpoint_index)
3073
b |= (b >> 4);
3074
3075
a = 0xF;
3076
}
3077
else
3078
{
3079
// translucent 4433 or 4443
3080
r = (packed >> 7) & 0x1E;
3081
g = (packed >> 3) & 0x1E;
3082
b = (packed & 0xF) << 1;
3083
3084
r |= (r >> 4);
3085
g |= (g >> 4);
3086
3087
if (!endpoint_index)
3088
b |= (b >> 3);
3089
else
3090
b |= (b >> 4);
3091
3092
a = (packed >> 11) & 0xE;
3093
}
3094
3095
assert((r < 32) && (g < 32) && (b < 32) && (a < 16));
3096
3097
return color32(r, g, b, a);
3098
}
3099
3100
inline color32 get_endpoint_8888(uint32_t endpoint_index) const
3101
{
3102
assert(endpoint_index < 2);
3103
static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
3104
uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
3105
3106
uint32_t r, g, b, a;
3107
if (packed & 0x8000)
3108
{
3109
// opaque 554 or 555
3110
// 1RRRRRGGGGGBBBBM
3111
// 1RRRRRGGGGGBBBBB
3112
r = (packed >> 10) & 31;
3113
g = (packed >> 5) & 31;
3114
b = packed & 31;
3115
3116
r = g_pvrtc_5[r];
3117
g = g_pvrtc_5[g];
3118
3119
if (!endpoint_index)
3120
b = g_pvrtc_4[b >> 1];
3121
else
3122
b = g_pvrtc_5[b];
3123
3124
a = 255;
3125
}
3126
else
3127
{
3128
// translucent 4433 or 4443
3129
// 0AAA RRRR GGGG BBBM
3130
// 0AAA RRRR GGGG BBBB
3131
r = (packed >> 8) & 0xF;
3132
g = (packed >> 4) & 0xF;
3133
b = packed & 0xF;
3134
a = (packed >> 12) & 7;
3135
3136
r = g_pvrtc_4[r];
3137
g = g_pvrtc_4[g];
3138
3139
if (!endpoint_index)
3140
b = g_pvrtc_3[b >> 1];
3141
else
3142
b = g_pvrtc_4[b];
3143
3144
a = g_pvrtc_alpha[a];
3145
}
3146
3147
return color32(r, g, b, a);
3148
}
3149
3150
inline uint32_t get_endpoint_l8(uint32_t endpoint_index) const
3151
{
3152
color32 c(get_endpoint_8888(endpoint_index));
3153
return c.r + c.g + c.b + c.a;
3154
}
3155
3156
inline uint32_t get_opaque_endpoint_l0() const
3157
{
3158
uint32_t packed = m_endpoints & 0xFFFE;
3159
3160
uint32_t r, g, b;
3161
assert(packed & 0x8000);
3162
3163
// opaque 554 or 555
3164
r = (packed >> 10) & 31;
3165
g = (packed >> 5) & 31;
3166
b = packed & 31;
3167
b |= (b >> 4);
3168
3169
return r + g + b;
3170
}
3171
3172
inline uint32_t get_opaque_endpoint_l1() const
3173
{
3174
uint32_t packed = m_endpoints >> 16;
3175
3176
uint32_t r, g, b;
3177
assert(packed & 0x8000);
3178
3179
// opaque 554 or 555
3180
r = (packed >> 10) & 31;
3181
g = (packed >> 5) & 31;
3182
b = packed & 31;
3183
3184
return r + g + b;
3185
}
3186
3187
static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint)
3188
{
3189
static const uint32_t s_comp_prec[4][4] =
3190
{
3191
// R0 G0 B0 A0 R1 G1 B1 A1
3192
{ 4, 4, 3, 3 },{ 4, 4, 4, 3 }, // transparent endpoint
3193
3194
{ 5, 5, 4, 0 },{ 5, 5, 5, 0 } // opaque endpoint
3195
};
3196
return s_comp_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][basisu::open_range_check(c, 4U)];
3197
}
3198
3199
static color32 get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint)
3200
{
3201
static const color32 s_color_prec[4] =
3202
{
3203
color32(4, 4, 3, 3), color32(4, 4, 4, 3), // transparent endpoint
3204
color32(5, 5, 4, 0), color32(5, 5, 5, 0) // opaque endpoint
3205
};
3206
return s_color_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)];
3207
}
3208
3209
inline void set_opaque_endpoint_floor(uint32_t endpoint_index, const color32& c)
3210
{
3211
assert(endpoint_index < 2);
3212
const uint32_t m = m_endpoints & 1;
3213
3214
uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3215
3216
if (!endpoint_index)
3217
b = g_pvrtc_4_floor[b] << 1;
3218
else
3219
b = g_pvrtc_5_floor[b];
3220
3221
// rgba=555 here
3222
assert((r < 32) && (g < 32) && (b < 32));
3223
3224
// 1RRRRRGGGGGBBBBM
3225
// 1RRRRRGGGGGBBBBB
3226
3227
// opaque 554 or 555
3228
uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3229
if (!endpoint_index)
3230
packed = (packed & ~1) | m;
3231
3232
assert(packed <= 0xFFFF);
3233
3234
if (endpoint_index)
3235
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3236
else
3237
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3238
}
3239
3240
inline void set_opaque_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3241
{
3242
assert(endpoint_index < 2);
3243
const uint32_t m = m_endpoints & 1;
3244
3245
uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3246
3247
if (!endpoint_index)
3248
b = g_pvrtc_4_ceil[b] << 1;
3249
else
3250
b = g_pvrtc_5_ceil[b];
3251
3252
// rgba=555 here
3253
assert((r < 32) && (g < 32) && (b < 32));
3254
3255
// 1RRRRRGGGGGBBBBM
3256
// 1RRRRRGGGGGBBBBB
3257
3258
// opaque 554 or 555
3259
uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3260
if (!endpoint_index)
3261
packed |= m;
3262
3263
assert(packed <= 0xFFFF);
3264
3265
if (endpoint_index)
3266
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3267
else
3268
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3269
}
3270
3271
// opaque endpoints: 554 or 555
3272
// transparent endpoints: 3443 or 3444
3273
inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint)
3274
{
3275
assert(endpoint_index < 2);
3276
const uint32_t m = m_endpoints & 1;
3277
uint32_t r = c[0], g = c[1], b = c[2], a = c[3];
3278
3279
uint32_t packed;
3280
3281
if (opaque_endpoint)
3282
{
3283
if (!endpoint_index)
3284
{
3285
// 554
3286
// 1RRRRRGGGGGBBBBM
3287
assert((r < 32) && (g < 32) && (b < 16));
3288
packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m;
3289
}
3290
else
3291
{
3292
// 555
3293
// 1RRRRRGGGGGBBBBB
3294
assert((r < 32) && (g < 32) && (b < 32));
3295
packed = 0x8000 | (r << 10) | (g << 5) | b;
3296
}
3297
}
3298
else
3299
{
3300
if (!endpoint_index)
3301
{
3302
// 3443
3303
// 0AAA RRRR GGGG BBBM
3304
assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
3305
packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m;
3306
}
3307
else
3308
{
3309
// 3444
3310
// 0AAA RRRR GGGG BBBB
3311
assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
3312
packed = (a << 12) | (r << 8) | (g << 4) | b;
3313
}
3314
}
3315
3316
assert(packed <= 0xFFFF);
3317
3318
if (endpoint_index)
3319
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3320
else
3321
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3322
}
3323
3324
inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c)
3325
{
3326
assert(endpoint_index < 2);
3327
3328
int a = g_pvrtc_alpha_floor[c.a];
3329
if (a == 8)
3330
{
3331
// 554 or 555
3332
uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3333
3334
if (!endpoint_index)
3335
b = g_pvrtc_4_floor[b];
3336
else
3337
b = g_pvrtc_5_floor[b];
3338
3339
set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3340
}
3341
else
3342
{
3343
// 4433 or 4443
3344
uint32_t r = g_pvrtc_4_floor[c[0]], g = g_pvrtc_4_floor[c[1]], b = c[2];
3345
3346
if (!endpoint_index)
3347
b = g_pvrtc_3_floor[b];
3348
else
3349
b = g_pvrtc_4_floor[b];
3350
3351
set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3352
}
3353
}
3354
3355
inline void set_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3356
{
3357
assert(endpoint_index < 2);
3358
3359
int a = g_pvrtc_alpha_ceil[c.a];
3360
if (a == 8)
3361
{
3362
// 554 or 555
3363
uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3364
3365
if (!endpoint_index)
3366
b = g_pvrtc_4_ceil[b];
3367
else
3368
b = g_pvrtc_5_ceil[b];
3369
3370
set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3371
}
3372
else
3373
{
3374
// 4433 or 4443
3375
uint32_t r = g_pvrtc_4_ceil[c[0]], g = g_pvrtc_4_ceil[c[1]], b = c[2];
3376
3377
if (!endpoint_index)
3378
b = g_pvrtc_3_ceil[b];
3379
else
3380
b = g_pvrtc_4_ceil[b];
3381
3382
set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3383
}
3384
}
3385
3386
inline uint32_t get_modulation(uint32_t x, uint32_t y) const
3387
{
3388
assert((x < 4) && (y < 4));
3389
return (m_modulation >> ((y * 4 + x) * 2)) & 3;
3390
}
3391
3392
// Scaled by 8
3393
inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const
3394
{
3395
static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 },{ 0, 4, 4, 8 } };
3396
return s_block_scales[block_uses_transparent_modulation];
3397
}
3398
3399
// Scaled by 8
3400
inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const
3401
{
3402
return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)];
3403
}
3404
3405
inline void set_modulation(uint32_t x, uint32_t y, uint32_t s)
3406
{
3407
assert((x < 4) && (y < 4) && (s < 4));
3408
uint32_t n = (y * 4 + x) * 2;
3409
m_modulation = (m_modulation & (~(3 << n))) | (s << n);
3410
assert(get_modulation(x, y) == s);
3411
}
3412
3413
// Assumes modulation was initialized to 0
3414
inline void set_modulation_fast(uint32_t x, uint32_t y, uint32_t s)
3415
{
3416
assert((x < 4) && (y < 4) && (s < 4));
3417
uint32_t n = (y * 4 + x) * 2;
3418
m_modulation |= (s << n);
3419
assert(get_modulation(x, y) == s);
3420
}
3421
};
3422
3423
// Compiled out (reference only). Each row holds four weights that sum to 16; row (y * 4 + x) matches the
// w0..w3 arguments that the DO_PIX(x, y, ...) invocations in fixup_pvrtc1_4_modulation_rgb/rgba pass inline.
#if 0
3424
static const uint8_t g_pvrtc_bilinear_weights[16][4] =
3425
{
3426
{ 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 },
3427
{ 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 },
3428
{ 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 },
3429
{ 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 },
3430
};
3431
#endif
3432
3433
// Pairs one ETC1 block with a packed 32-bit PVRTC endpoint word.
// NOTE(review): presumably scratch storage used while transcoding ETC1S to PVRTC1 before the
// modulation fix-up pass runs - confirm against the callers, which are outside this section.
struct pvrtc1_temp_block
3434
{
3435
decoder_etc_block m_etc1_block;
3436
uint32_t m_pvrtc_endpoints;
3437
};
3438
3439
// Returns R + G + B of opaque endpoint 0 (low 16 bits of a packed endpoint word), each on a 5-bit scale.
// Endpoint 0 stores only 4 blue bits (bit 0 is the modulation flag), so blue is widened to 5 bits
// by copying its top bit into the low bit. Bit 15 (the opaque flag) must be set.
static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints)
{
    assert(endpoints & 0x8000);

    const uint32_t red = (endpoints >> 10) & 31;
    const uint32_t green = (endpoints >> 5) & 31;

    // Mask 30 drops bit 0, which belongs to the modulation flag rather than to blue.
    const uint32_t blue_hi = endpoints & 30;
    const uint32_t blue = blue_hi | (blue_hi >> 4);

    return red + green + blue;
}
3453
3454
// Returns R + G + B of opaque endpoint 1 (high 16 bits of a packed endpoint word); all three
// channels are stored with 5 bits. Bit 31 of the word (the endpoint's opaque flag) must be set.
static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints)
{
    const uint32_t hi_half = endpoints >> 16;
    assert(hi_half & 0x8000);

    const uint32_t red = (hi_half >> 10) & 31;
    const uint32_t green = (hi_half >> 5) & 31;
    const uint32_t blue = hi_half & 31;

    return red + green + blue;
}
3467
3468
// Expands one endpoint of a packed 32-bit PVRTC endpoint word to 8 bits per channel.
// endpoint_index 0 selects the low 16 bits (bit 0 is the modulation flag and is ignored),
// endpoint_index 1 selects the high 16 bits. Opaque endpoints always return alpha 255.
static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index)
{
    assert(endpoint_index < 2);

    static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
    const uint32_t shift = basisu::open_range_check(endpoint_index, 2U) ? 16 : 0;
    const uint32_t bits = (endpoints >> shift) & s_endpoint_mask[endpoint_index];
    const bool first_endpoint = !endpoint_index;

    uint32_t red, green, blue, alpha;

    if ((bits & 0x8000) == 0)
    {
        // Translucent endpoint.
        // 0AAA RRRR GGGG BBBM  (endpoint 0)
        // 0AAA RRRR GGGG BBBB  (endpoint 1)
        const uint32_t blue_field = bits & 0xF;

        red = g_pvrtc_4[(bits >> 8) & 0xF];
        green = g_pvrtc_4[(bits >> 4) & 0xF];

        if (first_endpoint)
            blue = g_pvrtc_3[blue_field >> 1];
        else
            blue = g_pvrtc_4[blue_field];

        alpha = g_pvrtc_alpha[(bits >> 12) & 7];
    }
    else
    {
        // Opaque endpoint.
        // 1RRRRRGGGGGBBBBM  (endpoint 0)
        // 1RRRRRGGGGGBBBBB  (endpoint 1)
        const uint32_t blue_field = bits & 31;

        red = g_pvrtc_5[(bits >> 10) & 31];
        green = g_pvrtc_5[(bits >> 5) & 31];

        if (first_endpoint)
            blue = g_pvrtc_4[blue_field >> 1];
        else
            blue = g_pvrtc_5[blue_field];

        alpha = 255;
    }

    return color32(red, green, blue, alpha);
}
3517
3518
static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index)
3519
{
3520
color32 c(get_endpoint_8888(endpoints, endpoint_index));
3521
return c.r + c.g + c.b + c.a;
3522
}
3523
#endif
3524
3525
#if BASISD_SUPPORT_PVRTC1
3526
// TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture.
3527
static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
3528
{
3529
const uint32_t x_mask = num_blocks_x - 1;
3530
const uint32_t y_mask = num_blocks_y - 1;
3531
const uint32_t x_bits = basisu::total_bits(x_mask);
3532
const uint32_t y_bits = basisu::total_bits(y_mask);
3533
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3534
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3535
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3536
3537
uint32_t block_index = 0;
3538
3539
// really 3x3
3540
int e0[4][4], e1[4][4];
3541
3542
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3543
{
3544
const uint32_t* pE_rows[3];
3545
3546
for (int ey = 0; ey < 3; ey++)
3547
{
3548
int by = y + ey - 1;
3549
3550
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3551
3552
pE_rows[ey] = pE;
3553
3554
for (int ex = 0; ex < 3; ex++)
3555
{
3556
int bx = 0 + ex - 1;
3557
3558
const uint32_t e = pE[bx & x_mask];
3559
3560
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
3561
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
3562
}
3563
}
3564
3565
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3566
3567
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3568
{
3569
const decoder_etc_block& src_block = pETC_Blocks[block_index];
3570
3571
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3572
3573
uint32_t swizzled = x_swizzle | y_swizzle;
3574
if (num_blocks_x != num_blocks_y)
3575
{
3576
swizzled &= swizzle_mask;
3577
3578
if (num_blocks_x > num_blocks_y)
3579
swizzled |= ((x >> min_bits) << (min_bits * 2));
3580
else
3581
swizzled |= ((y >> min_bits) << (min_bits * 2));
3582
}
3583
3584
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3585
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3586
3587
uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3588
uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3589
uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3590
3591
const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3592
int by = (base_r + base_g + base_b) * 16;
3593
int block_colors_y_x16[4];
3594
block_colors_y_x16[0] = by + pInten_table48[2];
3595
block_colors_y_x16[1] = by + pInten_table48[3];
3596
block_colors_y_x16[2] = by + pInten_table48[1];
3597
block_colors_y_x16[3] = by + pInten_table48[0];
3598
3599
{
3600
const uint32_t ex = 2;
3601
int bx = x + ex - 1;
3602
bx &= x_mask;
3603
3604
#define DO_ROW(ey) \
3605
{ \
3606
const uint32_t e = pE_rows[ey][bx]; \
3607
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
3608
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
3609
}
3610
3611
DO_ROW(0);
3612
DO_ROW(1);
3613
DO_ROW(2);
3614
#undef DO_ROW
3615
}
3616
3617
uint32_t mod = 0;
3618
3619
uint32_t lookup_x[4];
3620
3621
#define DO_LOOKUP(lx) { \
3622
const uint32_t byte_ofs = 7 - (((lx) * 4) >> 3); \
3623
const uint32_t lsb_bits = src_block.m_bytes[byte_ofs] >> (((lx) & 1) * 4); \
3624
const uint32_t msb_bits = src_block.m_bytes[byte_ofs - 2] >> (((lx) & 1) * 4); \
3625
lookup_x[lx] = (lsb_bits & 0xF) | ((msb_bits & 0xF) << 4); }
3626
3627
DO_LOOKUP(0);
3628
DO_LOOKUP(1);
3629
DO_LOOKUP(2);
3630
DO_LOOKUP(3);
3631
#undef DO_LOOKUP
3632
3633
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3634
{ \
3635
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3636
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3637
int cl = block_colors_y_x16[g_etc1_x_selector_unpack[ly][lookup_x[lx]]]; \
3638
int dl = cb_l - ca_l; \
3639
int vl = cl - ca_l; \
3640
int p = vl * 16; \
3641
if (ca_l > cb_l) { p = -p; dl = -dl; } \
3642
uint32_t m = 0; \
3643
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3644
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3645
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3646
mod |= m; \
3647
}
3648
3649
{
3650
const uint32_t ex = 0, ey = 0;
3651
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3652
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3653
DO_PIX(0, 0, 4, 4, 4, 4);
3654
DO_PIX(1, 0, 2, 6, 2, 6);
3655
DO_PIX(0, 1, 2, 2, 6, 6);
3656
DO_PIX(1, 1, 1, 3, 3, 9);
3657
}
3658
3659
{
3660
const uint32_t ex = 1, ey = 0;
3661
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3662
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3663
DO_PIX(2, 0, 8, 0, 8, 0);
3664
DO_PIX(3, 0, 6, 2, 6, 2);
3665
DO_PIX(2, 1, 4, 0, 12, 0);
3666
DO_PIX(3, 1, 3, 1, 9, 3);
3667
}
3668
3669
{
3670
const uint32_t ex = 0, ey = 1;
3671
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3672
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3673
DO_PIX(0, 2, 8, 8, 0, 0);
3674
DO_PIX(1, 2, 4, 12, 0, 0);
3675
DO_PIX(0, 3, 6, 6, 2, 2);
3676
DO_PIX(1, 3, 3, 9, 1, 3);
3677
}
3678
3679
{
3680
const uint32_t ex = 1, ey = 1;
3681
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3682
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3683
DO_PIX(2, 2, 16, 0, 0, 0);
3684
DO_PIX(3, 2, 12, 4, 0, 0);
3685
DO_PIX(2, 3, 12, 0, 4, 0);
3686
DO_PIX(3, 3, 9, 3, 3, 1);
3687
}
3688
#undef DO_PIX
3689
3690
pDst_block->m_modulation = mod;
3691
3692
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3693
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3694
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3695
3696
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3697
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3698
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3699
3700
} // x
3701
} // y
3702
}
3703
3704
static void fixup_pvrtc1_4_modulation_rgba(
3705
const decoder_etc_block* pETC_Blocks,
3706
const uint32_t* pPVRTC_endpoints,
3707
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks,
3708
const endpoint* pEndpoints, const selector* pSelectors)
3709
{
3710
const uint32_t x_mask = num_blocks_x - 1;
3711
const uint32_t y_mask = num_blocks_y - 1;
3712
const uint32_t x_bits = basisu::total_bits(x_mask);
3713
const uint32_t y_bits = basisu::total_bits(y_mask);
3714
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3715
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3716
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3717
3718
uint32_t block_index = 0;
3719
3720
// really 3x3
3721
int e0[4][4], e1[4][4];
3722
3723
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3724
{
3725
const uint32_t* pE_rows[3];
3726
3727
for (int ey = 0; ey < 3; ey++)
3728
{
3729
int by = y + ey - 1;
3730
3731
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3732
3733
pE_rows[ey] = pE;
3734
3735
for (int ex = 0; ex < 3; ex++)
3736
{
3737
int bx = 0 + ex - 1;
3738
3739
const uint32_t e = pE[bx & x_mask];
3740
3741
e0[ex][ey] = get_endpoint_l8(e, 0);
3742
e1[ex][ey] = get_endpoint_l8(e, 1);
3743
}
3744
}
3745
3746
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3747
3748
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3749
{
3750
const decoder_etc_block& src_block = pETC_Blocks[block_index];
3751
3752
const uint16_t* pSrc_alpha_block = reinterpret_cast<const uint16_t*>(static_cast<const uint32_t*>(pAlpha_blocks) + x + (y * num_blocks_x));
3753
const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]];
3754
const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]];
3755
3756
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3757
3758
uint32_t swizzled = x_swizzle | y_swizzle;
3759
if (num_blocks_x != num_blocks_y)
3760
{
3761
swizzled &= swizzle_mask;
3762
3763
if (num_blocks_x > num_blocks_y)
3764
swizzled |= ((x >> min_bits) << (min_bits * 2));
3765
else
3766
swizzled |= ((y >> min_bits) << (min_bits * 2));
3767
}
3768
3769
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3770
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3771
3772
uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3773
uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3774
uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3775
3776
const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3777
int by = (base_r + base_g + base_b) * 16;
3778
int block_colors_y_x16[4];
3779
block_colors_y_x16[0] = basisu::clamp<int>(by + pInten_table48[0], 0, 48 * 255);
3780
block_colors_y_x16[1] = basisu::clamp<int>(by + pInten_table48[1], 0, 48 * 255);
3781
block_colors_y_x16[2] = basisu::clamp<int>(by + pInten_table48[2], 0, 48 * 255);
3782
block_colors_y_x16[3] = basisu::clamp<int>(by + pInten_table48[3], 0, 48 * 255);
3783
3784
uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16;
3785
const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5];
3786
int alpha_block_colors_x16[4];
3787
alpha_block_colors_x16[0] = basisu::clamp<int>(alpha_base_g + pInten_table16[0], 0, 16 * 255);
3788
alpha_block_colors_x16[1] = basisu::clamp<int>(alpha_base_g + pInten_table16[1], 0, 16 * 255);
3789
alpha_block_colors_x16[2] = basisu::clamp<int>(alpha_base_g + pInten_table16[2], 0, 16 * 255);
3790
alpha_block_colors_x16[3] = basisu::clamp<int>(alpha_base_g + pInten_table16[3], 0, 16 * 255);
3791
3792
// clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16)
3793
3794
{
3795
const uint32_t ex = 2;
3796
int bx = x + ex - 1;
3797
bx &= x_mask;
3798
3799
#define DO_ROW(ey) \
3800
{ \
3801
const uint32_t e = pE_rows[ey][bx]; \
3802
e0[ex][ey] = get_endpoint_l8(e, 0); \
3803
e1[ex][ey] = get_endpoint_l8(e, 1); \
3804
}
3805
3806
DO_ROW(0);
3807
DO_ROW(1);
3808
DO_ROW(2);
3809
#undef DO_ROW
3810
}
3811
3812
uint32_t mod = 0;
3813
3814
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3815
{ \
3816
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3817
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3818
int cl = block_colors_y_x16[(src_block.m_bytes[4 + ly] >> (lx * 2)) & 3] + alpha_block_colors_x16[(pAlpha_selectors->m_selectors[ly] >> (lx * 2)) & 3]; \
3819
int dl = cb_l - ca_l; \
3820
int vl = cl - ca_l; \
3821
int p = vl * 16; \
3822
if (ca_l > cb_l) { p = -p; dl = -dl; } \
3823
uint32_t m = 0; \
3824
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3825
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3826
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3827
mod |= m; \
3828
}
3829
3830
{
3831
const uint32_t ex = 0, ey = 0;
3832
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3833
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3834
DO_PIX(0, 0, 4, 4, 4, 4);
3835
DO_PIX(1, 0, 2, 6, 2, 6);
3836
DO_PIX(0, 1, 2, 2, 6, 6);
3837
DO_PIX(1, 1, 1, 3, 3, 9);
3838
}
3839
3840
{
3841
const uint32_t ex = 1, ey = 0;
3842
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3843
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3844
DO_PIX(2, 0, 8, 0, 8, 0);
3845
DO_PIX(3, 0, 6, 2, 6, 2);
3846
DO_PIX(2, 1, 4, 0, 12, 0);
3847
DO_PIX(3, 1, 3, 1, 9, 3);
3848
}
3849
3850
{
3851
const uint32_t ex = 0, ey = 1;
3852
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3853
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3854
DO_PIX(0, 2, 8, 8, 0, 0);
3855
DO_PIX(1, 2, 4, 12, 0, 0);
3856
DO_PIX(0, 3, 6, 6, 2, 2);
3857
DO_PIX(1, 3, 3, 9, 1, 3);
3858
}
3859
3860
{
3861
const uint32_t ex = 1, ey = 1;
3862
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3863
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3864
DO_PIX(2, 2, 16, 0, 0, 0);
3865
DO_PIX(3, 2, 12, 4, 0, 0);
3866
DO_PIX(2, 3, 12, 0, 4, 0);
3867
DO_PIX(3, 3, 9, 3, 3, 1);
3868
}
3869
#undef DO_PIX
3870
3871
pDst_block->m_modulation = mod;
3872
3873
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3874
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3875
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3876
3877
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3878
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3879
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3880
3881
} // x
3882
} // y
3883
}
3884
#endif // BASISD_SUPPORT_PVRTC1
3885
3886
#if BASISD_SUPPORT_BC7_MODE5
3887
// { low, high } ETC1 selector ranges for which ETC1 -> BC7 mode 5 color conversions are tabulated.
// The table generator create_etc1_to_bc7_m5_color_conversion_table() reads each entry's m_low / m_high.
static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] =
3888
{
3889
{ 0, 3 },
3890
{ 1, 3 },
3891
{ 0, 2 },
3892
{ 1, 2 },
3893
{ 2, 3 },
3894
{ 0, 1 },
3895
};
3896
3897
// Number of entries in g_etc1_to_bc7_m5_selector_ranges.
const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]);
3898
3899
// NOTE(review): presumably filled at init time to map a (low, high) selector pair to its index in
// g_etc1_to_bc7_m5_selector_ranges - the code that writes it is outside this section; confirm.
static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4];
3900
3901
// Number of rows in g_etc1_to_bc7_m5_selector_mappings.
const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10;
// Each row maps the four ETC1 selector values (column index) to a BC7 mode 5 2-bit color index (0..3).
static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] =
3903
{
3904
{ 0, 0, 1, 1 },
3905
{ 0, 0, 1, 2 },
3906
{ 0, 0, 1, 3 },
3907
{ 0, 0, 2, 3 },
3908
{ 0, 1, 1, 1 },
3909
{ 0, 1, 2, 2 },
3910
{ 0, 1, 2, 3 },
3911
{ 0, 2, 3, 3 },
3912
{ 1, 2, 2, 2 },
3913
{ 1, 2, 3, 3 },
3914
};
3915
3916
// One precomputed ETC1 -> BC7 mode 5 color conversion result: the best low/high 7-bit endpoint
// values and the squared error they produce (the generator clamps the error to 0xFFFF).
struct etc1_to_bc7_m5_solution
3917
{
3918
uint8_t m_lo;
3919
uint8_t m_hi;
3920
uint16_t m_err;
3921
};
3922
3923
// Precomputed conversion table, pulled in from a generated .inc file.
// create_etc1_to_bc7_m5_color_conversion_table() emits the entries with intensity table (8) as the
// outermost loop, then 5-bit base color (32), then selector range, then selector mapping innermost.
static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = {
3924
#include "basisu_transcoder_tables_bc7_m5_color.inc"
3925
};
3926
3927
// { low, high } ETC1 selector ranges used for the BC7 mode 5 alpha conversion table
// (same six ranges as the color variant).
static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] =
3928
{
3929
{ 0, 3 },
3930
{ 1, 3 },
3931
{ 0, 2 },
3932
{ 1, 2 },
3933
{ 2, 3 },
3934
{ 0, 1 }
3935
};
3936
3937
// Number of entries in g_etc1_to_bc7_m5a_selector_ranges.
const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5a_selector_ranges) / sizeof(g_etc1_to_bc7_m5a_selector_ranges[0]);
3938
3939
// NOTE(review): presumably filled at init time to map a (low, high) selector pair to its index in
// g_etc1_to_bc7_m5a_selector_ranges - the code that writes it is outside this section; confirm.
static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4];
3940
3941
// One precomputed ETC1 (single channel) -> BC7 mode 5 alpha conversion entry.
// NOTE(review): m_lo / m_hi look like the low/high alpha endpoints and m_trans like a packed
// selector translation - the code that produces and consumes them is outside this section; confirm.
struct etc1_g_to_bc7_m5a_conversion
3942
{
3943
uint8_t m_lo, m_hi;
3944
uint8_t m_trans;
3945
};
3946
3947
// Precomputed alpha conversion table, pulled in from a generated .inc file.
// Sized as 8 * 32 * selector ranges; NOTE(review): presumably intensity tables x 5-bit base values x
// ranges, mirroring the color table - confirm against create_etc1_to_bc7_m5_alpha_conversion_table().
static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] =
3948
{
3949
#include "basisu_transcoder_tables_bc7_m5_alpha.inc"
3950
};
3951
3952
static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs)
3953
{
3954
assert(num_bits < 32);
3955
assert(val < (1ULL << num_bits));
3956
3957
uint32_t mask = static_cast<uint32_t>((1ULL << num_bits) - 1);
3958
3959
while (num_bits)
3960
{
3961
const uint32_t n = basisu::minimum<uint32_t>(8 - (cur_ofs & 7), num_bits);
3962
3963
pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7));
3964
pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7));
3965
3966
val >>= n;
3967
mask >>= n;
3968
3969
num_bits -= n;
3970
cur_ofs += n;
3971
}
3972
3973
return cur_ofs;
3974
}
3975
3976
// Bit-field overlay of a 128-bit BC7 mode 5 block, split into two 64-bit halves. Each half can be
// accessed field by field (m_lo / m_hi) or as a raw 64-bit word (m_lo_bits / m_hi_bits).
// The declared field widths add up to exactly 64 bits per half.
// NOTE(review): the mapping of fields to bit positions depends on the compiler's bit-field
// allocation order; the "bit N" markers below assume allocation starts at the least significant bit.
struct bc7_mode_5
3977
{
3978
union
3979
{
3980
struct
3981
{
// Mode field and 2-bit rotation.
3982
uint64_t m_mode : 6;
3983
uint64_t m_rot : 2;
3984
3985
// 7-bit color endpoints, stored as R0 R1 G0 G1 B0 B1.
uint64_t m_r0 : 7;
3986
uint64_t m_r1 : 7;
3987
uint64_t m_g0 : 7;
3988
uint64_t m_g1 : 7;
3989
uint64_t m_b0 : 7;
3990
uint64_t m_b1 : 7;
3991
// 8-bit alpha endpoints; only the low 6 bits of the second one fit in this half.
uint64_t m_a0 : 8;
3992
uint64_t m_a1_0 : 6;
3993
3994
} m_lo;
3995
3996
uint64_t m_lo_bits;
3997
};
3998
3999
union
4000
{
4001
struct
4002
{
// Remaining 2 bits of the second alpha endpoint.
4003
uint64_t m_a1_1 : 2;
4004
4005
// bit 2
// Sixteen color indices; the first one is 1 bit wide, the rest 2 bits.
// NOTE(review): the two digits in each name presumably are the pixel's x and y - confirm.
4006
uint64_t m_c00 : 1;
4007
uint64_t m_c10 : 2;
4008
uint64_t m_c20 : 2;
4009
uint64_t m_c30 : 2;
4010
4011
uint64_t m_c01 : 2;
4012
uint64_t m_c11 : 2;
4013
uint64_t m_c21 : 2;
4014
uint64_t m_c31 : 2;
4015
4016
uint64_t m_c02 : 2;
4017
uint64_t m_c12 : 2;
4018
uint64_t m_c22 : 2;
4019
uint64_t m_c32 : 2;
4020
4021
uint64_t m_c03 : 2;
4022
uint64_t m_c13 : 2;
4023
uint64_t m_c23 : 2;
4024
uint64_t m_c33 : 2;
4025
4026
// bit 33
// Sixteen alpha indices; the first one is 1 bit wide, the rest 2 bits.
4027
uint64_t m_a00 : 1;
4028
uint64_t m_a10 : 2;
4029
uint64_t m_a20 : 2;
4030
uint64_t m_a30 : 2;
4031
4032
uint64_t m_a01 : 2;
4033
uint64_t m_a11 : 2;
4034
uint64_t m_a21 : 2;
4035
uint64_t m_a31 : 2;
4036
4037
uint64_t m_a02 : 2;
4038
uint64_t m_a12 : 2;
4039
uint64_t m_a22 : 2;
4040
uint64_t m_a32 : 2;
4041
4042
uint64_t m_a03 : 2;
4043
uint64_t m_a13 : 2;
4044
uint64_t m_a23 : 2;
4045
uint64_t m_a33 : 2;
4046
4047
} m_hi;
4048
4049
uint64_t m_hi_bits;
4050
};
4051
};
4052
4053
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
4054
static void create_etc1_to_bc7_m5_color_conversion_table()
4055
{
4056
FILE* pFile = nullptr;
4057
fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w");
4058
4059
uint32_t n = 0;
4060
4061
for (int inten = 0; inten < 8; inten++)
4062
{
4063
for (uint32_t g = 0; g < 32; g++)
4064
{
4065
color32 block_colors[4];
4066
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4067
4068
for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++)
4069
{
4070
const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low;
4071
const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high;
4072
4073
for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++)
4074
{
4075
uint32_t best_lo = 0;
4076
uint32_t best_hi = 0;
4077
uint64_t best_err = UINT64_MAX;
4078
4079
for (uint32_t hi = 0; hi <= 127; hi++)
4080
{
4081
for (uint32_t lo = 0; lo <= 127; lo++)
4082
{
4083
uint32_t colors[4];
4084
4085
colors[0] = (lo << 1) | (lo >> 6);
4086
colors[3] = (hi << 1) | (hi >> 6);
4087
4088
colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
4089
colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
4090
4091
uint64_t total_err = 0;
4092
4093
for (uint32_t s = low_selector; s <= high_selector; s++)
4094
{
4095
int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]];
4096
4097
int err_scale = 1;
4098
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4099
// the low/high selectors which are clamping to either 0 or 255.
4100
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4101
err_scale = 5;
4102
4103
total_err += (err * err) * err_scale;
4104
}
4105
4106
if (total_err < best_err)
4107
{
4108
best_err = total_err;
4109
best_lo = lo;
4110
best_hi = hi;
4111
}
4112
}
4113
}
4114
4115
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
4116
4117
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
4118
n++;
4119
if ((n & 31) == 31)
4120
fprintf(pFile, "\n");
4121
} // m
4122
} // sr
4123
} // g
4124
} // inten
4125
4126
fclose(pFile);
4127
}
4128
4129
static void create_etc1_to_bc7_m5_alpha_conversion_table()
4130
{
4131
FILE* pFile = nullptr;
4132
fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w");
4133
4134
uint32_t n = 0;
4135
4136
for (int inten = 0; inten < 8; inten++)
4137
{
4138
for (uint32_t g = 0; g < 32; g++)
4139
{
4140
color32 block_colors[4];
4141
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4142
4143
for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++)
4144
{
4145
const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low;
4146
const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high;
4147
4148
uint32_t best_lo = 0;
4149
uint32_t best_hi = 0;
4150
uint64_t best_err = UINT64_MAX;
4151
uint32_t best_output_selectors = 0;
4152
4153
for (uint32_t hi = 0; hi <= 255; hi++)
4154
{
4155
for (uint32_t lo = 0; lo <= 255; lo++)
4156
{
4157
uint32_t colors[4];
4158
4159
colors[0] = lo;
4160
colors[3] = hi;
4161
4162
colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
4163
colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
4164
4165
uint64_t total_err = 0;
4166
uint32_t output_selectors = 0;
4167
4168
for (uint32_t s = low_selector; s <= high_selector; s++)
4169
{
4170
int best_mapping_err = INT_MAX;
4171
int best_k = 0;
4172
for (int k = 0; k < 4; k++)
4173
{
4174
int mapping_err = block_colors[s].g - colors[k];
4175
mapping_err *= mapping_err;
4176
4177
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4178
// the low/high selectors which are clamping to either 0 or 255.
4179
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4180
mapping_err *= 5;
4181
4182
if (mapping_err < best_mapping_err)
4183
{
4184
best_mapping_err = mapping_err;
4185
best_k = k;
4186
}
4187
} // k
4188
4189
total_err += best_mapping_err;
4190
output_selectors |= (best_k << (s * 2));
4191
} // s
4192
4193
if (total_err < best_err)
4194
{
4195
best_err = total_err;
4196
best_lo = lo;
4197
best_hi = hi;
4198
best_output_selectors = output_selectors;
4199
}
4200
4201
} // lo
4202
} // hi
4203
4204
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors);
4205
n++;
4206
if ((n & 31) == 31)
4207
fprintf(pFile, "\n");
4208
4209
} // sr
4210
} // g
4211
} // inten
4212
4213
fclose(pFile);
4214
}
4215
#endif // BASISD_WRITE_NEW_BC7_MODE5_TABLES
4216
4217
// A pair of 7-bit BC7 mode 5 endpoint values. Note the member order: aggregate
// initializers are written as {hi, lo}.
struct bc7_m5_match_entry
{
	uint8_t m_hi;
	uint8_t m_lo;
};
4222
4223
// Precomputed lookup: for each 8-bit value i, the {hi, lo} pair of 7-bit
// endpoints whose selector-1 interpolation (weight 21/64) lands closest to i.
// Generated by the disabled (#if 0) loop in transcoder_init_bc7_mode5().
static bc7_m5_match_entry g_bc7_m5_equals_1[256] =
{
	{0,0},{1,0},{3,0},{4,0},{6,0},{7,0},{9,0},{10,0},{12,0},{13,0},{15,0},{16,0},{18,0},{20,0},{21,0},{23,0},
	{24,0},{26,0},{27,0},{29,0},{30,0},{32,0},{33,0},{35,0},{36,0},{38,0},{39,0},{41,0},{42,0},{44,0},{45,0},{47,0},
	{48,0},{50,0},{52,0},{53,0},{55,0},{56,0},{58,0},{59,0},{61,0},{62,0},{64,0},{65,0},{66,0},{68,0},{69,0},{71,0},
	{72,0},{74,0},{75,0},{77,0},{78,0},{80,0},{82,0},{83,0},{85,0},{86,0},{88,0},{89,0},{91,0},{92,0},{94,0},{95,0},
	{97,0},{98,0},{100,0},{101,0},{103,0},{104,0},{106,0},{107,0},{109,0},{110,0},{112,0},{114,0},{115,0},{117,0},{118,0},{120,0},
	{121,0},{123,0},{124,0},{126,0},{127,0},{127,1},{126,2},{126,3},{127,3},{127,4},{126,5},{126,6},{127,6},{127,7},{126,8},{126,9},
	{127,9},{127,10},{126,11},{126,12},{127,12},{127,13},{126,14},{125,15},{127,15},{126,16},{126,17},{127,17},{127,18},{126,19},{126,20},{127,20},
	{127,21},{126,22},{126,23},{127,23},{127,24},{126,25},{126,26},{127,26},{127,27},{126,28},{126,29},{127,29},{127,30},{126,31},{126,32},{127,32},
	{127,33},{126,34},{126,35},{127,35},{127,36},{126,37},{126,38},{127,38},{127,39},{126,40},{126,41},{127,41},{127,42},{126,43},{126,44},{127,44},
	{127,45},{126,46},{125,47},{127,47},{126,48},{126,49},{127,49},{127,50},{126,51},{126,52},{127,52},{127,53},{126,54},{126,55},{127,55},{127,56},
	{126,57},{126,58},{127,58},{127,59},{126,60},{126,61},{127,61},{127,62},{126,63},{125,64},{126,64},{126,65},{127,65},{127,66},{126,67},{126,68},
	{127,68},{127,69},{126,70},{126,71},{127,71},{127,72},{126,73},{126,74},{127,74},{127,75},{126,76},{125,77},{127,77},{126,78},{126,79},{127,79},
	{127,80},{126,81},{126,82},{127,82},{127,83},{126,84},{126,85},{127,85},{127,86},{126,87},{126,88},{127,88},{127,89},{126,90},{126,91},{127,91},
	{127,92},{126,93},{126,94},{127,94},{127,95},{126,96},{126,97},{127,97},{127,98},{126,99},{126,100},{127,100},{127,101},{126,102},{126,103},{127,103},
	{127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115},
	{126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127}
};
4242
4243
static void transcoder_init_bc7_mode5()
4244
{
4245
#if 0
4246
// This is a little too much work to do at init time, so precompute it.
4247
for (int i = 0; i < 256; i++)
4248
{
4249
int lowest_e = 256;
4250
for (int lo = 0; lo < 128; lo++)
4251
{
4252
for (int hi = 0; hi < 128; hi++)
4253
{
4254
const int lo_e = (lo << 1) | (lo >> 6);
4255
const int hi_e = (hi << 1) | (hi >> 6);
4256
4257
// Selector 1
4258
int v = (lo_e * (64 - 21) + hi_e * 21 + 32) >> 6;
4259
int e = abs(v - i);
4260
4261
if (e < lowest_e)
4262
{
4263
g_bc7_m5_equals_1[i].m_hi = static_cast<uint8_t>(hi);
4264
g_bc7_m5_equals_1[i].m_lo = static_cast<uint8_t>(lo);
4265
4266
lowest_e = e;
4267
}
4268
4269
} // hi
4270
4271
} // lo
4272
4273
printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo);
4274
if ((i & 15) == 15) printf("\n");
4275
}
4276
#endif
4277
4278
for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; i++)
4279
{
4280
uint32_t l = g_etc1_to_bc7_m5_selector_ranges[i].m_low;
4281
uint32_t h = g_etc1_to_bc7_m5_selector_ranges[i].m_high;
4282
g_etc1_to_bc7_m5_selector_range_index[l][h] = i;
4283
}
4284
4285
for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; i++)
4286
{
4287
uint32_t l = g_etc1_to_bc7_m5a_selector_ranges[i].m_low;
4288
uint32_t h = g_etc1_to_bc7_m5a_selector_ranges[i].m_high;
4289
g_etc1_to_bc7_m5a_selector_range_index[l][h] = i;
4290
}
4291
}
4292
4293
static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4294
{
4295
bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4296
4297
// First ensure the block is cleared to all 0's
4298
static_cast<uint64_t*>(pDst)[0] = 0;
4299
static_cast<uint64_t*>(pDst)[1] = 0;
4300
4301
// Set alpha to 255
4302
pDst_block->m_lo.m_mode = 1 << 5;
4303
pDst_block->m_lo.m_a0 = 255;
4304
pDst_block->m_lo.m_a1_0 = 63;
4305
pDst_block->m_hi.m_a1_1 = 3;
4306
4307
const uint32_t low_selector = pSelector->m_lo_selector;
4308
const uint32_t high_selector = pSelector->m_hi_selector;
4309
4310
const uint32_t base_color_r = pEndpoints->m_color5.r;
4311
const uint32_t base_color_g = pEndpoints->m_color5.g;
4312
const uint32_t base_color_b = pEndpoints->m_color5.b;
4313
const uint32_t inten_table = pEndpoints->m_inten5;
4314
4315
if (pSelector->m_num_unique_selectors == 1)
4316
{
4317
// Solid color block - use precomputed tables and set selectors to 1.
4318
uint32_t r, g, b;
4319
decoder_etc_block::get_block_color5(pEndpoints->m_color5, inten_table, low_selector, r, g, b);
4320
4321
pDst_block->m_lo.m_r0 = g_bc7_m5_equals_1[r].m_lo;
4322
pDst_block->m_lo.m_g0 = g_bc7_m5_equals_1[g].m_lo;
4323
pDst_block->m_lo.m_b0 = g_bc7_m5_equals_1[b].m_lo;
4324
4325
pDst_block->m_lo.m_r1 = g_bc7_m5_equals_1[r].m_hi;
4326
pDst_block->m_lo.m_g1 = g_bc7_m5_equals_1[g].m_hi;
4327
pDst_block->m_lo.m_b1 = g_bc7_m5_equals_1[b].m_hi;
4328
4329
set_block_bits((uint8_t*)pDst, 0x2aaaaaab, 31, 66);
4330
return;
4331
}
4332
else if (pSelector->m_num_unique_selectors == 2)
4333
{
4334
// Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4335
color32 block_colors[4];
4336
4337
decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table);
4338
4339
const uint32_t r0 = block_colors[low_selector].r;
4340
const uint32_t g0 = block_colors[low_selector].g;
4341
const uint32_t b0 = block_colors[low_selector].b;
4342
4343
const uint32_t r1 = block_colors[high_selector].r;
4344
const uint32_t g1 = block_colors[high_selector].g;
4345
const uint32_t b1 = block_colors[high_selector].b;
4346
4347
pDst_block->m_lo.m_r0 = r0 >> 1;
4348
pDst_block->m_lo.m_g0 = g0 >> 1;
4349
pDst_block->m_lo.m_b0 = b0 >> 1;
4350
4351
pDst_block->m_lo.m_r1 = r1 >> 1;
4352
pDst_block->m_lo.m_g1 = g1 >> 1;
4353
pDst_block->m_lo.m_b1 = b1 >> 1;
4354
4355
uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4356
4357
for (uint32_t y = 0; y < 4; y++)
4358
{
4359
for (uint32_t x = 0; x < 4; x++)
4360
{
4361
uint32_t s = pSelector->get_selector(x, y);
4362
uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4363
4364
uint32_t num_bits = 2;
4365
4366
if ((x | y) == 0)
4367
{
4368
if (os & 2)
4369
{
4370
pDst_block->m_lo.m_r0 = r1 >> 1;
4371
pDst_block->m_lo.m_g0 = g1 >> 1;
4372
pDst_block->m_lo.m_b0 = b1 >> 1;
4373
4374
pDst_block->m_lo.m_r1 = r0 >> 1;
4375
pDst_block->m_lo.m_g1 = g0 >> 1;
4376
pDst_block->m_lo.m_b1 = b0 >> 1;
4377
4378
output_low_selector = 3;
4379
os = 0;
4380
}
4381
4382
num_bits = 1;
4383
}
4384
4385
output_bits |= (os << output_bit_offset);
4386
output_bit_offset += num_bits;
4387
}
4388
}
4389
4390
set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4391
return;
4392
}
4393
4394
const uint32_t selector_range_table = g_etc1_to_bc7_m5_selector_range_index[low_selector][high_selector];
4395
4396
//[32][8][RANGES][MAPPING]
4397
const etc1_to_bc7_m5_solution* pTable_r = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_r) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4398
const etc1_to_bc7_m5_solution* pTable_g = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_g) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4399
const etc1_to_bc7_m5_solution* pTable_b = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_b) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4400
4401
uint32_t best_err = UINT_MAX;
4402
uint32_t best_mapping = 0;
4403
4404
assert(NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS == 10);
4405
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
4406
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
4407
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
4408
#undef DO_ITER
4409
4410
const uint8_t* pSelectors_xlat = &g_etc1_to_bc7_m5_selector_mappings[best_mapping][0];
4411
4412
uint32_t s_inv = 0;
4413
if (pSelectors_xlat[pSelector->get_selector(0, 0)] & 2)
4414
{
4415
pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_hi;
4416
pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_hi;
4417
pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_hi;
4418
4419
pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo;
4420
pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo;
4421
pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo;
4422
4423
s_inv = 3;
4424
}
4425
else
4426
{
4427
pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_lo;
4428
pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_lo;
4429
pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_lo;
4430
4431
pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_hi;
4432
pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_hi;
4433
pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_hi;
4434
}
4435
4436
uint32_t output_bits = 0, output_bit_ofs = 0;
4437
4438
for (uint32_t y = 0; y < 4; y++)
4439
{
4440
for (uint32_t x = 0; x < 4; x++)
4441
{
4442
const uint32_t s = pSelector->get_selector(x, y);
4443
4444
const uint32_t os = pSelectors_xlat[s] ^ s_inv;
4445
4446
output_bits |= (os << output_bit_ofs);
4447
4448
output_bit_ofs += (((x | y) == 0) ? 1 : 2);
4449
}
4450
}
4451
4452
set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4453
}
4454
4455
static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4456
{
4457
bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4458
4459
const uint32_t low_selector = pSelector->m_lo_selector;
4460
const uint32_t high_selector = pSelector->m_hi_selector;
4461
4462
const uint32_t base_color_r = pEndpoints->m_color5.r;
4463
const uint32_t inten_table = pEndpoints->m_inten5;
4464
4465
if (pSelector->m_num_unique_selectors == 1)
4466
{
4467
uint32_t r;
4468
decoder_etc_block::get_block_color5_r(pEndpoints->m_color5, inten_table, low_selector, r);
4469
4470
pDst_block->m_lo.m_a0 = r;
4471
pDst_block->m_lo.m_a1_0 = r & 63;
4472
pDst_block->m_hi.m_a1_1 = r >> 6;
4473
4474
return;
4475
}
4476
else if (pSelector->m_num_unique_selectors == 2)
4477
{
4478
// Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4479
int block_colors[4];
4480
4481
decoder_etc_block::get_block_colors5_g(block_colors, pEndpoints->m_color5, inten_table);
4482
4483
pDst_block->m_lo.m_a0 = block_colors[low_selector];
4484
pDst_block->m_lo.m_a1_0 = block_colors[high_selector] & 63;
4485
pDst_block->m_hi.m_a1_1 = block_colors[high_selector] >> 6;
4486
4487
uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4488
4489
for (uint32_t y = 0; y < 4; y++)
4490
{
4491
for (uint32_t x = 0; x < 4; x++)
4492
{
4493
const uint32_t s = pSelector->get_selector(x, y);
4494
uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4495
4496
uint32_t num_bits = 2;
4497
4498
if ((x | y) == 0)
4499
{
4500
if (os & 2)
4501
{
4502
pDst_block->m_lo.m_a0 = block_colors[high_selector];
4503
pDst_block->m_lo.m_a1_0 = block_colors[low_selector] & 63;
4504
pDst_block->m_hi.m_a1_1 = block_colors[low_selector] >> 6;
4505
4506
output_low_selector = 3;
4507
os = 0;
4508
}
4509
4510
num_bits = 1;
4511
}
4512
4513
output_bits |= (os << output_bit_offset);
4514
output_bit_offset += num_bits;
4515
}
4516
}
4517
4518
set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4519
return;
4520
}
4521
4522
const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector];
4523
4524
const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table];
4525
4526
pDst_block->m_lo.m_a0 = pTable->m_lo;
4527
pDst_block->m_lo.m_a1_0 = pTable->m_hi & 63;
4528
pDst_block->m_hi.m_a1_1 = pTable->m_hi >> 6;
4529
4530
uint32_t output_bit_offset = 0, output_bits = 0, selector_trans = pTable->m_trans;
4531
4532
for (uint32_t y = 0; y < 4; y++)
4533
{
4534
for (uint32_t x = 0; x < 4; x++)
4535
{
4536
const uint32_t s = pSelector->get_selector(x, y);
4537
uint32_t os = (selector_trans >> (s * 2)) & 3;
4538
4539
uint32_t num_bits = 2;
4540
4541
if ((x | y) == 0)
4542
{
4543
if (os & 2)
4544
{
4545
pDst_block->m_lo.m_a0 = pTable->m_hi;
4546
pDst_block->m_lo.m_a1_0 = pTable->m_lo & 63;
4547
pDst_block->m_hi.m_a1_1 = pTable->m_lo >> 6;
4548
4549
selector_trans ^= 0xFF;
4550
os ^= 3;
4551
}
4552
4553
num_bits = 1;
4554
}
4555
4556
output_bits |= (os << output_bit_offset);
4557
output_bit_offset += num_bits;
4558
}
4559
}
4560
4561
set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4562
}
4563
4564
// Converts RGB to (Y, Co, Cg):
//   Y  =  0.25 R + 0.5 G + 0.25 B
//   Co =  0.5  R         - 0.5  B
//   Cg = -0.25 R + 0.5 G - 0.25 B
static inline vec3F rgb_to_ycocg(const vec3F& rgb)
{
	const float y = rgb.dot(vec3F(0.25f, 0.5f, 0.25f));
	const float co = rgb.dot(vec3F(0.5f, 0.0f, -0.5f));
	const float cg = rgb.dot(vec3F(-0.25f, 0.5f, -0.25f));
	return vec3F(y, co, cg);
}
4568
4569
// Converts RGB to just the chroma pair (Co, Cg):
//   Co =  0.5  R         - 0.5  B
//   Cg = -0.25 R + 0.5 G - 0.25 B
static inline vec2F rgb_to_cocg(const vec3F& rgb)
{
	const float co = rgb.dot(vec3F(0.5f, 0.0f, -0.5f));
	const float cg = rgb.dot(vec3F(-0.25f, 0.5f, -0.25f));
	return vec2F(co, cg);
}
4573
4574
// Converts (Y, Co, Cg) back to RGB:
//   R = Y + Co - Cg
//   G = Y      + Cg
//   B = Y - Co - Cg
static inline vec3F ycocg_to_rgb(const vec3F& ycocg)
{
	const float r = ycocg.dot(vec3F(1.0f, 1.0f, -1.0f));
	const float g = ycocg.dot(vec3F(1.0f, 0.0f, 1.0f));
	const float b = ycocg.dot(vec3F(1.0f, -1.0f, -1.0f));
	return vec3F(r, g, b);
}
4578
4579
// Copies the r, g, b components of a color32 into a float vector (alpha is dropped).
static inline vec3F color32_to_vec3F(const color32& c)
{
	return vec3F(static_cast<float>(c.r), static_cast<float>(c.g), static_cast<float>(c.b));
}
4583
4584
// Expands an endpoint's 5-bit base color to 8 bits per component (by bit
// replication) and converts it to (Y, Co, Cg).
static inline vec3F color5_to_ycocg(const endpoint& e)
{
	// 5 -> 8 bit expansion: the top 3 bits are replicated into the low bits.
	const auto expand5 = [](int c5) { return (c5 << 3) | (c5 >> 2); };

	const int r = expand5(e.m_color5[0]);
	const int g = expand5(e.m_color5[1]);
	const int b = expand5(e.m_color5[2]);
	return rgb_to_ycocg(vec3F(static_cast<float>(r), static_cast<float>(g), static_cast<float>(b)));
}
4591
4592
// Expands an endpoint's 5-bit base color to 8 bits per component (by bit
// replication) and returns only its chroma pair (Co, Cg).
static inline vec2F color5_to_cocg(const endpoint& e)
{
	// 5 -> 8 bit expansion: the top 3 bits are replicated into the low bits.
	const auto expand5 = [](int c5) { return (c5 << 3) | (c5 >> 2); };

	const int r = expand5(e.m_color5[0]);
	const int g = expand5(e.m_color5[1]);
	const int b = expand5(e.m_color5[2]);
	return rgb_to_cocg(vec3F(static_cast<float>(r), static_cast<float>(g), static_cast<float>(b)));
}
4599
4600
// Expands a 7-bit endpoint value (0-127) to 8 bits by replicating its top bit
// into the new low bit, so 0 -> 0 and 127 -> 255.
static inline uint32_t bc7_7_to_8(uint32_t v)
{
	assert(v < 128);
	const uint32_t shifted = v << 1;
	const uint32_t replicated_msb = v >> 6;
	return shifted | replicated_msb;
}
4605
4606
static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
4607
{
4608
assert(w < 4);
4609
return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6;
4610
}
4611
4612
// Returns the (Co, Cg) chroma of the endpoint used by block (bx, by). The
// lookup goes through vector2D::at_clamped(), so the coordinates are passed
// as-is and may lie outside the grid.
static inline vec2F get_endpoint_cocg_clamped(int bx, int by, const basisu::vector2D<uint16_t>& decoded_endpoints, const endpoint* pEndpoints)
{
	const endpoint& block_endpoint = pEndpoints[decoded_endpoints.at_clamped(bx, by)];
	return color5_to_cocg(block_endpoint);
}
4617
4618
// Returns true if any of the up to 8 in-bounds neighbors of block (bx, by) has
// an endpoint whose Co or Cg differs from this block's by more than chroma_thresh.
static bool bc7_m5_block_has_chroma_edge(const basisu::vector2D<uint16_t>& decoded_endpoints, const endpoint* pEndpoints, int bx, int by, int num_blocks_x, int num_blocks_y, int chroma_thresh)
{
	const vec2F center_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(bx, by)]));

	for (int dy = -1; dy <= 1; dy++)
	{
		const int oy = by + dy;
		if ((oy < 0) || (oy >= num_blocks_y))
			continue;

		for (int dx = -1; dx <= 1; dx++)
		{
			if ((dx == 0) && (dy == 0))
				continue;

			const int ox = bx + dx;
			if ((ox < 0) || (ox >= num_blocks_x))
				continue;

			const vec2F nearby_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(ox, oy)]));

			const float delta_co = fabsf(nearby_cocg[0] - center_cocg[0]);
			const float delta_cg = fabsf(nearby_cocg[1] - center_cocg[1]);

			if ((delta_co > chroma_thresh) || (delta_cg > chroma_thresh))
				return true;
		} // dx
	} // dy

	return false;
}

// Post-process over already-transcoded BC7 mode 5 blocks that smooths chroma
// across block boundaries.
//
// decoded_endpoints - per-block endpoint indices, indexed [bx, by].
// pDst_blocks       - the BC7 mode 5 blocks, modified in place.
// num_blocks_x/y    - block grid dimensions.
// output_row_pitch_in_blocks_or_pixels - row stride of pDst_blocks; used here
//                     as a count of 16-byte blocks.
// pEndpoints        - endpoint palette referenced by decoded_endpoints.
//
// For every block that has a neighbor with noticeably different chroma and
// enough luma variance, the block's per-pixel luma is decoded from the existing
// BC7 data, chroma is bilinearly interpolated between the endpoints of the
// surrounding blocks, and the resulting pixels are re-encoded into the block.
static void chroma_filter_bc7_mode5(const basisu::vector2D<uint16_t>& decoded_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t output_row_pitch_in_blocks_or_pixels, const endpoint *pEndpoints)
{
	const bool hq_bc7_mode_5_encoder_mode = false;

	const int CHROMA_THRESH = 10;

	// Don't bother if the block is too smooth.
	const float Y_VAR_SKIP_THRESH = 3.0f;

	// Only consumed by the commented-out debug print at the end of the function.
	uint32_t total_filtered_blocks = 0;

	for (int by = 0; by < (int)num_blocks_y; by++)
	{
		for (int bx = 0; bx < (int)num_blocks_x; bx++)
		{
			if (!bc7_m5_block_has_chroma_edge(decoded_endpoints, pEndpoints, bx, by, (int)num_blocks_x, (int)num_blocks_y, CHROMA_THRESH))
				continue;

			total_filtered_blocks++;

			// Widen before multiplying so the byte offset can't wrap in 32 bits.
			const size_t block_index = static_cast<size_t>(by) * output_row_pitch_in_blocks_or_pixels + static_cast<size_t>(bx);
			bc7_mode_5* pDst_block = reinterpret_cast<bc7_mode_5*>(static_cast<uint8_t*>(pDst_blocks) + block_index * sizeof(bc7_mode_5));

			//memset(pDst_block, 0x80, 16);

			const uint32_t lr = bc7_7_to_8(pDst_block->m_lo.m_r0);
			const uint32_t lg = bc7_7_to_8(pDst_block->m_lo.m_g0);
			const uint32_t lb = bc7_7_to_8(pDst_block->m_lo.m_b0);

			const uint32_t hr = bc7_7_to_8(pDst_block->m_lo.m_r1);
			const uint32_t hg = bc7_7_to_8(pDst_block->m_lo.m_g1);
			const uint32_t hb = bc7_7_to_8(pDst_block->m_lo.m_b1);

			// Luma of each of the 4 colors the block can produce.
			float y_vals[4];
			for (uint32_t i = 0; i < 4; i++)
			{
				const int cr = static_cast<int>(bc7_interp2(lr, hr, i));
				const int cg = static_cast<int>(bc7_interp2(lg, hg, i));
				const int cb = static_cast<int>(bc7_interp2(lb, hb, i));
				y_vals[i] = (float)cr * .25f + (float)cg * .5f + (float)cb * .25f;
			} // i

			// Skip the 2 alpha endpoint bits; the color selectors follow.
			uint64_t sel_bits = pDst_block->m_hi_bits >> 2;

			float block_y_vals[16]; // [y][x]
			float y_sum = 0.0f, y_sum_sq = 0.0f;

			for (uint32_t i = 0; i < 16; i++)
			{
				// The first selector is 1 bit wide, the rest are 2 bits.
				const uint32_t sel = sel_bits & (i ? 3 : 1);
				sel_bits >>= (i ? 2 : 1);

				const float y = y_vals[sel];
				block_y_vals[i] = y;
				y_sum += y;
				y_sum_sq += y * y;
			} // i

			const float S = 1.0f / 16.0f;
			const float y_var = (y_sum_sq * S) - basisu::squaref(y_sum * S);

			if (y_var < Y_VAR_SKIP_THRESH)
				continue;

			color32 block_to_pack[16];

			for (int bpy = 0; bpy < 4; bpy++)
			{
				// Rows 0-1 interpolate between the block above and this one, rows 2-3
				// between this one and the block below. (Written as a select instead of
				// right-shifting a negative value.)
				const int uby = by + ((bpy < 2) ? -1 : 0);
				const float fy = ((float)((bpy + 2) & 3) + .5f) * (1.0f / 4.0f);

				for (int bpx = 0; bpx < 4; bpx++)
				{
					const int ubx = bx + ((bpx < 2) ? -1 : 0);
					const float fx = ((float)((bpx + 2) & 3) + .5f) * (1.0f / 4.0f);

					vec2F a(get_endpoint_cocg_clamped(ubx, uby, decoded_endpoints, pEndpoints));
					vec2F b(get_endpoint_cocg_clamped(ubx + 1, uby, decoded_endpoints, pEndpoints));
					vec2F c(get_endpoint_cocg_clamped(ubx, uby + 1, decoded_endpoints, pEndpoints));
					vec2F d(get_endpoint_cocg_clamped(ubx + 1, uby + 1, decoded_endpoints, pEndpoints));

					assert((fx >= 0) && (fx <= 1.0f) && (fy >= 0) && (fy <= 1.0f));

					// TODO: Could merge this into 4 muls on each corner by weights
					vec2F ab = vec2F::lerp(a, b, fx);
					vec2F cd = vec2F::lerp(c, d, fx);
					vec2F f = vec2F::lerp(ab, cd, fy);

					// Keep the block's own luma, replace its chroma with the filtered one.
					vec3F final_ycocg(block_y_vals[bpx + bpy * 4], f[0], f[1]);

					vec3F final_conv(ycocg_to_rgb(final_ycocg));
					final_conv.clamp(0.0f, 255.0f);

					block_to_pack[bpx + bpy * 4].set_noclamp_rgba((int)(.5f + final_conv[0]), (int)(.5f + final_conv[1]), (int)(.5f + final_conv[2]), 255);

				} // x
			} // y

			bc7_mode_5_encoder::encode_bc7_mode_5_block(pDst_block, block_to_pack, hq_bc7_mode_5_encoder_mode);

		} // bx
	} // by

	(void)total_filtered_blocks;
	//basisu::fmt_printf("Chroma thresh: {}, Total blocks to filter: {} out of {} {}\n", CHROMA_THRESH, total_filtered_blocks, num_blocks_x * num_blocks_y, (float)total_filtered_blocks * 100.0f / (num_blocks_x * num_blocks_y));
}
4755
#endif // BASISD_SUPPORT_BC7_MODE5
4756
4757
#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC
4758
// Prebuilt 48-bit (6 byte) selector field in which all sixteen 3-bit selectors equal 4
// (bit pattern 100 repeated).
static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
4759
#endif
4760
4761
#if BASISD_SUPPORT_ETC2_EAC_A8
4762
// Transcodes one ETC1S block into an ETC2 EAC A8 (alpha) block.
//
// pDst_block  - destination EAC block; base, table, multiplier and all
//               selectors are written.
// pEndpoints  - ETC1S endpoint (5-bit base color + intensity table index).
//               Only the r channel of the base color is used.
// pSelector   - ETC1S selectors for the 4x4 block.
//
// If the block only uses a single selector value it is emitted as a constant
// block; otherwise a precomputed conversion entry is looked up by base color,
// intensity table and selector range, and each ETC1 selector is translated
// through the entry's 3-bit-per-selector m_trans mapping.
static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t low_selector = pSelector->m_lo_selector;
	const uint32_t high_selector = pSelector->m_hi_selector;

	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten_table = pEndpoints->m_inten5;

	if (low_selector == high_selector)
	{
		uint32_t r;
		decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);

		// Constant alpha block
		// Select table 13, use selector 4 (0), set multiplier to 1 and the base to the decoded r value
		pDst_block->m_base = r;
		pDst_block->m_table = 13;
		pDst_block->m_multiplier = 1;

		// selectors are all 4's
		memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));

		return;
	}

	// Find the selector range matching [low_selector, high_selector]; fall back
	// to range 0 if there is no exact match.
	uint32_t selector_range_table = 0;
	for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++)
	{
		const bool low_matches = (low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low);
		const bool high_matches = (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high);
		if (low_matches && high_matches)
			break;
	}
	if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES)
		selector_range_table = 0;

	const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_a8[base_color.r + inten_table * 32][selector_range_table];

	// m_table_mul packs the table index in its high nibble and the multiplier in its low nibble.
	pDst_block->m_base = pTable_entry->m_base;
	pDst_block->m_table = pTable_entry->m_table_mul >> 4;
	pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;

	uint64_t selector_bits = 0;

	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			const uint32_t s = pSelector->get_selector(x, y);

			const uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;

			// Destination selectors are stored column-major, starting from the top bits.
			const uint32_t dst_ofs = 45 - (y + x * 4) * 3;
			selector_bits |= (static_cast<uint64_t>(ds) << dst_ofs);
		}
	}

	pDst_block->set_selector_bits(selector_bits);
}
4817
#endif // BASISD_SUPPORT_ETC2_EAC_A8
4818
4819
#if BASISD_SUPPORT_ETC2_EAC_RG11
4820
// ETC1S -> ETC2 EAC R11 conversion table.
// The row index is (5-bit base color red + intensity table * 32), as computed by convert_etc1s_to_etc2_eac_r11();
// the column is the index of the used selector range (NUM_ETC2_EAC_SELECTOR_RANGES entries per row).
// Each entry is read as m_base, m_table_mul (EAC table in the high nibble, multiplier in the low nibble) and
// m_trans (3 bits per ETC1S selector, giving the EAC selector that replaces it).
// NOTE(review): the initializer order is presumably {m_base, m_table_mul, m_trans}; the struct is declared outside this section - confirm.
static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
4821
{
4822
{{0,1,3328},{0,1,3328},{0,16,457},{0,16,456}},
4823
{{0,226,3936},{0,226,3936},{0,17,424},{8,0,472}},
4824
{{6,178,4012},{6,178,4008},{0,146,501},{16,0,472}},
4825
{{14,178,4012},{14,178,4008},{8,146,501},{24,0,472}},
4826
{{23,178,4012},{23,178,4008},{17,146,501},{33,0,472}},
4827
{{31,178,4012},{31,178,4008},{25,146,501},{41,0,472}},
4828
{{39,178,4012},{39,178,4008},{33,146,501},{49,0,472}},
4829
{{47,178,4012},{47,178,4008},{41,146,501},{27,228,496}},
4830
{{56,178,4012},{56,178,4008},{50,146,501},{36,228,496}},
4831
{{64,178,4012},{64,178,4008},{58,146,501},{44,228,496}},
4832
{{72,178,4012},{72,178,4008},{66,146,501},{52,228,496}},
4833
{{80,178,4012},{80,178,4008},{74,146,501},{60,228,496}},
4834
{{89,178,4012},{89,178,4008},{83,146,501},{69,228,496}},
4835
{{97,178,4012},{97,178,4008},{91,146,501},{77,228,496}},
4836
{{105,178,4012},{105,178,4008},{99,146,501},{85,228,496}},
4837
{{113,178,4012},{113,178,4008},{107,146,501},{93,228,496}},
4838
{{122,178,4012},{122,178,4008},{116,146,501},{102,228,496}},
4839
{{130,178,4012},{130,178,4008},{124,146,501},{110,228,496}},
4840
{{138,178,4012},{138,178,4008},{132,146,501},{118,228,496}},
4841
{{146,178,4012},{146,178,4008},{140,146,501},{126,228,496}},
4842
{{155,178,4012},{155,178,4008},{149,146,501},{135,228,496}},
4843
{{163,178,4012},{163,178,4008},{157,146,501},{143,228,496}},
4844
{{171,178,4012},{171,178,4008},{165,146,501},{151,228,496}},
4845
{{179,178,4012},{179,178,4008},{173,146,501},{159,228,496}},
4846
{{188,178,4012},{188,178,4008},{182,146,501},{168,228,496}},
4847
{{196,178,4012},{196,178,4008},{190,146,501},{176,228,496}},
4848
{{204,178,4012},{204,178,4008},{198,146,501},{184,228,496}},
4849
{{212,178,4012},{212,178,4008},{206,146,501},{192,228,496}},
4850
{{221,178,4012},{221,178,4008},{215,146,501},{201,228,496}},
4851
{{229,178,4012},{229,178,4008},{223,146,501},{209,228,496}},
4852
{{235,66,4012},{221,100,4008},{231,146,501},{217,228,496}},
4853
{{211,102,4085},{254,32,4040},{211,102,501},{254,32,456}},
4854
{{0,2,3328},{0,2,3328},{0,1,320},{0,1,320}},
4855
{{7,162,3905},{7,162,3904},{0,17,480},{0,17,480}},
4856
{{15,162,3906},{15,162,3904},{1,117,352},{1,117,352}},
4857
{{23,162,3906},{23,162,3904},{5,34,500},{4,53,424}},
4858
{{32,162,3906},{32,162,3904},{14,34,500},{3,69,424}},
4859
{{40,162,3906},{40,162,3904},{22,34,500},{1,133,496}},
4860
{{48,162,3906},{48,162,3904},{30,34,500},{4,85,496}},
4861
{{56,162,3906},{56,162,3904},{38,34,500},{12,85,496}},
4862
{{65,162,3906},{65,162,3904},{47,34,500},{1,106,424}},
4863
{{73,162,3906},{73,162,3904},{55,34,500},{9,106,424}},
4864
{{81,162,3906},{81,162,3904},{63,34,500},{7,234,496}},
4865
{{89,162,3906},{89,162,3904},{71,34,500},{15,234,496}},
4866
{{98,162,3906},{98,162,3904},{80,34,500},{24,234,496}},
4867
{{106,162,3906},{106,162,3904},{88,34,500},{32,234,496}},
4868
{{114,162,3906},{114,162,3904},{96,34,500},{40,234,496}},
4869
{{122,162,3906},{122,162,3904},{104,34,500},{48,234,496}},
4870
{{131,162,3906},{131,162,3904},{113,34,500},{57,234,496}},
4871
{{139,162,3906},{139,162,3904},{121,34,500},{65,234,496}},
4872
{{147,162,3906},{147,162,3904},{129,34,500},{73,234,496}},
4873
{{155,162,3906},{155,162,3904},{137,34,500},{81,234,496}},
4874
{{164,162,3906},{164,162,3904},{146,34,500},{90,234,496}},
4875
{{172,162,3906},{172,162,3904},{154,34,500},{98,234,496}},
4876
{{180,162,3906},{180,162,3904},{162,34,500},{106,234,496}},
4877
{{188,162,3906},{188,162,3904},{170,34,500},{114,234,496}},
4878
{{197,162,3906},{197,162,3904},{179,34,500},{123,234,496}},
4879
{{205,162,3906},{205,162,3904},{187,34,500},{131,234,496}},
4880
{{213,162,3906},{213,162,3904},{195,34,500},{139,234,496}},
4881
{{221,162,3906},{221,162,3904},{203,34,500},{147,234,496}},
4882
{{230,162,3906},{230,162,3904},{212,34,500},{156,234,496}},
4883
{{238,162,3906},{174,106,4008},{220,34,500},{164,234,496}},
4884
{{240,178,4001},{182,106,4008},{228,34,500},{172,234,496}},
4885
{{166,108,4085},{115,31,4080},{166,108,501},{115,31,496}},
4886
{{1,68,3328},{1,68,3328},{0,1,384},{0,1,384}},
4887
{{1,51,3968},{1,51,3968},{0,2,384},{0,2,384}},
4888
{{21,18,3851},{21,18,3848},{1,50,488},{1,50,488}},
4889
{{26,195,3851},{29,18,3848},{0,67,488},{0,67,488}},
4890
{{35,195,3851},{38,18,3848},{12,115,488},{0,3,496}},
4891
{{43,195,3851},{46,18,3848},{20,115,488},{2,6,424}},
4892
{{51,195,3851},{54,18,3848},{36,66,482},{4,22,424}},
4893
{{59,195,3851},{62,18,3848},{44,66,482},{3,73,424}},
4894
{{68,195,3851},{71,18,3848},{53,66,482},{3,22,496}},
4895
{{76,195,3851},{79,18,3848},{61,66,482},{2,137,496}},
4896
{{84,195,3851},{87,18,3848},{69,66,482},{1,89,496}},
4897
{{92,195,3851},{95,18,3848},{77,66,482},{9,89,496}},
4898
{{101,195,3851},{104,18,3848},{86,66,482},{18,89,496}},
4899
{{109,195,3851},{112,18,3848},{94,66,482},{26,89,496}},
4900
{{117,195,3851},{120,18,3848},{102,66,482},{34,89,496}},
4901
{{125,195,3851},{128,18,3848},{110,66,482},{42,89,496}},
4902
{{134,195,3851},{137,18,3848},{119,66,482},{51,89,496}},
4903
{{141,195,3907},{145,18,3848},{127,66,482},{59,89,496}},
4904
{{149,195,3907},{153,18,3848},{135,66,482},{67,89,496}},
4905
{{157,195,3907},{161,18,3848},{143,66,482},{75,89,496}},
4906
{{166,195,3907},{170,18,3848},{152,66,482},{84,89,496}},
4907
{{174,195,3907},{178,18,3848},{160,66,482},{92,89,496}},
4908
{{182,195,3907},{186,18,3848},{168,66,482},{100,89,496}},
4909
{{190,195,3907},{194,18,3848},{176,66,482},{108,89,496}},
4910
{{199,195,3907},{203,18,3848},{185,66,482},{117,89,496}},
4911
{{207,195,3907},{211,18,3848},{193,66,482},{125,89,496}},
4912
{{215,195,3907},{219,18,3848},{201,66,482},{133,89,496}},
4913
{{223,195,3907},{227,18,3848},{209,66,482},{141,89,496}},
4914
{{232,195,3907},{168,89,4008},{218,66,482},{150,89,496}},
4915
{{236,18,3907},{176,89,4008},{226,66,482},{158,89,496}},
4916
{{158,90,4085},{103,31,4080},{158,90,501},{103,31,496}},
4917
{{166,90,4085},{111,31,4080},{166,90,501},{111,31,496}},
4918
{{0,70,3328},{0,70,3328},{0,17,448},{0,17,448}},
4919
{{0,117,3904},{0,117,3904},{0,35,384},{0,35,384}},
4920
{{13,165,3905},{13,165,3904},{2,211,480},{2,211,480}},
4921
{{21,165,3906},{21,165,3904},{1,51,488},{1,51,488}},
4922
{{30,165,3906},{30,165,3904},{7,61,352},{7,61,352}},
4923
{{38,165,3906},{38,165,3904},{2,125,352},{2,125,352}},
4924
{{46,165,3906},{46,165,3904},{1,37,500},{10,125,352}},
4925
{{54,165,3906},{54,165,3904},{9,37,500},{5,61,424}},
4926
{{63,165,3906},{63,165,3904},{18,37,500},{1,189,424}},
4927
{{71,165,3906},{71,165,3904},{26,37,500},{9,189,424}},
4928
{{79,165,3906},{79,165,3904},{34,37,500},{4,77,424}},
4929
{{87,165,3906},{87,165,3904},{42,37,500},{12,77,424}},
4930
{{96,165,3906},{96,165,3904},{51,37,500},{8,93,424}},
4931
{{104,165,3906},{104,165,3904},{59,37,500},{3,141,496}},
4932
{{112,165,3906},{112,165,3904},{68,37,500},{11,141,496}},
4933
{{120,165,3906},{120,165,3904},{76,37,500},{6,93,496}},
4934
{{129,165,3906},{129,165,3904},{85,37,500},{15,93,496}},
4935
{{70,254,4012},{137,165,3904},{93,37,500},{23,93,496}},
4936
{{145,165,3906},{145,165,3904},{101,37,500},{31,93,496}},
4937
{{86,254,4012},{153,165,3904},{109,37,500},{39,93,496}},
4938
{{163,165,3906},{162,165,3904},{118,37,500},{48,93,496}},
4939
{{171,165,3906},{170,165,3904},{126,37,500},{56,93,496}},
4940
{{179,165,3906},{178,165,3904},{134,37,500},{64,93,496}},
4941
{{187,165,3906},{187,165,3904},{142,37,500},{72,93,496}},
4942
{{196,165,3906},{196,165,3904},{151,37,500},{81,93,496}},
4943
{{204,165,3906},{204,165,3904},{159,37,500},{89,93,496}},
4944
{{212,165,3906},{136,77,4008},{167,37,500},{97,93,496}},
4945
{{220,165,3906},{131,93,4008},{175,37,500},{105,93,496}},
4946
{{214,181,4001},{140,93,4008},{184,37,500},{114,93,496}},
4947
{{222,181,4001},{148,93,4008},{192,37,500},{122,93,496}},
4948
{{115,95,4085},{99,31,4080},{115,95,501},{99,31,496}},
4949
{{123,95,4085},{107,31,4080},{123,95,501},{107,31,496}},
4950
{{0,102,3840},{0,102,3840},{0,18,384},{0,18,384}},
4951
{{5,167,3904},{5,167,3904},{0,13,256},{0,13,256}},
4952
{{4,54,3968},{4,54,3968},{1,67,448},{1,67,448}},
4953
{{30,198,3850},{30,198,3848},{0,3,480},{0,3,480}},
4954
{{39,198,3850},{39,198,3848},{3,52,488},{3,52,488}},
4955
{{47,198,3851},{47,198,3848},{3,4,488},{3,4,488}},
4956
{{55,198,3851},{55,198,3848},{1,70,488},{1,70,488}},
4957
{{53,167,3906},{63,198,3848},{3,22,488},{3,22,488}},
4958
{{62,167,3906},{72,198,3848},{24,118,488},{0,6,496}},
4959
{{70,167,3906},{80,198,3848},{32,118,488},{2,89,488}},
4960
{{78,167,3906},{88,198,3848},{40,118,488},{1,73,496}},
4961
{{86,167,3906},{96,198,3848},{48,118,488},{0,28,424}},
4962
{{95,167,3906},{105,198,3848},{57,118,488},{9,28,424}},
4963
{{103,167,3906},{113,198,3848},{65,118,488},{5,108,496}},
4964
{{111,167,3906},{121,198,3848},{73,118,488},{13,108,496}},
4965
{{119,167,3906},{129,198,3848},{81,118,488},{21,108,496}},
4966
{{128,167,3906},{138,198,3848},{90,118,488},{6,28,496}},
4967
{{136,167,3906},{146,198,3848},{98,118,488},{14,28,496}},
4968
{{145,167,3906},{154,198,3848},{106,118,488},{22,28,496}},
4969
{{153,167,3906},{162,198,3848},{114,118,488},{30,28,496}},
4970
{{162,167,3906},{171,198,3848},{123,118,488},{39,28,496}},
4971
{{170,167,3906},{179,198,3848},{131,118,488},{47,28,496}},
4972
{{178,167,3906},{187,198,3848},{139,118,488},{55,28,496}},
4973
{{186,167,3906},{195,198,3848},{147,118,488},{63,28,496}},
4974
{{194,167,3906},{120,12,4008},{156,118,488},{72,28,496}},
4975
{{206,198,3907},{116,28,4008},{164,118,488},{80,28,496}},
4976
{{214,198,3907},{124,28,4008},{172,118,488},{88,28,496}},
4977
{{222,198,3395},{132,28,4008},{180,118,488},{96,28,496}},
4978
{{207,134,4001},{141,28,4008},{189,118,488},{105,28,496}},
4979
{{95,30,4085},{86,31,4080},{95,30,501},{86,31,496}},
4980
{{103,30,4085},{94,31,4080},{103,30,501},{94,31,496}},
4981
{{111,30,4085},{102,31,4080},{111,30,501},{102,31,496}},
4982
{{0,104,3840},{0,104,3840},{0,18,448},{0,18,448}},
4983
{{4,39,3904},{4,39,3904},{0,4,384},{0,4,384}},
4984
{{0,56,3968},{0,56,3968},{0,84,448},{0,84,448}},
4985
{{6,110,3328},{6,110,3328},{0,20,448},{0,20,448}},
4986
{{41,200,3850},{41,200,3848},{1,4,480},{1,4,480}},
4987
{{49,200,3850},{49,200,3848},{1,8,416},{1,8,416}},
4988
{{57,200,3851},{57,200,3848},{1,38,488},{1,38,488}},
4989
{{65,200,3851},{65,200,3848},{1,120,488},{1,120,488}},
4990
{{74,200,3851},{74,200,3848},{2,72,488},{2,72,488}},
4991
{{68,6,3907},{82,200,3848},{2,24,488},{2,24,488}},
4992
{{77,6,3907},{90,200,3848},{26,120,488},{10,24,488}},
4993
{{97,63,3330},{98,200,3848},{34,120,488},{2,8,496}},
4994
{{106,63,3330},{107,200,3848},{43,120,488},{3,92,488}},
4995
{{114,63,3330},{115,200,3848},{51,120,488},{11,92,488}},
4996
{{122,63,3330},{123,200,3848},{59,120,488},{7,76,496}},
4997
{{130,63,3330},{131,200,3848},{67,120,488},{15,76,496}},
4998
{{139,63,3330},{140,200,3848},{76,120,488},{24,76,496}},
4999
{{147,63,3330},{148,200,3848},{84,120,488},{32,76,496}},
5000
{{155,63,3330},{156,200,3848},{92,120,488},{40,76,496}},
5001
{{164,63,3330},{164,200,3848},{100,120,488},{48,76,496}},
5002
{{173,63,3330},{173,200,3848},{109,120,488},{57,76,496}},
5003
{{184,6,3851},{181,200,3848},{117,120,488},{65,76,496}},
5004
{{192,6,3851},{133,28,3936},{125,120,488},{73,76,496}},
5005
{{189,200,3907},{141,28,3936},{133,120,488},{81,76,496}},
5006
{{198,200,3907},{138,108,4000},{142,120,488},{90,76,496}},
5007
{{206,200,3907},{146,108,4000},{150,120,488},{98,76,496}},
5008
{{214,200,3395},{154,108,4000},{158,120,488},{106,76,496}},
5009
{{190,136,4001},{162,108,4000},{166,120,488},{114,76,496}},
5010
{{123,30,4076},{87,15,4080},{123,30,492},{87,15,496}},
5011
{{117,110,4084},{80,31,4080},{117,110,500},{80,31,496}},
5012
{{125,110,4084},{88,31,4080},{125,110,500},{88,31,496}},
5013
{{133,110,4084},{96,31,4080},{133,110,500},{96,31,496}},
5014
{{9,56,3904},{9,56,3904},{0,67,448},{0,67,448}},
5015
{{1,8,3904},{1,8,3904},{1,84,448},{1,84,448}},
5016
{{1,124,3904},{1,124,3904},{0,39,384},{0,39,384}},
5017
{{9,124,3904},{9,124,3904},{1,4,448},{1,4,448}},
5018
{{6,76,3904},{6,76,3904},{0,70,448},{0,70,448}},
5019
{{62,6,3859},{62,6,3856},{2,38,480},{2,38,480}},
5020
{{70,6,3859},{70,6,3856},{5,43,416},{5,43,416}},
5021
{{78,6,3859},{78,6,3856},{2,11,416},{2,11,416}},
5022
{{87,6,3859},{87,6,3856},{0,171,488},{0,171,488}},
5023
{{67,8,3906},{95,6,3856},{8,171,488},{8,171,488}},
5024
{{75,8,3907},{103,6,3856},{5,123,488},{5,123,488}},
5025
{{83,8,3907},{111,6,3856},{2,75,488},{2,75,488}},
5026
{{92,8,3907},{120,6,3856},{0,27,488},{0,27,488}},
5027
{{100,8,3907},{128,6,3856},{8,27,488},{8,27,488}},
5028
{{120,106,3843},{136,6,3856},{99,6,387},{16,27,488}},
5029
{{128,106,3843},{144,6,3856},{107,6,387},{2,11,496}},
5030
{{137,106,3843},{153,6,3856},{117,6,387},{11,11,496}},
5031
{{145,106,3843},{161,6,3856},{125,6,387},{19,11,496}},
5032
{{163,8,3851},{137,43,3904},{133,6,387},{27,11,496}},
5033
{{171,8,3851},{145,43,3904},{141,6,387},{35,11,496}},
5034
{{180,8,3851},{110,11,4000},{150,6,387},{44,11,496}},
5035
{{188,8,3851},{118,11,4000},{158,6,387},{52,11,496}},
5036
{{172,72,3907},{126,11,4000},{166,6,387},{60,11,496}},
5037
{{174,6,3971},{134,11,4000},{174,6,387},{68,11,496}},
5038
{{183,6,3971},{143,11,4000},{183,6,387},{77,11,496}},
5039
{{191,6,3971},{151,11,4000},{191,6,387},{85,11,496}},
5040
{{199,6,3971},{159,11,4000},{199,6,387},{93,11,496}},
5041
{{92,12,4084},{69,15,4080},{92,12,500},{69,15,496}},
5042
{{101,12,4084},{78,15,4080},{101,12,500},{78,15,496}},
5043
{{110,12,4084},{86,15,4080},{110,12,500},{86,15,496}},
5044
{{118,12,4084},{79,31,4080},{118,12,500},{79,31,496}},
5045
{{126,12,4084},{87,31,4080},{126,12,500},{87,31,496}},
5046
{{71,8,3602},{71,8,3600},{2,21,384},{2,21,384}},
5047
{{79,8,3611},{79,8,3608},{0,69,448},{0,69,448}},
5048
{{87,8,3611},{87,8,3608},{0,23,384},{0,23,384}},
5049
{{95,8,3611},{95,8,3608},{1,5,448},{1,5,448}},
5050
{{104,8,3611},{104,8,3608},{0,88,448},{0,88,448}},
5051
{{112,8,3611},{112,8,3608},{0,72,448},{0,72,448}},
5052
{{120,8,3611},{121,8,3608},{36,21,458},{36,21,456}},
5053
{{133,47,3091},{129,8,3608},{44,21,458},{44,21,456}},
5054
{{142,47,3091},{138,8,3608},{53,21,459},{53,21,456}},
5055
{{98,12,3850},{98,12,3848},{61,21,459},{61,21,456}},
5056
{{106,12,3850},{106,12,3848},{10,92,480},{69,21,456}},
5057
{{114,12,3851},{114,12,3848},{18,92,480},{77,21,456}},
5058
{{123,12,3851},{123,12,3848},{3,44,488},{86,21,456}},
5059
{{95,12,3906},{95,12,3904},{11,44,488},{94,21,456}},
5060
{{103,12,3906},{103,12,3904},{19,44,488},{102,21,456}},
5061
{{111,12,3907},{111,12,3904},{27,44,489},{110,21,456}},
5062
{{120,12,3907},{120,12,3904},{36,44,489},{119,21,456}},
5063
{{128,12,3907},{128,12,3904},{44,44,489},{127,21,456}},
5064
{{136,12,3907},{136,12,3904},{52,44,489},{135,21,456}},
5065
{{144,12,3907},{144,12,3904},{60,44,490},{144,21,456}},
5066
{{153,12,3907},{153,12,3904},{69,44,490},{153,21,456}},
5067
{{161,12,3395},{149,188,3968},{77,44,490},{161,21,456}},
5068
{{169,12,3395},{199,21,3928},{85,44,490},{169,21,456}},
5069
{{113,95,4001},{202,69,3992},{125,8,483},{177,21,456}},
5070
{{122,95,4001},{201,21,3984},{134,8,483},{186,21,456}},
5071
{{143,8,4067},{209,21,3984},{142,8,483},{194,21,456}},
5072
{{151,8,4067},{47,15,4080},{151,8,483},{47,15,496}},
5073
{{159,8,4067},{55,15,4080},{159,8,483},{55,15,496}},
5074
{{168,8,4067},{64,15,4080},{168,8,483},{64,15,496}},
5075
{{160,40,4075},{72,15,4080},{160,40,491},{72,15,496}},
5076
{{168,40,4075},{80,15,4080},{168,40,491},{80,15,496}},
5077
{{144,8,4082},{88,15,4080},{144,8,498},{88,15,496}},
5078
};
5079
5080
// Converts one ETC1S block (endpoint + selector pair) into an ETC2 EAC R11 block.
// Only the red channel of the 5-bit base color is consulted. Blocks that use a single selector are emitted
// directly as solid blocks; everything else goes through the precomputed s_etc1_g_to_etc2_r11 table.
static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t lo_sel = pSelector->m_lo_selector;
	const uint32_t hi_sel = pSelector->m_hi_selector;

	const color32& base = pEndpoints->m_color5;
	const uint32_t inten = pEndpoints->m_inten5;

	if (lo_sel == hi_sel)
	{
		// Solid block: every texel decodes to the same red value.
		uint32_t red;
		decoder_etc_block::get_block_color5_r(base, inten, lo_sel, red);

		// Table 13 with selector 4 and a multiplier of 1, so the base value carries the whole result.
		pDst_block->m_base = red;
		pDst_block->m_table = 13;
		pDst_block->m_multiplier = 1;

		// Sixteen 3-bit selectors, all equal to 4 (binary 100), packed into 6 bytes.
		static const uint8_t s_all_selectors_4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
		memcpy(pDst_block->m_selectors, s_all_selectors_4, sizeof(s_all_selectors_4));

		return;
	}

	// Find the table column matching the used selector range; unknown ranges fall back to column 0.
	uint32_t range_index = 0;
	while (range_index < NUM_ETC2_EAC_SELECTOR_RANGES)
	{
		if ((s_etc2_eac_selector_ranges[range_index].m_low == lo_sel) && (s_etc2_eac_selector_ranges[range_index].m_high == hi_sel))
			break;
		range_index++;
	}
	if (range_index >= NUM_ETC2_EAC_SELECTOR_RANGES)
		range_index = 0;

	const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_r11[base.r + inten * 32][range_index];

	pDst_block->m_base = pTable_entry->m_base;
	pDst_block->m_table = pTable_entry->m_table_mul >> 4;
	pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;

	// Translate each ETC1S selector through m_trans (3 bits per source selector) and pack the 16 results
	// into 48 bits: texel (x, y) sits at index y + x * 4, and index 0 occupies the top 3 bits.
	uint64_t packed_selectors = 0;

	for (uint32_t x = 0; x < 4; x++)
	{
		for (uint32_t y = 0; y < 4; y++)
		{
			const uint32_t etc1s_sel = pSelector->get_selector(x, y);
			const uint64_t eac_sel = (pTable_entry->m_trans >> (etc1s_sel * 3)) & 7;

			packed_selectors |= eac_sel << (45 - (x * 4 + y) * 3);
		}
	}

	pDst_block->set_selector_bits(packed_selectors);
}
5136
#endif // BASISD_SUPPORT_ETC2_EAC_RG11
5137
5138
// ASTC
5139
// One precomputed ETC1S -> ASTC endpoint solution, as stored in the generated basisu_transcoder_tables_astc*.inc tables.
// m_lo/m_hi: the best low/high endpoint values found by the table generators (0-47 levels or 0-255 values, depending on the table).
// m_err: summed squared error of that solution; the generators clamp it to 0xFFFF before writing it.
struct etc1_to_astc_solution
5140
{
5141
uint8_t m_lo;
5142
uint8_t m_hi;
5143
uint16_t m_err;
5144
};
5145
5146
#if BASISD_SUPPORT_ASTC
5147
// The inclusive {low, high} ETC1S selector ranges that the ETC1S -> ASTC conversion tables are precomputed for.
static dxt_selector_range g_etc1_to_astc_selector_ranges[] =
5148
{
5149
{ 0, 3 },
5150
5151
{ 1, 3 },
5152
{ 0, 2 },
5153
5154
{ 1, 2 },
5155
5156
{ 2, 3 },
5157
{ 0, 1 },
5158
};
5159
5160
const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_RANGES = sizeof(g_etc1_to_astc_selector_ranges) / sizeof(g_etc1_to_astc_selector_ranges[0]);
5161
5162
// NOTE(review): presumably maps [low selector][high selector] to an index into g_etc1_to_astc_selector_ranges;
// it is filled in outside this section - confirm against the initialization code.
static uint32_t g_etc1_to_astc_selector_range_index[4][4];
5163
5164
const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS = 10;
5165
// Candidate selector mappings: entry [m][s] is the ASTC 2-bit weight index (0-3) used for ETC1S selector s under mapping m.
static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] =
5166
{
5167
{ 0, 0, 1, 1 },
5168
{ 0, 0, 1, 2 },
5169
{ 0, 0, 1, 3 },
5170
{ 0, 0, 2, 3 },
5171
{ 0, 1, 1, 1 },
5172
{ 0, 1, 2, 2 },
5173
{ 0, 1, 2, 3 },
5174
{ 0, 2, 3, 3 },
5175
{ 1, 2, 2, 2 },
5176
{ 1, 2, 3, 3 },
5177
};
5178
5179
// Precomputed solutions using [0,47] endpoint levels. The data file is written by create_etc1_to_astc_conversion_table_0_47(),
// which emits entries with the intensity table outermost, then the 5-bit gray level, then the selector range, then the selector mapping.
static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
5180
#include "basisu_transcoder_tables_astc.inc"
5181
};
5182
5183
// The best selector mapping to use, given a base color + intensity table and the used selector range, when converting grayscale data.
5184
static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
5185
5186
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5187
// Precomputed solutions using full 8-bit [0,255] endpoints. The data file is written by create_etc1_to_astc_conversion_table_0_255(),
// which emits entries with the intensity table outermost, then the 5-bit gray level, then the selector range, then the selector mapping.
static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
5188
#include "basisu_transcoder_tables_astc_0_255.inc"
5189
};
5190
// NOTE(review): presumably the [0,255]-endpoint counterpart of g_etc1_to_astc_best_grayscale_mapping, indexed [base][inten][selector range];
// it is filled in outside this section - confirm.
static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
5191
#endif
5192
5193
// Per-level lookup for the 48 endpoint levels; the [0,47] table generator reads an entry and replicates it into 16 bits before interpolating.
// NOTE(review): presumably holds the unquantized 8-bit value of each level and is populated outside this section - confirm.
static uint32_t g_ise_to_unquant[48];
5194
5195
#if BASISD_WRITE_NEW_ASTC_TABLES
5196
static void create_etc1_to_astc_conversion_table_0_47()
5197
{
5198
FILE* pFile = nullptr;
5199
fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w");
5200
5201
uint32_t n = 0;
5202
5203
for (int inten = 0; inten < 8; inten++)
5204
{
5205
for (uint32_t g = 0; g < 32; g++)
5206
{
5207
color32 block_colors[4];
5208
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
5209
5210
for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
5211
{
5212
const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
5213
const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
5214
5215
uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5216
uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5217
uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5218
uint64_t highest_best_err = 0;
5219
5220
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5221
{
5222
uint32_t best_lo = 0;
5223
uint32_t best_hi = 0;
5224
uint64_t best_err = UINT64_MAX;
5225
5226
for (uint32_t hi = 0; hi <= 47; hi++)
5227
{
5228
for (uint32_t lo = 0; lo <= 47; lo++)
5229
{
5230
uint32_t colors[4];
5231
5232
for (uint32_t s = 0; s < 4; s++)
5233
{
5234
uint32_t s_scaled = s | (s << 2) | (s << 4);
5235
if (s_scaled > 32)
5236
s_scaled++;
5237
5238
uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8);
5239
uint32_t c1 = g_ise_to_unquant[hi] | (g_ise_to_unquant[hi] << 8);
5240
colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
5241
}
5242
5243
uint64_t total_err = 0;
5244
5245
for (uint32_t s = low_selector; s <= high_selector; s++)
5246
{
5247
int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
5248
5249
int err_scale = 1;
5250
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
5251
// the low/high selectors which are clamping to either 0 or 255.
5252
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
5253
err_scale = 8;
5254
5255
total_err += (err * err) * err_scale;
5256
}
5257
5258
if (total_err < best_err)
5259
{
5260
best_err = total_err;
5261
best_lo = lo;
5262
best_hi = hi;
5263
}
5264
}
5265
}
5266
5267
mapping_best_low[m] = best_lo;
5268
mapping_best_high[m] = best_hi;
5269
mapping_best_err[m] = best_err;
5270
highest_best_err = basisu::maximum(highest_best_err, best_err);
5271
5272
} // m
5273
5274
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5275
{
5276
uint64_t err = mapping_best_err[m];
5277
5278
err = basisu::minimum<uint64_t>(err, 0xFFFF);
5279
5280
fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
5281
5282
n++;
5283
if ((n & 31) == 31)
5284
fprintf(pFile, "\n");
5285
} // m
5286
5287
} // sr
5288
} // g
5289
} // inten
5290
5291
fclose(pFile);
5292
}
5293
5294
static void create_etc1_to_astc_conversion_table_0_255()
5295
{
5296
FILE* pFile = nullptr;
5297
fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w");
5298
5299
uint32_t n = 0;
5300
5301
for (int inten = 0; inten < 8; inten++)
5302
{
5303
for (uint32_t g = 0; g < 32; g++)
5304
{
5305
color32 block_colors[4];
5306
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
5307
5308
for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
5309
{
5310
const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
5311
const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
5312
5313
uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5314
uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5315
uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5316
uint64_t highest_best_err = 0;
5317
5318
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5319
{
5320
uint32_t best_lo = 0;
5321
uint32_t best_hi = 0;
5322
uint64_t best_err = UINT64_MAX;
5323
5324
for (uint32_t hi = 0; hi <= 255; hi++)
5325
{
5326
for (uint32_t lo = 0; lo <= 255; lo++)
5327
{
5328
uint32_t colors[4];
5329
5330
for (uint32_t s = 0; s < 4; s++)
5331
{
5332
uint32_t s_scaled = s | (s << 2) | (s << 4);
5333
if (s_scaled > 32)
5334
s_scaled++;
5335
5336
uint32_t c0 = lo | (lo << 8);
5337
uint32_t c1 = hi | (hi << 8);
5338
colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
5339
}
5340
5341
uint64_t total_err = 0;
5342
5343
for (uint32_t s = low_selector; s <= high_selector; s++)
5344
{
5345
int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
5346
5347
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
5348
// the low/high selectors which are clamping to either 0 or 255.
5349
int err_scale = 1;
5350
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
5351
err_scale = 8;
5352
5353
total_err += (err * err) * err_scale;
5354
}
5355
5356
if (total_err < best_err)
5357
{
5358
best_err = total_err;
5359
best_lo = lo;
5360
best_hi = hi;
5361
}
5362
}
5363
}
5364
5365
mapping_best_low[m] = best_lo;
5366
mapping_best_high[m] = best_hi;
5367
mapping_best_err[m] = best_err;
5368
highest_best_err = basisu::maximum(highest_best_err, best_err);
5369
} // m
5370
5371
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5372
{
5373
uint64_t err = mapping_best_err[m];
5374
5375
err = basisu::minimum<uint64_t>(err, 0xFFFF);
5376
5377
fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
5378
5379
n++;
5380
if ((n & 31) == 31)
5381
fprintf(pFile, "\n");
5382
} // m
5383
5384
} // sr
5385
} // g
5386
} // inten
5387
5388
fclose(pFile);
5389
}
5390
#endif
5391
5392
#endif
5393
5394
#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
5395
// Table encodes 5 trits to 8 output bits. 3^5 entries.
5396
// Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
5397
// The index is t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4, where t0..t4 are the trits of five consecutive values (see astc_encode_trits()).
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
5398
43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
5399
131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
5400
208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
5401
225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
5402
191, 223, 124, 125, 126 };
5403
5404
// Extracts the inclusive bit range [low,high] from bits, returned right-aligned.
// Expects 0 <= low <= high <= 31.
// The mask is built with unsigned arithmetic and the full 32-bit width is special-cased, so 31- and 32-bit
// wide fields no longer run into signed-overflow / over-wide-shift undefined behavior.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
	const uint32_t width = static_cast<uint32_t>(high - low + 1);
	const uint32_t mask = (width >= 32u) ? 0xFFFFFFFFu : ((1u << width) - 1u);

	return (bits >> low) & mask;
}
5409
5410
// Writes the low total_bits bits of value into the output at bit offset bit_pos in an endian safe way
// (one byte at a time), then advances bit_pos by total_bits.
// Bits are OR'ed into the destination, so the target bits must already be zero.
// Each chunk is masked to the number of bits actually being written, so stray bits above total_bits in
// value cannot leak into neighbouring fields of the same byte.
static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits)
{
	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

	while (total_bits)
	{
		const uint32_t bit_in_byte = static_cast<uint32_t>(bit_pos & 7);
		const uint32_t room_in_byte = 8 - bit_in_byte;
		const uint32_t bits_to_write = (total_bits < room_in_byte) ? total_bits : room_in_byte;

		// bits_to_write is in [1,8], so the shift below is always well defined.
		const uint32_t chunk = value & ((1u << bits_to_write) - 1u);

		pBytes[bit_pos >> 3] |= static_cast<uint8_t>(chunk << bit_in_byte);

		bit_pos += bits_to_write;
		total_bits -= bits_to_write;
		value >>= bits_to_write;
	}
}
5426
5427
// Encodes 5 values to output, usable for any range that uses trits and bits
5428
static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
5429
{
5430
// First extract the trits and the bits from the 5 input values
5431
int trits = 0, bits[5];
5432
const uint32_t bit_mask = (1 << n) - 1;
5433
for (int i = 0; i < 5; i++)
5434
{
5435
static const int s_muls[5] = { 1, 3, 9, 27, 81 };
5436
5437
const int t = pValues[i] >> n;
5438
5439
trits += t * s_muls[i];
5440
bits[i] = pValues[i] & bit_mask;
5441
}
5442
5443
// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
5444
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
5445
5446
assert(trits < 243);
5447
const int T = g_astc_trit_encode[trits];
5448
5449
// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
5450
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
5451
5452
astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
5453
(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
5454
}
5455
#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
5456
5457
#if BASISD_SUPPORT_ASTC
5458
// Input to the astc_pack_block_cem_*() packers below: the endpoint values and the per-texel weights of one block.
struct astc_block_params
5459
{
5460
// 2 groups of 5, but only a max of 8 are used (RRGGBBAA00)
5461
uint8_t m_endpoints[10];
5462
uint8_t m_weights[32];
5463
};
5464
5465
// Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2).
5466
// We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity.
5467
// Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color.
5468
// 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47.
5469
// Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec:
5470
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5471
// 32 total weights, stored as 16 CA CA, each ranging from 0-3.
5472
static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock)
5473
{
5474
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5475
5476
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5477
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5478
pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5479
pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5480
5481
pOutput[2] = 0;
5482
pOutput[3] = 0;
5483
5484
// Pack 8 endpoints (each ranging between [0,47]) using BISE starting at bit 17
5485
int bit_pos = 17;
5486
astc_encode_trits(pOutput, pBlock->m_endpoints, bit_pos, 4);
5487
astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4);
5488
5489
// Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5490
5491
for (uint32_t i = 0; i < 32; i++)
5492
{
5493
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5494
const uint32_t ofs = 126 - (i * 2);
5495
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5496
}
5497
}
5498
5499
// CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights
5500
// This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient.
5501
static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock)
5502
{
5503
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5504
5505
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5506
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5507
pBytes[0] = 0x41; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5508
pOutput[1] = 0;
5509
pBytes[8] = 0x00; pBytes[9] = 0x00; pBytes[10] = 0x00; pBytes[11] = 0xc0;
5510
pOutput[3] = 0;
5511
5512
// Pack 8 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5513
int bit_pos = 17;
5514
for (uint32_t i = 0; i < 8; i++)
5515
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5516
5517
// Pack 32 1-bit weights, which are stored from the top down into the block in opposite bit order.
5518
for (uint32_t i = 0; i < 32; i++)
5519
{
5520
const uint32_t ofs = 127 - i;
5521
pBytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
5522
}
5523
}
5524
5525
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5526
// Optional 8-bit endpoint packing functions.
5527
5528
// CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights
5529
static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5530
{
5531
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5532
5533
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5534
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5535
pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00;
5536
pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5537
5538
pOutput[2] = 0;
5539
pOutput[3] = 0;
5540
5541
// Pack 4 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5542
int bit_pos = 17;
5543
for (uint32_t i = 0; i < 4; i++)
5544
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5545
5546
// Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5547
for (uint32_t i = 0; i < 32; i++)
5548
{
5549
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5550
const uint32_t ofs = 126 - (i * 2);
5551
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5552
}
5553
}
5554
5555
// CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights
5556
static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5557
{
5558
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5559
5560
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5561
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5562
pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00;
5563
5564
pOutput[1] = 0;
5565
pOutput[2] = 0;
5566
pOutput[3] = 0;
5567
5568
// Pack 6 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5569
int bit_pos = 17;
5570
for (uint32_t i = 0; i < 6; i++)
5571
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5572
5573
// Pack 16 2-bit weights, which are stored from the top down into the block in opposite bit order.
5574
for (uint32_t i = 0; i < 16; i++)
5575
{
5576
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5577
const uint32_t ofs = 126 - (i * 2);
5578
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5579
}
5580
}
5581
#endif
5582
5583
// For each [0,255] input value: the quantized [0,47] endpoint entry that reproduces it best.
// Populated by transcoder_init_astc().
static uint8_t g_astc_single_color_encoding_0[256];

// For each [0,255] input value: the quantized [0,47] low/high endpoint pair that reproduces it best
// when every texel uses a selector of 1. Populated by transcoder_init_astc().
static struct
{
    uint8_t m_lo;
    uint8_t m_hi;
} g_astc_single_color_encoding_1[256];
5591
5592
static void transcoder_init_astc()
5593
{
5594
for (uint32_t base_color = 0; base_color < 32; base_color++)
5595
{
5596
for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5597
{
5598
for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5599
{
5600
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5601
5602
uint32_t best_mapping = 0;
5603
uint32_t best_err = UINT32_MAX;
5604
for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5605
{
5606
if (pTable_g[mapping_index].m_err < best_err)
5607
{
5608
best_err = pTable_g[mapping_index].m_err;
5609
best_mapping = mapping_index;
5610
}
5611
}
5612
5613
g_etc1_to_astc_best_grayscale_mapping[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5614
}
5615
}
5616
}
5617
5618
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5619
for (uint32_t base_color = 0; base_color < 32; base_color++)
5620
{
5621
for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5622
{
5623
for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5624
{
5625
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5626
5627
uint32_t best_mapping = 0;
5628
uint32_t best_err = UINT32_MAX;
5629
for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5630
{
5631
if (pTable_g[mapping_index].m_err < best_err)
5632
{
5633
best_err = pTable_g[mapping_index].m_err;
5634
best_mapping = mapping_index;
5635
}
5636
}
5637
5638
g_etc1_to_astc_best_grayscale_mapping_0_255[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5639
}
5640
}
5641
}
5642
#endif
5643
5644
for (uint32_t i = 0; i < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; i++)
5645
{
5646
uint32_t l = g_etc1_to_astc_selector_ranges[i].m_low;
5647
uint32_t h = g_etc1_to_astc_selector_ranges[i].m_high;
5648
g_etc1_to_astc_selector_range_index[l][h] = i;
5649
}
5650
5651
// Endpoint dequantization, see:
5652
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5653
for (uint32_t trit = 0; trit < 3; trit++)
5654
{
5655
for (uint32_t bit = 0; bit < 16; bit++)
5656
{
5657
const uint32_t A = (bit & 1) ? 511 : 0;
5658
const uint32_t B = (bit >> 1) | ((bit >> 1) << 6);
5659
const uint32_t C = 22;
5660
const uint32_t D = trit;
5661
5662
uint32_t unq = D * C + B;
5663
unq = unq ^ A;
5664
unq = (A & 0x80) | (unq >> 2);
5665
5666
g_ise_to_unquant[bit | (trit << 4)] = unq;
5667
}
5668
}
5669
5670
// Compute table used for optimal single color encoding.
5671
for (int i = 0; i < 256; i++)
5672
{
5673
int lowest_e = INT_MAX;
5674
5675
for (int lo = 0; lo < 48; lo++)
5676
{
5677
for (int hi = 0; hi < 48; hi++)
5678
{
5679
const int lo_v = g_ise_to_unquant[lo];
5680
const int hi_v = g_ise_to_unquant[hi];
5681
5682
int l = lo_v | (lo_v << 8);
5683
int h = hi_v | (hi_v << 8);
5684
5685
int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8;
5686
5687
int e = abs(v - i);
5688
5689
if (e < lowest_e)
5690
{
5691
g_astc_single_color_encoding_1[i].m_hi = static_cast<uint8_t>(hi);
5692
g_astc_single_color_encoding_1[i].m_lo = static_cast<uint8_t>(lo);
5693
5694
lowest_e = e;
5695
}
5696
5697
} // hi
5698
} // lo
5699
}
5700
5701
for (int i = 0; i < 256; i++)
5702
{
5703
int lowest_e = INT_MAX;
5704
5705
for (int lo = 0; lo < 48; lo++)
5706
{
5707
const int lo_v = g_ise_to_unquant[lo];
5708
5709
int e = abs(lo_v - i);
5710
5711
if (e < lowest_e)
5712
{
5713
g_astc_single_color_encoding_0[i] = static_cast<uint8_t>(lo);
5714
5715
lowest_e = e;
5716
}
5717
} // lo
5718
}
5719
}
5720
5721
// Converts opaque or color+alpha ETC1S block to ASTC 4x4.
5722
// This function tries to use the best ASTC mode given the block's actual contents.
5723
static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector,
5724
bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook)
5725
{
5726
astc_block_params blk;
5727
5728
blk.m_endpoints[8] = 0;
5729
blk.m_endpoints[9] = 0;
5730
5731
int constant_alpha_val = 255;
5732
int num_unique_alpha_selectors = 1;
5733
5734
if (transcode_alpha)
5735
{
5736
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5737
5738
num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
5739
5740
if (num_unique_alpha_selectors == 1)
5741
{
5742
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5743
5744
const color32& alpha_base_color = alpha_endpoint.m_color5;
5745
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5746
5747
int alpha_block_colors[4];
5748
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5749
5750
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
5751
}
5752
}
5753
5754
const color32& base_color = pEndpoints->m_color5;
5755
const uint32_t inten_table = pEndpoints->m_inten5;
5756
5757
const uint32_t low_selector = pSelector->m_lo_selector;
5758
const uint32_t high_selector = pSelector->m_hi_selector;
5759
5760
// Handle solid color or BTC blocks, which can always be encoded from ETC1S to ASTC losslessly.
5761
if ((pSelector->m_num_unique_selectors == 1) && (num_unique_alpha_selectors == 1))
5762
{
5763
// Both color and alpha are constant, write a solid color block and exit.
5764
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks
5765
uint32_t r, g, b;
5766
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
5767
5768
uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
5769
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
5770
5771
pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
5772
5773
pOutput[1] = 0xffffffff;
5774
pOutput[2] = 0;
5775
pOutput[3] = 0;
5776
5777
int bit_pos = 64;
5778
astc_set_bits(pOutput, bit_pos, r | (r << 8), 16);
5779
astc_set_bits(pOutput, bit_pos, g | (g << 8), 16);
5780
astc_set_bits(pOutput, bit_pos, b | (b << 8), 16);
5781
astc_set_bits(pOutput, bit_pos, constant_alpha_val | (constant_alpha_val << 8), 16);
5782
5783
return;
5784
}
5785
else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2))
5786
{
5787
// Both color and alpha use <= 2 unique selectors each.
5788
// Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights).
5789
color32 block_colors[4];
5790
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
5791
5792
blk.m_endpoints[0] = block_colors[low_selector].r;
5793
blk.m_endpoints[2] = block_colors[low_selector].g;
5794
blk.m_endpoints[4] = block_colors[low_selector].b;
5795
5796
blk.m_endpoints[1] = block_colors[high_selector].r;
5797
blk.m_endpoints[3] = block_colors[high_selector].g;
5798
blk.m_endpoints[5] = block_colors[high_selector].b;
5799
5800
int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
5801
int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
5802
bool invert = false;
5803
if (s1 < s0)
5804
{
5805
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5806
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5807
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5808
invert = true;
5809
}
5810
5811
if (transcode_alpha)
5812
{
5813
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5814
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5815
5816
const color32& alpha_base_color = alpha_endpoint.m_color5;
5817
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5818
5819
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5820
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5821
5822
int alpha_block_colors[4];
5823
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5824
5825
blk.m_endpoints[6] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5826
blk.m_endpoints[7] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5827
5828
for (uint32_t y = 0; y < 4; y++)
5829
{
5830
for (uint32_t x = 0; x < 4; x++)
5831
{
5832
uint32_t s = alpha_selectors.get_selector(x, y);
5833
s = (s == alpha_high_selector) ? 1 : 0;
5834
5835
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(s);
5836
} // x
5837
} // y
5838
}
5839
else
5840
{
5841
blk.m_endpoints[6] = 255;
5842
blk.m_endpoints[7] = 255;
5843
5844
for (uint32_t i = 0; i < 16; i++)
5845
blk.m_weights[i * 2 + 1] = 0;
5846
}
5847
5848
for (uint32_t y = 0; y < 4; y++)
5849
{
5850
for (uint32_t x = 0; x < 4; x++)
5851
{
5852
uint32_t s = pSelector->get_selector(x, y);
5853
5854
s = (s == high_selector) ? 1 : 0;
5855
5856
if (invert)
5857
s = 1 - s;
5858
5859
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(s);
5860
} // x
5861
} // y
5862
5863
astc_pack_block_cem_12_weight_range0(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5864
5865
return;
5866
}
5867
5868
// Either alpha and/or color use > 2 unique selectors each, so we must do something more complex.
5869
5870
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5871
// The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints.
5872
5873
// If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha.
5874
if ((base_color.r == base_color.g) && (base_color.r == base_color.b))
5875
{
5876
if (transcode_alpha)
5877
{
5878
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5879
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5880
5881
const color32& alpha_base_color = alpha_endpoint.m_color5;
5882
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5883
5884
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5885
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5886
5887
if (num_unique_alpha_selectors <= 2)
5888
{
5889
// Simple alpha block with only 1 or 2 unique values, so use BTC. This is lossless.
5890
int alpha_block_colors[4];
5891
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5892
5893
blk.m_endpoints[2] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5894
blk.m_endpoints[3] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5895
5896
for (uint32_t i = 0; i < 16; i++)
5897
{
5898
uint32_t s = alpha_selectors.get_selector(i & 3, i >> 2);
5899
blk.m_weights[i * 2 + 1] = (s == alpha_high_selector) ? 3 : 0;
5900
}
5901
}
5902
else
5903
{
5904
// Convert ETC1S alpha
5905
const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
5906
5907
//[32][8][RANGES][MAPPING]
5908
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5909
5910
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
5911
5912
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
5913
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
5914
5915
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5916
5917
for (uint32_t y = 0; y < 4; y++)
5918
{
5919
for (uint32_t x = 0; x < 4; x++)
5920
{
5921
uint32_t s = alpha_selectors.get_selector(x, y);
5922
uint32_t as = pSelectors_xlat[s];
5923
5924
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
5925
} // x
5926
} // y
5927
}
5928
}
5929
else
5930
{
5931
// No alpha slice - set output alpha to all 255's
5932
blk.m_endpoints[2] = 255;
5933
blk.m_endpoints[3] = 255;
5934
5935
for (uint32_t i = 0; i < 16; i++)
5936
blk.m_weights[i * 2 + 1] = 0;
5937
}
5938
5939
if (pSelector->m_num_unique_selectors <= 2)
5940
{
5941
// Simple color block with only 1 or 2 unique values, so use BTC. This is lossless.
5942
int block_colors[4];
5943
decoder_etc_block::get_block_colors5_g(block_colors, base_color, inten_table);
5944
5945
blk.m_endpoints[0] = static_cast<uint8_t>(block_colors[low_selector]);
5946
blk.m_endpoints[1] = static_cast<uint8_t>(block_colors[high_selector]);
5947
5948
for (uint32_t i = 0; i < 16; i++)
5949
{
5950
uint32_t s = pSelector->get_selector(i & 3, i >> 2);
5951
blk.m_weights[i * 2] = (s == high_selector) ? 3 : 0;
5952
}
5953
}
5954
else
5955
{
5956
// Convert ETC1S alpha
5957
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5958
5959
//[32][8][RANGES][MAPPING]
5960
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5961
5962
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table];
5963
5964
blk.m_endpoints[0] = pTable_g[best_mapping].m_lo;
5965
blk.m_endpoints[1] = pTable_g[best_mapping].m_hi;
5966
5967
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5968
5969
for (uint32_t y = 0; y < 4; y++)
5970
{
5971
for (uint32_t x = 0; x < 4; x++)
5972
{
5973
uint32_t s = pSelector->get_selector(x, y);
5974
uint32_t as = pSelectors_xlat[s];
5975
5976
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
5977
} // x
5978
} // y
5979
}
5980
5981
astc_pack_block_cem_4_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5982
return;
5983
}
5984
5985
// The block isn't grayscale and it uses > 2 unique selectors for opaque and/or alpha.
5986
// Check for fully opaque blocks, if so use 8-bit endpoints for slightly higher opaque quality (higher than BC1, but lower than BC7 mode 6 opaque).
5987
if ((num_unique_alpha_selectors == 1) && (constant_alpha_val == 255))
5988
{
5989
// Convert ETC1S color
5990
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5991
5992
//[32][8][RANGES][MAPPING]
5993
const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5994
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5995
const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5996
5997
uint32_t best_err = UINT_MAX;
5998
uint32_t best_mapping = 0;
5999
6000
assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
6001
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6002
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6003
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6004
#undef DO_ITER
6005
6006
blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
6007
blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
6008
6009
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
6010
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
6011
6012
blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
6013
blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
6014
6015
int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
6016
int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
6017
bool invert = false;
6018
6019
if (s1 < s0)
6020
{
6021
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6022
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6023
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6024
invert = true;
6025
}
6026
6027
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6028
6029
for (uint32_t y = 0; y < 4; y++)
6030
{
6031
for (uint32_t x = 0; x < 4; x++)
6032
{
6033
uint32_t s = pSelector->get_selector(x, y);
6034
uint32_t as = pSelectors_xlat[s];
6035
if (invert)
6036
as = 3 - as;
6037
6038
blk.m_weights[x + y * 4] = static_cast<uint8_t>(as);
6039
} // x
6040
} // y
6041
6042
// Now pack to ASTC
6043
astc_pack_block_cem_8_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
6044
return;
6045
}
6046
#endif //#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
6047
6048
// Nothing else worked, so fall back to CEM Mode 12 (LDR RGBA Direct), [0,47] endpoints, weight range 2 (2-bit weights), dual planes.
6049
// This mode can handle everything, but at slightly less quality than BC1.
6050
if (transcode_alpha)
6051
{
6052
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
6053
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
6054
6055
const color32& alpha_base_color = alpha_endpoint.m_color5;
6056
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
6057
6058
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
6059
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
6060
6061
if (alpha_low_selector == alpha_high_selector)
6062
{
6063
// Solid alpha block - use precomputed tables.
6064
int alpha_block_colors[4];
6065
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
6066
6067
const uint32_t g = alpha_block_colors[alpha_low_selector];
6068
6069
blk.m_endpoints[6] = g_astc_single_color_encoding_1[g].m_lo;
6070
blk.m_endpoints[7] = g_astc_single_color_encoding_1[g].m_hi;
6071
6072
for (uint32_t i = 0; i < 16; i++)
6073
blk.m_weights[i * 2 + 1] = 1;
6074
}
6075
else if ((alpha_inten_table >= 7) && (alpha_selectors.m_num_unique_selectors == 2) && (alpha_low_selector == 0) && (alpha_high_selector == 3))
6076
{
6077
// Handle outlier case where only the two outer colors are used with inten table 7.
6078
color32 alpha_block_colors[4];
6079
6080
decoder_etc_block::get_block_colors5(alpha_block_colors, alpha_base_color, alpha_inten_table);
6081
6082
const uint32_t g0 = alpha_block_colors[0].g;
6083
const uint32_t g1 = alpha_block_colors[3].g;
6084
6085
blk.m_endpoints[6] = g_astc_single_color_encoding_0[g0];
6086
blk.m_endpoints[7] = g_astc_single_color_encoding_0[g1];
6087
6088
for (uint32_t y = 0; y < 4; y++)
6089
{
6090
for (uint32_t x = 0; x < 4; x++)
6091
{
6092
uint32_t s = alpha_selectors.get_selector(x, y);
6093
uint32_t as = (s == alpha_high_selector) ? 3 : 0;
6094
6095
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
6096
} // x
6097
} // y
6098
}
6099
else
6100
{
6101
// Convert ETC1S alpha
6102
const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
6103
6104
//[32][8][RANGES][MAPPING]
6105
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6106
6107
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
6108
6109
blk.m_endpoints[6] = pTable_g[best_mapping].m_lo;
6110
blk.m_endpoints[7] = pTable_g[best_mapping].m_hi;
6111
6112
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6113
6114
for (uint32_t y = 0; y < 4; y++)
6115
{
6116
for (uint32_t x = 0; x < 4; x++)
6117
{
6118
uint32_t s = alpha_selectors.get_selector(x, y);
6119
uint32_t as = pSelectors_xlat[s];
6120
6121
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
6122
} // x
6123
} // y
6124
}
6125
}
6126
else
6127
{
6128
// No alpha slice - set output alpha to all 255's
6129
// 1 is 255 when dequantized
6130
blk.m_endpoints[6] = 1;
6131
blk.m_endpoints[7] = 1;
6132
6133
for (uint32_t i = 0; i < 16; i++)
6134
blk.m_weights[i * 2 + 1] = 0;
6135
}
6136
6137
if (low_selector == high_selector)
6138
{
6139
// Solid color block - use precomputed tables of optimal endpoints assuming selector weights are all 1.
6140
color32 block_colors[4];
6141
6142
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6143
6144
const uint32_t r = block_colors[low_selector].r;
6145
const uint32_t g = block_colors[low_selector].g;
6146
const uint32_t b = block_colors[low_selector].b;
6147
6148
blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo;
6149
blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi;
6150
6151
blk.m_endpoints[2] = g_astc_single_color_encoding_1[g].m_lo;
6152
blk.m_endpoints[3] = g_astc_single_color_encoding_1[g].m_hi;
6153
6154
blk.m_endpoints[4] = g_astc_single_color_encoding_1[b].m_lo;
6155
blk.m_endpoints[5] = g_astc_single_color_encoding_1[b].m_hi;
6156
6157
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6158
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6159
bool invert = false;
6160
6161
if (s1 < s0)
6162
{
6163
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6164
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6165
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6166
invert = true;
6167
}
6168
6169
for (uint32_t i = 0; i < 16; i++)
6170
blk.m_weights[i * 2] = invert ? 2 : 1;
6171
}
6172
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6173
{
6174
// Handle outlier case where only the two outer colors are used with inten table 7.
6175
color32 block_colors[4];
6176
6177
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6178
6179
const uint32_t r0 = block_colors[0].r;
6180
const uint32_t g0 = block_colors[0].g;
6181
const uint32_t b0 = block_colors[0].b;
6182
6183
const uint32_t r1 = block_colors[3].r;
6184
const uint32_t g1 = block_colors[3].g;
6185
const uint32_t b1 = block_colors[3].b;
6186
6187
blk.m_endpoints[0] = g_astc_single_color_encoding_0[r0];
6188
blk.m_endpoints[1] = g_astc_single_color_encoding_0[r1];
6189
6190
blk.m_endpoints[2] = g_astc_single_color_encoding_0[g0];
6191
blk.m_endpoints[3] = g_astc_single_color_encoding_0[g1];
6192
6193
blk.m_endpoints[4] = g_astc_single_color_encoding_0[b0];
6194
blk.m_endpoints[5] = g_astc_single_color_encoding_0[b1];
6195
6196
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6197
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6198
bool invert = false;
6199
6200
if (s1 < s0)
6201
{
6202
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6203
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6204
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6205
invert = true;
6206
}
6207
6208
for (uint32_t y = 0; y < 4; y++)
6209
{
6210
for (uint32_t x = 0; x < 4; x++)
6211
{
6212
uint32_t s = pSelector->get_selector(x, y);
6213
uint32_t as = (s == low_selector) ? 0 : 3;
6214
6215
if (invert)
6216
as = 3 - as;
6217
6218
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
6219
} // x
6220
} // y
6221
}
6222
else
6223
{
6224
// Convert ETC1S color
6225
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
6226
6227
//[32][8][RANGES][MAPPING]
6228
const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6229
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6230
const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6231
6232
uint32_t best_err = UINT_MAX;
6233
uint32_t best_mapping = 0;
6234
6235
assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
6236
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6237
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6238
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6239
#undef DO_ITER
6240
6241
blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
6242
blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
6243
6244
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
6245
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
6246
6247
blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
6248
blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
6249
6250
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6251
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6252
bool invert = false;
6253
6254
if (s1 < s0)
6255
{
6256
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6257
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6258
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6259
invert = true;
6260
}
6261
6262
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6263
6264
for (uint32_t y = 0; y < 4; y++)
6265
{
6266
for (uint32_t x = 0; x < 4; x++)
6267
{
6268
uint32_t s = pSelector->get_selector(x, y);
6269
uint32_t as = pSelectors_xlat[s];
6270
if (invert)
6271
as = 3 - as;
6272
6273
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
6274
} // x
6275
} // y
6276
}
6277
6278
// Now pack to ASTC
6279
astc_pack_block_cem_12_weight_range2(reinterpret_cast<uint32_t *>(pDst_block), &blk);
6280
}
6281
#endif
6282
6283
#if BASISD_SUPPORT_ATC
6284
// ATC and PVRTC2 both use these tables.
6285
struct etc1s_to_atc_solution
6286
{
6287
uint8_t m_lo;
6288
uint8_t m_hi;
6289
uint16_t m_err;
6290
};
6291
6292
static dxt_selector_range g_etc1s_to_atc_selector_ranges[] =
6293
{
6294
{ 0, 3 },
6295
{ 1, 3 },
6296
{ 0, 2 },
6297
{ 1, 2 },
6298
{ 2, 3 },
6299
{ 0, 1 },
6300
};
6301
6302
const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]);
6303
6304
static uint32_t g_etc1s_to_atc_selector_range_index[4][4];
6305
6306
const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10;
6307
static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] =
6308
{
6309
{ 0, 0, 1, 1 },
6310
{ 0, 0, 1, 2 },
6311
{ 0, 0, 1, 3 },
6312
{ 0, 0, 2, 3 },
6313
{ 0, 1, 1, 1 },
6314
{ 0, 1, 2, 2 },
6315
{ 0, 1, 2, 3 }, //6 - identity
6316
{ 0, 2, 3, 3 },
6317
{ 1, 2, 2, 2 },
6318
{ 1, 2, 3, 3 },
6319
};
6320
const uint32_t ATC_IDENTITY_SELECTOR_MAPPING_INDEX = 6;
6321
6322
// Precomputed solution tables, one entry per (32 base values x 8 intensity tables x selector mapping x selector range).
// The table contents come from generated .inc files.

#if BASISD_SUPPORT_PVRTC2
// PVRTC2 "45" table.
static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_45.inc"
};

#if 0
// Compiled out: PVRTC2 alpha "33" table.
static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_alpha_33.inc"
};
#endif

#endif

// ATC "55" table.
static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_55.inc"
};

// ATC "56" table.
static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_56.inc"
};
6342
6343
// A (low, high) endpoint pair chosen for one 8-bit target value.
struct atc_match_entry
{
	uint8_t m_lo;
	uint8_t m_hi;
};

// Lookup tables indexed by an 8-bit component value; built by transcoder_init_atc().
// The "*_equals_1" tables hold the endpoint pair whose selector-1 color best matches the value.
static atc_match_entry g_pvrtc2_match45_equals_1[256];
static atc_match_entry g_atc_match55_equals_1[256];
static atc_match_entry g_atc_match56_equals_1[256];

// These tables only use m_hi: the single endpoint whose expanded value best matches (selector 3).
static atc_match_entry g_pvrtc2_match4[256];
static atc_match_entry g_atc_match5[256];
static atc_match_entry g_atc_match6[256];

// Expands an endpoint quantized to num_levels steps up to 8 bits by bit replication.
// 16 levels are widened to 5 bits first and then to 8 (PVRTC2); 32 levels are treated as 5 bits;
// anything else is treated as 6 bits.
static inline int atc_expand_endpoint_to_8bits(int v, int num_levels)
{
	if (num_levels == 16)
	{
		v = (v << 1) | (v >> 3);
		return (v << 3) | (v >> 2);
	}

	if (num_levels == 32)
		return (v << 3) | (v >> 2);

	return (v << 2) | (v >> 4);
}

// Fills pTable[0..255] with the endpoint pair that best reproduces each 8-bit value.
//  size0/size1: number of quantization levels of the low/high endpoint.
//  sel: 1 to match the selector-1 blend (5*lo + 3*hi)/8, or 3 to match the high endpoint alone.
// On ties the first pair found (lowest lo, then lowest hi) is kept.
static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel)
{
	for (int target = 0; target < 256; target++)
	{
		int best_dist = 256;

		for (int lo = 0; lo < size0; lo++)
		{
			const int lo8 = atc_expand_endpoint_to_8bits(lo, size0);

			for (int hi = 0; hi < size1; hi++)
			{
				const int hi8 = atc_expand_endpoint_to_8bits(hi, size1);

				int dist;
				if (sel == 1)
				{
					dist = abs(((lo8 * 5 + hi8 * 3) / 8) - target);
				}
				else
				{
					assert(sel == 3);
					dist = abs(hi8 - target);
				}

				if (dist >= best_dist)
					continue;

				best_dist = dist;
				pTable[target].m_lo = static_cast<uint8_t>(lo);
				pTable[target].m_hi = static_cast<uint8_t>(hi);
			}
		}
	}
}
6410
6411
static void transcoder_init_atc()
6412
{
6413
prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1);
6414
prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1);
6415
prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1);
6416
6417
prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3);
6418
prepare_atc_single_color_table(g_atc_match5, 1, 32, 3);
6419
prepare_atc_single_color_table(g_atc_match6, 1, 64, 3);
6420
6421
for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++)
6422
{
6423
uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low;
6424
uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high;
6425
g_etc1s_to_atc_selector_range_index[l][h] = i;
6426
}
6427
}
6428
6429
// In-memory layout of one ATC color block: two 16-bit endpoints stored low byte first,
// followed by four selector bytes.
struct atc_block
{
	uint8_t m_lo[2];
	uint8_t m_hi[2];
	uint8_t m_sels[4];

	// Stores the low endpoint packed as 5:5:5 (r in bits 10-14, g in bits 5-9, b in bits 0-4).
	void set_low_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 32));

		const uint32_t packed = b | (g << 5) | (r << 10);

		m_lo[0] = static_cast<uint8_t>(packed & 0xFF);
		m_lo[1] = static_cast<uint8_t>((packed >> 8) & 0xFF);
	}

	// Stores the high endpoint packed as 5:6:5 (r in bits 11-15, g in bits 5-10, b in bits 0-4).
	void set_high_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 64) && (b < 32));

		const uint32_t packed = b | (g << 5) | (r << 11);

		m_hi[0] = static_cast<uint8_t>(packed & 0xFF);
		m_hi[1] = static_cast<uint8_t>((packed >> 8) & 0xFF);
	}
};
6451
6452
static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
6453
{
6454
atc_block* pBlock = static_cast<atc_block*>(pDst);
6455
6456
const uint32_t low_selector = pSelector->m_lo_selector;
6457
const uint32_t high_selector = pSelector->m_hi_selector;
6458
6459
const color32& base_color = pEndpoints->m_color5;
6460
const uint32_t inten_table = pEndpoints->m_inten5;
6461
6462
if (low_selector == high_selector)
6463
{
6464
uint32_t r, g, b;
6465
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
6466
6467
pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo);
6468
pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi);
6469
6470
pBlock->m_sels[0] = 0x55;
6471
pBlock->m_sels[1] = 0x55;
6472
pBlock->m_sels[2] = 0x55;
6473
pBlock->m_sels[3] = 0x55;
6474
6475
return;
6476
}
6477
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6478
{
6479
color32 block_colors[4];
6480
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6481
6482
const uint32_t r0 = block_colors[0].r;
6483
const uint32_t g0 = block_colors[0].g;
6484
const uint32_t b0 = block_colors[0].b;
6485
6486
const uint32_t r1 = block_colors[3].r;
6487
const uint32_t g1 = block_colors[3].g;
6488
const uint32_t b1 = block_colors[3].b;
6489
6490
pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi);
6491
pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi);
6492
6493
pBlock->m_sels[0] = pSelector->m_selectors[0];
6494
pBlock->m_sels[1] = pSelector->m_selectors[1];
6495
pBlock->m_sels[2] = pSelector->m_selectors[2];
6496
pBlock->m_sels[3] = pSelector->m_selectors[3];
6497
6498
return;
6499
}
6500
6501
const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
6502
6503
//[32][8][RANGES][MAPPING]
6504
const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6505
const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6506
const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6507
6508
uint32_t best_err = UINT_MAX;
6509
uint32_t best_mapping = 0;
6510
6511
assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
6512
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6513
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6514
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6515
#undef DO_ITER
6516
6517
pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
6518
pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
6519
6520
if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
6521
{
6522
pBlock->m_sels[0] = pSelector->m_selectors[0];
6523
pBlock->m_sels[1] = pSelector->m_selectors[1];
6524
pBlock->m_sels[2] = pSelector->m_selectors[2];
6525
pBlock->m_sels[3] = pSelector->m_selectors[3];
6526
}
6527
else
6528
{
6529
const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
6530
6531
const uint32_t sel_bits0 = pSelector->m_selectors[0];
6532
const uint32_t sel_bits1 = pSelector->m_selectors[1];
6533
const uint32_t sel_bits2 = pSelector->m_selectors[2];
6534
const uint32_t sel_bits3 = pSelector->m_selectors[3];
6535
6536
uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0;
6537
6538
#define DO_X(x) { \
6539
const uint32_t x_shift = (x) * 2; \
6540
atc_sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
6541
atc_sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
6542
atc_sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
6543
atc_sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
6544
6545
DO_X(0);
6546
DO_X(1);
6547
DO_X(2);
6548
DO_X(3);
6549
#undef DO_X
6550
6551
pBlock->m_sels[0] = (uint8_t)atc_sels0;
6552
pBlock->m_sels[1] = (uint8_t)atc_sels1;
6553
pBlock->m_sels[2] = (uint8_t)atc_sels2;
6554
pBlock->m_sels[3] = (uint8_t)atc_sels3;
6555
}
6556
}
6557
6558
#if BASISD_WRITE_NEW_ATC_TABLES
6559
static void create_etc1s_to_atc_conversion_tables()
6560
{
6561
// ATC 55
6562
FILE* pFile = nullptr;
6563
fopen_s(&pFile, "basisu_transcoder_tables_atc_55.inc", "w");
6564
6565
uint32_t n = 0;
6566
6567
for (int inten = 0; inten < 8; inten++)
6568
{
6569
for (uint32_t g = 0; g < 32; g++)
6570
{
6571
color32 block_colors[4];
6572
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6573
6574
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6575
{
6576
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6577
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6578
6579
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6580
{
6581
uint32_t best_lo = 0;
6582
uint32_t best_hi = 0;
6583
uint64_t best_err = UINT64_MAX;
6584
6585
for (uint32_t hi = 0; hi <= 31; hi++)
6586
{
6587
for (uint32_t lo = 0; lo <= 31; lo++)
6588
{
6589
uint32_t colors[4];
6590
6591
colors[0] = (lo << 3) | (lo >> 2);
6592
colors[3] = (hi << 3) | (hi >> 2);
6593
6594
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6595
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6596
6597
uint64_t total_err = 0;
6598
6599
for (uint32_t s = low_selector; s <= high_selector; s++)
6600
{
6601
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6602
6603
int err_scale = 1;
6604
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6605
// the low/high selectors which are clamping to either 0 or 255.
6606
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6607
err_scale = 5;
6608
6609
total_err += (err * err) * err_scale;
6610
}
6611
6612
if (total_err < best_err)
6613
{
6614
best_err = total_err;
6615
best_lo = lo;
6616
best_hi = hi;
6617
}
6618
}
6619
}
6620
6621
//assert(best_err <= 0xFFFF);
6622
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6623
6624
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6625
n++;
6626
if ((n & 31) == 31)
6627
fprintf(pFile, "\n");
6628
} // m
6629
} // sr
6630
} // g
6631
} // inten
6632
6633
fclose(pFile);
6634
pFile = nullptr;
6635
6636
// ATC 56
6637
fopen_s(&pFile, "basisu_transcoder_tables_atc_56.inc", "w");
6638
6639
n = 0;
6640
6641
for (int inten = 0; inten < 8; inten++)
6642
{
6643
for (uint32_t g = 0; g < 32; g++)
6644
{
6645
color32 block_colors[4];
6646
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6647
6648
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6649
{
6650
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6651
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6652
6653
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6654
{
6655
uint32_t best_lo = 0;
6656
uint32_t best_hi = 0;
6657
uint64_t best_err = UINT64_MAX;
6658
6659
for (uint32_t hi = 0; hi <= 63; hi++)
6660
{
6661
for (uint32_t lo = 0; lo <= 31; lo++)
6662
{
6663
uint32_t colors[4];
6664
6665
colors[0] = (lo << 3) | (lo >> 2);
6666
colors[3] = (hi << 2) | (hi >> 4);
6667
6668
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6669
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6670
6671
uint64_t total_err = 0;
6672
6673
for (uint32_t s = low_selector; s <= high_selector; s++)
6674
{
6675
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6676
6677
int err_scale = 1;
6678
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6679
// the low/high selectors which are clamping to either 0 or 255.
6680
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6681
err_scale = 5;
6682
6683
total_err += (err * err) * err_scale;
6684
}
6685
6686
if (total_err < best_err)
6687
{
6688
best_err = total_err;
6689
best_lo = lo;
6690
best_hi = hi;
6691
}
6692
}
6693
}
6694
6695
//assert(best_err <= 0xFFFF);
6696
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6697
6698
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6699
n++;
6700
if ((n & 31) == 31)
6701
fprintf(pFile, "\n");
6702
} // m
6703
} // sr
6704
} // g
6705
} // inten
6706
6707
fclose(pFile);
6708
6709
// PVRTC2 45
6710
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w");
6711
6712
n = 0;
6713
6714
for (int inten = 0; inten < 8; inten++)
6715
{
6716
for (uint32_t g = 0; g < 32; g++)
6717
{
6718
color32 block_colors[4];
6719
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6720
6721
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6722
{
6723
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6724
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6725
6726
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6727
{
6728
uint32_t best_lo = 0;
6729
uint32_t best_hi = 0;
6730
uint64_t best_err = UINT64_MAX;
6731
6732
for (uint32_t hi = 0; hi <= 31; hi++)
6733
{
6734
for (uint32_t lo = 0; lo <= 15; lo++)
6735
{
6736
uint32_t colors[4];
6737
6738
colors[0] = (lo << 1) | (lo >> 3);
6739
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6740
6741
colors[3] = (hi << 3) | (hi >> 2);
6742
6743
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6744
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6745
6746
uint64_t total_err = 0;
6747
6748
for (uint32_t s = low_selector; s <= high_selector; s++)
6749
{
6750
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6751
6752
int err_scale = 1;
6753
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6754
// the low/high selectors which are clamping to either 0 or 255.
6755
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6756
err_scale = 5;
6757
6758
total_err += (err * err) * err_scale;
6759
}
6760
6761
if (total_err < best_err)
6762
{
6763
best_err = total_err;
6764
best_lo = lo;
6765
best_hi = hi;
6766
}
6767
}
6768
}
6769
6770
//assert(best_err <= 0xFFFF);
6771
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6772
6773
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6774
n++;
6775
if ((n & 31) == 31)
6776
fprintf(pFile, "\n");
6777
} // m
6778
} // sr
6779
} // g
6780
} // inten
6781
6782
fclose(pFile);
6783
6784
#if 0
6785
// PVRTC2 34
6786
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_34.inc", "w");
6787
6788
n = 0;
6789
6790
for (int inten = 0; inten < 8; inten++)
6791
{
6792
for (uint32_t g = 0; g < 32; g++)
6793
{
6794
color32 block_colors[4];
6795
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6796
6797
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6798
{
6799
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6800
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6801
6802
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6803
{
6804
uint32_t best_lo = 0;
6805
uint32_t best_hi = 0;
6806
uint64_t best_err = UINT64_MAX;
6807
6808
for (uint32_t hi = 0; hi <= 15; hi++)
6809
{
6810
for (uint32_t lo = 0; lo <= 7; lo++)
6811
{
6812
uint32_t colors[4];
6813
6814
colors[0] = (lo << 2) | (lo >> 1);
6815
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6816
6817
colors[3] = (hi << 1) | (hi >> 3);
6818
colors[3] = (colors[3] << 3) | (colors[3] >> 2);
6819
6820
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6821
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6822
6823
uint64_t total_err = 0;
6824
6825
for (uint32_t s = low_selector; s <= high_selector; s++)
6826
{
6827
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6828
6829
int err_scale = 1;
6830
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6831
// the low/high selectors which are clamping to either 0 or 255.
6832
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6833
err_scale = 5;
6834
6835
total_err += (err * err) * err_scale;
6836
}
6837
6838
if (total_err < best_err)
6839
{
6840
best_err = total_err;
6841
best_lo = lo;
6842
best_hi = hi;
6843
}
6844
}
6845
}
6846
6847
//assert(best_err <= 0xFFFF);
6848
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6849
6850
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6851
n++;
6852
if ((n & 31) == 31)
6853
fprintf(pFile, "\n");
6854
} // m
6855
} // sr
6856
} // g
6857
} // inten
6858
6859
fclose(pFile);
6860
#endif
6861
#if 0
6862
// PVRTC2 44
6863
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_44.inc", "w");
6864
6865
n = 0;
6866
6867
for (int inten = 0; inten < 8; inten++)
6868
{
6869
for (uint32_t g = 0; g < 32; g++)
6870
{
6871
color32 block_colors[4];
6872
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6873
6874
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6875
{
6876
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6877
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6878
6879
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6880
{
6881
uint32_t best_lo = 0;
6882
uint32_t best_hi = 0;
6883
uint64_t best_err = UINT64_MAX;
6884
6885
for (uint32_t hi = 0; hi <= 15; hi++)
6886
{
6887
for (uint32_t lo = 0; lo <= 15; lo++)
6888
{
6889
uint32_t colors[4];
6890
6891
colors[0] = (lo << 1) | (lo >> 3);
6892
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6893
6894
colors[3] = (hi << 1) | (hi >> 3);
6895
colors[3] = (colors[3] << 3) | (colors[3] >> 2);
6896
6897
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6898
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6899
6900
uint64_t total_err = 0;
6901
6902
for (uint32_t s = low_selector; s <= high_selector; s++)
6903
{
6904
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6905
6906
int err_scale = 1;
6907
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6908
// the low/high selectors which are clamping to either 0 or 255.
6909
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6910
err_scale = 5;
6911
6912
total_err += (err * err) * err_scale;
6913
}
6914
6915
if (total_err < best_err)
6916
{
6917
best_err = total_err;
6918
best_lo = lo;
6919
best_hi = hi;
6920
}
6921
}
6922
}
6923
6924
//assert(best_err <= 0xFFFF);
6925
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6926
6927
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6928
n++;
6929
if ((n & 31) == 31)
6930
fprintf(pFile, "\n");
6931
} // m
6932
} // sr
6933
} // g
6934
} // inten
6935
6936
fclose(pFile);
6937
#endif
6938
6939
// PVRTC2 alpha 33
6940
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_alpha_33.inc", "w");
6941
6942
n = 0;
6943
6944
for (int inten = 0; inten < 8; inten++)
6945
{
6946
for (uint32_t g = 0; g < 32; g++)
6947
{
6948
color32 block_colors[4];
6949
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6950
6951
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6952
{
6953
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6954
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6955
6956
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6957
{
6958
uint32_t best_lo = 0;
6959
uint32_t best_hi = 0;
6960
uint64_t best_err = UINT64_MAX;
6961
6962
for (uint32_t hi = 0; hi <= 7; hi++)
6963
{
6964
for (uint32_t lo = 0; lo <= 7; lo++)
6965
{
6966
uint32_t colors[4];
6967
6968
colors[0] = (lo << 1);
6969
colors[0] = (colors[0] << 4) | colors[0];
6970
6971
colors[3] = (hi << 1) | 1;
6972
colors[3] = (colors[3] << 4) | colors[3];
6973
6974
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6975
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6976
6977
uint64_t total_err = 0;
6978
6979
for (uint32_t s = low_selector; s <= high_selector; s++)
6980
{
6981
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6982
6983
int err_scale = 1;
6984
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6985
// the low/high selectors which are clamping to either 0 or 255.
6986
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6987
err_scale = 5;
6988
6989
total_err += (err * err) * err_scale;
6990
}
6991
6992
if (total_err < best_err)
6993
{
6994
best_err = total_err;
6995
best_lo = lo;
6996
best_hi = hi;
6997
}
6998
}
6999
}
7000
7001
//assert(best_err <= 0xFFFF);
7002
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
7003
7004
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
7005
n++;
7006
if ((n & 31) == 31)
7007
fprintf(pFile, "\n");
7008
} // m
7009
} // sr
7010
} // g
7011
} // inten
7012
7013
fclose(pFile);
7014
}
7015
#endif // BASISD_WRITE_NEW_ATC_TABLES
7016
7017
#endif // BASISD_SUPPORT_ATC
7018
7019
#if BASISD_SUPPORT_PVRTC2
7020
// In-memory layout of one PVRTC2 block: four modulation bytes followed by 32 bits of color data.
// The color data can be viewed as opaque-mode fields, transparent-mode fields, or a raw 32-bit word.
// The bitfield order and widths below define the packed layout and must not be changed.
struct pvrtc2_block
{
	uint8_t m_modulation[4];

	union
	{
		union
		{
			// Opaque mode: RGB colora=554 and colorb=555
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 4;
				uint32_t m_green_a : 5;
				uint32_t m_red_a : 5;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 5;
				uint32_t m_green_b : 5;
				uint32_t m_red_b : 5;
				uint32_t m_opaque_flag : 1;
			} m_opaque_color_data;

			// Transparent mode: RGBA colora=4433 and colorb=4443
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 3;
				uint32_t m_green_a : 4;
				uint32_t m_red_a : 4;
				uint32_t m_alpha_a : 3;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 4;
				uint32_t m_green_b : 4;
				uint32_t m_red_b : 4;
				uint32_t m_alpha_b : 3;
				uint32_t m_opaque_flag : 1;
			} m_trans_color_data;
		};

		uint32_t m_color_data_bits;
	};

	// Opaque mode color A: red 5 bits, green 5 bits, blue 4 bits.
	void set_low_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert(r < 32);
		assert(g < 32);
		assert(b < 16);

		m_opaque_color_data.m_blue_a = b;
		m_opaque_color_data.m_green_a = g;
		m_opaque_color_data.m_red_a = r;
	}

	// Opaque mode color B: red, green and blue are 5 bits each.
	void set_high_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert(r < 32);
		assert(g < 32);
		assert(b < 32);

		m_opaque_color_data.m_blue_b = b;
		m_opaque_color_data.m_green_b = g;
		m_opaque_color_data.m_red_b = r;
	}

	// Transparent mode color A: red 4 bits, green 4 bits, blue 3 bits, alpha 3 bits.
	void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert(r < 16);
		assert(g < 16);
		assert(b < 8);
		assert(a < 8);

		m_trans_color_data.m_blue_a = b;
		m_trans_color_data.m_green_a = g;
		m_trans_color_data.m_red_a = r;
		m_trans_color_data.m_alpha_a = a;
	}

	// Transparent mode color B: red, green and blue are 4 bits each, alpha 3 bits.
	void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert(r < 16);
		assert(g < 16);
		assert(b < 16);
		assert(a < 8);

		m_trans_color_data.m_blue_b = b;
		m_trans_color_data.m_green_b = g;
		m_trans_color_data.m_red_b = r;
		m_trans_color_data.m_alpha_b = a;
	}
};
7102
7103
// 256-entry (low, high) endpoint lookup tables for PVRTC2 transparent-mode blocks.
// NOTE(review): the code that fills and reads these tables is not in this part of the file;
// the digits in each name presumably give the low/high endpoint bit depths - confirm.

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_trans_match34[256];

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_trans_match44[256];

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33[256];

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33_0[256];

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33_3[256];
7127
7128
// PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity.
7129
static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
7130
{
7131
pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
7132
7133
pBlock->m_opaque_color_data.m_hard_flag = 1;
7134
pBlock->m_opaque_color_data.m_mod_flag = 0;
7135
pBlock->m_opaque_color_data.m_opaque_flag = 1;
7136
7137
const uint32_t low_selector = pSelector->m_lo_selector;
7138
const uint32_t high_selector = pSelector->m_hi_selector;
7139
7140
const color32& base_color = pEndpoints->m_color5;
7141
const uint32_t inten_table = pEndpoints->m_inten5;
7142
7143
if (low_selector == high_selector)
7144
{
7145
uint32_t r, g, b;
7146
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
7147
7148
pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo);
7149
pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi);
7150
7151
pBlock->m_modulation[0] = 0x55;
7152
pBlock->m_modulation[1] = 0x55;
7153
pBlock->m_modulation[2] = 0x55;
7154
pBlock->m_modulation[3] = 0x55;
7155
7156
return;
7157
}
7158
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
7159
{
7160
color32 block_colors[4];
7161
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
7162
7163
const uint32_t r0 = block_colors[0].r;
7164
const uint32_t g0 = block_colors[0].g;
7165
const uint32_t b0 = block_colors[0].b;
7166
7167
const uint32_t r1 = block_colors[3].r;
7168
const uint32_t g1 = block_colors[3].g;
7169
const uint32_t b1 = block_colors[3].b;
7170
7171
pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi);
7172
pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi);
7173
7174
pBlock->m_modulation[0] = pSelector->m_selectors[0];
7175
pBlock->m_modulation[1] = pSelector->m_selectors[1];
7176
pBlock->m_modulation[2] = pSelector->m_selectors[2];
7177
pBlock->m_modulation[3] = pSelector->m_selectors[3];
7178
7179
return;
7180
}
7181
7182
const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
7183
7184
//[32][8][RANGES][MAPPING]
7185
const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7186
const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7187
const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7188
7189
uint32_t best_err = UINT_MAX;
7190
uint32_t best_mapping = 0;
7191
7192
assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
7193
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
7194
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
7195
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
7196
#undef DO_ITER
7197
7198
pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
7199
pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
7200
7201
if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
7202
{
7203
pBlock->m_modulation[0] = pSelector->m_selectors[0];
7204
pBlock->m_modulation[1] = pSelector->m_selectors[1];
7205
pBlock->m_modulation[2] = pSelector->m_selectors[2];
7206
pBlock->m_modulation[3] = pSelector->m_selectors[3];
7207
}
7208
else
7209
{
7210
// TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does.
7211
const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
7212
7213
const uint32_t sel_bits0 = pSelector->m_selectors[0];
7214
const uint32_t sel_bits1 = pSelector->m_selectors[1];
7215
const uint32_t sel_bits2 = pSelector->m_selectors[2];
7216
const uint32_t sel_bits3 = pSelector->m_selectors[3];
7217
7218
uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0;
7219
7220
#define DO_X(x) { \
7221
const uint32_t x_shift = (x) * 2; \
7222
sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
7223
sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
7224
sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
7225
sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
7226
7227
DO_X(0);
7228
DO_X(1);
7229
DO_X(2);
7230
DO_X(3);
7231
#undef DO_X
7232
7233
pBlock->m_modulation[0] = (uint8_t)sels0;
7234
pBlock->m_modulation[1] = (uint8_t)sels1;
7235
pBlock->m_modulation[2] = (uint8_t)sels2;
7236
pBlock->m_modulation[3] = (uint8_t)sels3;
7237
}
7238
}
7239
7240
static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; }
7241
static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; }
7242
static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; }
7243
static inline vec4F vec4F_saturate(const vec4F* pV) { vec4F res; res.c[0] = saturate(pV->c[0]); res.c[1] = saturate(pV->c[1]); res.c[2] = saturate(pV->c[2]); res.c[3] = saturate(pV->c[3]); return res; }
7244
static inline vec4F vec4F_from_color(const color32* pC) { vec4F res; vec4F_set(&res, pC->c[0], pC->c[1], pC->c[2], pC->c[3]); return res; }
7245
static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] + pRHS->c[0], pLHS->c[1] + pRHS->c[1], pLHS->c[2] + pRHS->c[2], pLHS->c[3] + pRHS->c[3]); return res; }
7246
static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] - pRHS->c[0], pLHS->c[1] - pRHS->c[1], pLHS->c[2] - pRHS->c[2], pLHS->c[3] - pRHS->c[3]); return res; }
7247
static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS) { return pLHS->c[0] * pRHS->c[0] + pLHS->c[1] * pRHS->c[1] + pLHS->c[2] * pRHS->c[2] + pLHS->c[3] * pRHS->c[3]; }
7248
static inline vec4F vec4F_mul(const vec4F* pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->c[0] * s, pLHS->c[1] * s, pLHS->c[2] * s, pLHS->c[3] * s); return res; }
7249
static inline vec4F* vec4F_normalize_in_place(vec4F* pV) { float s = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->c[0] *= s; pV->c[1] *= s; pV->c[2] *= s; pV->c[3] *= s; } return pV; }
7250
7251
// Expands a color with 5-bit R, G, B and 4-bit A components to 8 bits per component
// by replicating the high bits into the low bits.
static color32 convert_rgba_5554_to_8888(const color32& col)
{
	const int r8 = (col[0] << 3) | (col[0] >> 2);
	const int g8 = (col[1] << 3) | (col[1] >> 2);
	const int b8 = (col[2] << 3) | (col[2] >> 2);
	const int a8 = (col[3] << 4) | col[3];

	return color32(r8, g8, b8, a8);
}
7255
7256
// Returns x squared.
static inline int sq(int x)
{
	const int squared = x * x;
	return squared;
}
7257
7258
// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
7259
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
7260
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
7261
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
7262
{
7263
pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
7264
7265
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]];
7266
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]];
7267
7268
pBlock->m_opaque_color_data.m_hard_flag = 1;
7269
pBlock->m_opaque_color_data.m_mod_flag = 0;
7270
pBlock->m_opaque_color_data.m_opaque_flag = 0;
7271
7272
const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
7273
7274
const color32& alpha_base_color = alpha_endpoint.m_color5;
7275
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
7276
7277
int constant_alpha_val = -1;
7278
7279
int alpha_block_colors[4];
7280
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
7281
7282
if (num_unique_alpha_selectors == 1)
7283
{
7284
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
7285
}
7286
else
7287
{
7288
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
7289
7290
for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++)
7291
{
7292
if (constant_alpha_val != alpha_block_colors[i])
7293
{
7294
constant_alpha_val = -1;
7295
break;
7296
}
7297
}
7298
}
7299
7300
if (constant_alpha_val >= 250)
7301
{
7302
// It's opaque enough, so don't bother trying to encode it as an alpha block.
7303
convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector);
7304
return;
7305
}
7306
7307
const color32& base_color = pEndpoints->m_color5;
7308
const uint32_t inten_table = pEndpoints->m_inten5;
7309
7310
const uint32_t low_selector = pSelector->m_lo_selector;
7311
const uint32_t high_selector = pSelector->m_hi_selector;
7312
7313
const int num_unique_color_selectors = pSelector->m_num_unique_selectors;
7314
7315
// We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes.
7316
// Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values.
7317
const int br = (base_color.r << 3) | (base_color.r >> 2);
7318
const int bg = (base_color.g << 3) | (base_color.g >> 2);
7319
const int bb = (base_color.b << 3) | (base_color.b >> 2);
7320
7321
color32 block_cols[4];
7322
for (uint32_t i = 0; i < 4; i++)
7323
{
7324
const int ci = g_etc1_inten_tables[inten_table][i];
7325
block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]);
7326
}
7327
7328
bool solid_color_block = true;
7329
if (num_unique_color_selectors > 1)
7330
{
7331
for (uint32_t i = low_selector + 1; i <= high_selector; i++)
7332
{
7333
if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b))
7334
{
7335
solid_color_block = false;
7336
break;
7337
}
7338
}
7339
}
7340
7341
if ((solid_color_block) && (constant_alpha_val >= 0))
7342
{
7343
// Constant color/alpha block.
7344
// This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one.
7345
uint32_t r, g, b;
7346
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
7347
7348
// Mod 0
7349
uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255;
7350
uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l;
7351
7352
uint32_t cr0 = (lr0 << 1) | (lr0 >> 3);
7353
uint32_t cg0 = (lg0 << 1) | (lg0 >> 3);
7354
uint32_t cb0 = (lb0 << 2) | (lb0 >> 1);
7355
uint32_t ca0 = (la0 << 1);
7356
7357
cr0 = (cr0 << 3) | (cr0 >> 2);
7358
cg0 = (cg0 << 3) | (cg0 >> 2);
7359
cb0 = (cb0 << 3) | (cb0 >> 2);
7360
ca0 = (ca0 << 4) | ca0;
7361
7362
uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2;
7363
7364
// If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque.
7365
if ((err0 == 0) || (constant_alpha_val < 3))
7366
{
7367
pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
7368
pBlock->set_trans_high_color(0, 0, 0, 0);
7369
7370
pBlock->m_modulation[0] = 0;
7371
pBlock->m_modulation[1] = 0;
7372
pBlock->m_modulation[2] = 0;
7373
pBlock->m_modulation[3] = 0;
7374
return;
7375
}
7376
7377
// Mod 3
7378
uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255;
7379
uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l;
7380
7381
uint32_t cr3 = (lr3 << 1) | (lr3 >> 3);
7382
uint32_t cg3 = (lg3 << 1) | (lg3 >> 3);
7383
uint32_t cb3 = (lb3 << 1) | (lb3 >> 3);
7384
uint32_t ca3 = (la3 << 1) | 1;
7385
7386
cr3 = (cr3 << 3) | (cr3 >> 2);
7387
cg3 = (cg3 << 3) | (cg3 >> 2);
7388
cb3 = (cb3 << 3) | (cb3 >> 2);
7389
ca3 = (ca3 << 4) | ca3;
7390
7391
uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2;
7392
7393
// Mod 1
7394
uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l;
7395
uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h;
7396
uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h;
7397
7398
uint32_t clr1 = (lr1 << 1) | (lr1 >> 3);
7399
uint32_t clg1 = (lg1 << 1) | (lg1 >> 3);
7400
uint32_t clb1 = (lb1 << 2) | (lb1 >> 1);
7401
uint32_t cla1 = (la1 << 1);
7402
7403
clr1 = (clr1 << 3) | (clr1 >> 2);
7404
clg1 = (clg1 << 3) | (clg1 >> 2);
7405
clb1 = (clb1 << 3) | (clb1 >> 2);
7406
cla1 = (cla1 << 4) | cla1;
7407
7408
uint32_t chr1 = (hr1 << 1) | (hr1 >> 3);
7409
uint32_t chg1 = (hg1 << 1) | (hg1 >> 3);
7410
uint32_t chb1 = (hb1 << 1) | (hb1 >> 3);
7411
uint32_t cha1 = (ha1 << 1) | 1;
7412
7413
chr1 = (chr1 << 3) | (chr1 >> 2);
7414
chg1 = (chg1 << 3) | (chg1 >> 2);
7415
chb1 = (chb1 << 3) | (chb1 >> 2);
7416
cha1 = (cha1 << 4) | cha1;
7417
7418
uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8;
7419
uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8;
7420
uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8;
7421
uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8;
7422
7423
uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2;
7424
7425
if ((err1 < err0) && (err1 < err3))
7426
{
7427
pBlock->set_trans_low_color(lr1, lg1, lb1, la1);
7428
pBlock->set_trans_high_color(hr1, hg1, hb1, ha1);
7429
7430
pBlock->m_modulation[0] = 0x55;
7431
pBlock->m_modulation[1] = 0x55;
7432
pBlock->m_modulation[2] = 0x55;
7433
pBlock->m_modulation[3] = 0x55;
7434
}
7435
else if (err0 < err3)
7436
{
7437
pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
7438
pBlock->set_trans_high_color(0, 0, 0, 0);
7439
7440
pBlock->m_modulation[0] = 0;
7441
pBlock->m_modulation[1] = 0;
7442
pBlock->m_modulation[2] = 0;
7443
pBlock->m_modulation[3] = 0;
7444
}
7445
else
7446
{
7447
pBlock->set_trans_low_color(0, 0, 0, 0);
7448
pBlock->set_trans_high_color(lr3, lg3, lb3, la3);
7449
7450
pBlock->m_modulation[0] = 0xFF;
7451
pBlock->m_modulation[1] = 0xFF;
7452
pBlock->m_modulation[2] = 0xFF;
7453
pBlock->m_modulation[3] = 0xFF;
7454
}
7455
7456
return;
7457
}
7458
7459
// It's a complex block with non-solid color and/or alpha pixels.
7460
vec4F minColor, maxColor;
7461
7462
if (solid_color_block)
7463
{
7464
// It's a solid color block.
7465
uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a;
7466
uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a;
7467
7468
const float S = 1.0f / 255.0f;
7469
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S);
7470
vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S);
7471
}
7472
else if (constant_alpha_val >= 0)
7473
{
7474
// It's a solid alpha block.
7475
const float S = 1.0f / 255.0f;
7476
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S);
7477
vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S);
7478
}
7479
// See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis).
7480
// To keep quality up we need to use full 4D PCA in this case.
7481
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) ||
7482
(block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) ||
7483
(block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) ||
7484
(block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255))
7485
{
7486
// Find principle component of RGBA colors treated as 4D vectors.
7487
color32 pixels[16];
7488
7489
uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
7490
for (uint32_t i = 0; i < 16; i++)
7491
{
7492
color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]);
7493
uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a;
7494
7495
pixels[i].set(rgb.r, rgb.g, rgb.b, a);
7496
7497
sum_r += rgb.r;
7498
sum_g += rgb.g;
7499
sum_b += rgb.b;
7500
sum_a += a;
7501
}
7502
7503
vec4F meanColor;
7504
vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a);
7505
vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f);
7506
7507
meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f));
7508
vec4F_saturate_in_place(&meanColor);
7509
7510
vec4F axis;
7511
vec4F_set_scalar(&axis, 0.0f);
7512
// Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D.
7513
for (uint32_t i = 0; i < 16; i++)
7514
{
7515
vec4F color = vec4F_from_color(&pixels[i]);
7516
color = vec4F_sub(&color, &meanColorScaled);
7517
vec4F a = vec4F_mul(&color, color.c[0]);
7518
vec4F b = vec4F_mul(&color, color.c[1]);
7519
vec4F c = vec4F_mul(&color, color.c[2]);
7520
vec4F d = vec4F_mul(&color, color.c[3]);
7521
vec4F n = i ? axis : color;
7522
vec4F_normalize_in_place(&n);
7523
axis.c[0] += vec4F_dot(&a, &n);
7524
axis.c[1] += vec4F_dot(&b, &n);
7525
axis.c[2] += vec4F_dot(&c, &n);
7526
axis.c[3] += vec4F_dot(&d, &n);
7527
}
7528
7529
vec4F_normalize_in_place(&axis);
7530
7531
if (vec4F_dot(&axis, &axis) < .5f)
7532
vec4F_set_scalar(&axis, .5f);
7533
7534
float l = 1e+9f, h = -1e+9f;
7535
7536
for (uint32_t i = 0; i < 16; i++)
7537
{
7538
vec4F color = vec4F_from_color(&pixels[i]);
7539
7540
vec4F q = vec4F_sub(&color, &meanColorScaled);
7541
float d = vec4F_dot(&q, &axis);
7542
7543
l = basisu::minimum(l, d);
7544
h = basisu::maximum(h, d);
7545
}
7546
7547
l *= (1.0f / 255.0f);
7548
h *= (1.0f / 255.0f);
7549
7550
vec4F b0 = vec4F_mul(&axis, l);
7551
vec4F b1 = vec4F_mul(&axis, h);
7552
vec4F c0 = vec4F_add(&meanColor, &b0);
7553
vec4F c1 = vec4F_add(&meanColor, &b1);
7554
minColor = vec4F_saturate(&c0);
7555
maxColor = vec4F_saturate(&c1);
7556
if (minColor.c[3] > maxColor.c[3])
7557
{
7558
// VS 2019 release Code Generator issue
7559
//std::swap(minColor, maxColor);
7560
7561
float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3];
7562
minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
7563
minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
7564
maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d;
7565
}
7566
}
7567
else
7568
{
7569
// We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA.
7570
// We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L.
7571
uint32_t block_cols_l[4], block_cols_a[4];
7572
for (uint32_t i = 0; i < 4; i++)
7573
{
7574
block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b;
7575
block_cols_a[i] = block_cols[i].a * 3;
7576
}
7577
7578
int p0_min = INT_MAX, p0_max = INT_MIN;
7579
int p1_min = INT_MAX, p1_max = INT_MIN;
7580
for (uint32_t y = 0; y < 4; y++)
7581
{
7582
const uint32_t cs = pSelector->m_selectors[y];
7583
const uint32_t as = alpha_selectors.m_selectors[y];
7584
7585
{
7586
const int l = block_cols_l[cs & 3];
7587
const int a = block_cols_a[as & 3];
7588
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7589
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7590
}
7591
{
7592
const int l = block_cols_l[(cs >> 2) & 3];
7593
const int a = block_cols_a[(as >> 2) & 3];
7594
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7595
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7596
}
7597
{
7598
const int l = block_cols_l[(cs >> 4) & 3];
7599
const int a = block_cols_a[(as >> 4) & 3];
7600
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7601
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7602
}
7603
{
7604
const int l = block_cols_l[cs >> 6];
7605
const int a = block_cols_a[as >> 6];
7606
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7607
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7608
}
7609
}
7610
7611
int dist0 = p0_max - p0_min;
7612
int dist1 = p1_max - p1_min;
7613
7614
const float S = 1.0f / 255.0f;
7615
7616
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, block_cols[alpha_selectors.m_lo_selector].a * S);
7617
vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S);
7618
7619
// See if the A component of the principle axis is flipped relative to L. If so, we need to flip either RGB or A bounds.
7620
if (dist1 > dist0)
7621
{
7622
std::swap(minColor.c[0], maxColor.c[0]);
7623
std::swap(minColor.c[1], maxColor.c[1]);
7624
std::swap(minColor.c[2], maxColor.c[2]);
7625
}
7626
}
7627
7628
// 4433 4443
7629
color32 trialMinColor, trialMaxColor;
7630
7631
trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f));
7632
trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f));
7633
7634
pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a);
7635
pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a);
7636
7637
color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1);
7638
color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1);
7639
7640
color32 color0(convert_rgba_5554_to_8888(color_a));
7641
color32 color3(convert_rgba_5554_to_8888(color_b));
7642
7643
const int lr = color0.r;
7644
const int lg = color0.g;
7645
const int lb = color0.b;
7646
const int la = color0.a;
7647
7648
const int axis_r = color3.r - lr;
7649
const int axis_g = color3.g - lg;
7650
const int axis_b = color3.b - lb;
7651
const int axis_a = color3.a - la;
7652
const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a);
7653
7654
const int thresh01 = (len_a * 3) / 16;
7655
const int thresh12 = len_a >> 1;
7656
const int thresh23 = (len_a * 13) / 16;
7657
7658
if ((axis_r | axis_g | axis_b) == 0)
7659
{
7660
int ca_sel[4];
7661
7662
for (uint32_t i = 0; i < 4; i++)
7663
{
7664
int ca = (block_cols[i].a - la) * axis_a;
7665
ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01);
7666
}
7667
7668
for (uint32_t y = 0; y < 4; y++)
7669
{
7670
const uint32_t a_sels = alpha_selectors.m_selectors[y];
7671
7672
uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6);
7673
7674
pBlock->m_modulation[y] = (uint8_t)sel;
7675
}
7676
}
7677
else
7678
{
7679
int cy[4], ca[4];
7680
7681
for (uint32_t i = 0; i < 4; i++)
7682
{
7683
cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b;
7684
ca[i] = (block_cols[i].a - la) * axis_a;
7685
}
7686
7687
for (uint32_t y = 0; y < 4; y++)
7688
{
7689
const uint32_t c_sels = pSelector->m_selectors[y];
7690
const uint32_t a_sels = alpha_selectors.m_selectors[y];
7691
7692
const int d0 = cy[c_sels & 3] + ca[a_sels & 3];
7693
const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3];
7694
const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3];
7695
const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6];
7696
7697
uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) |
7698
(((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) |
7699
(((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) |
7700
(((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6);
7701
7702
pBlock->m_modulation[y] = (uint8_t)sel;
7703
}
7704
}
7705
}
7706
7707
static void transcoder_init_pvrtc2()
7708
{
7709
for (uint32_t v = 0; v < 256; v++)
7710
{
7711
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7712
7713
for (uint32_t l = 0; l < 8; l++)
7714
{
7715
uint32_t le = (l << 1);
7716
le = (le << 4) | le;
7717
7718
for (uint32_t h = 0; h < 8; h++)
7719
{
7720
uint32_t he = (h << 1) | 1;
7721
he = (he << 4) | he;
7722
7723
uint32_t m = (le * 5 + he * 3) / 8;
7724
7725
int err = (int)labs((int)v - (int)m);
7726
if (err < lowest_err)
7727
{
7728
lowest_err = err;
7729
best_l = l;
7730
best_h = h;
7731
}
7732
}
7733
}
7734
7735
g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l;
7736
g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h;
7737
}
7738
7739
for (uint32_t v = 0; v < 256; v++)
7740
{
7741
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7742
7743
for (uint32_t l = 0; l < 8; l++)
7744
{
7745
uint32_t le = (l << 1);
7746
le = (le << 4) | le;
7747
7748
int err = (int)labs((int)v - (int)le);
7749
if (err < lowest_err)
7750
{
7751
lowest_err = err;
7752
best_l = l;
7753
best_h = l;
7754
}
7755
}
7756
7757
g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l;
7758
g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h;
7759
}
7760
7761
for (uint32_t v = 0; v < 256; v++)
7762
{
7763
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7764
7765
for (uint32_t h = 0; h < 8; h++)
7766
{
7767
uint32_t he = (h << 1) | 1;
7768
he = (he << 4) | he;
7769
7770
int err = (int)labs((int)v - (int)he);
7771
if (err < lowest_err)
7772
{
7773
lowest_err = err;
7774
best_l = h;
7775
best_h = h;
7776
}
7777
}
7778
7779
g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l;
7780
g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h;
7781
}
7782
7783
for (uint32_t v = 0; v < 256; v++)
7784
{
7785
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7786
7787
for (uint32_t l = 0; l < 8; l++)
7788
{
7789
uint32_t le = (l << 2) | (l >> 1);
7790
le = (le << 3) | (le >> 2);
7791
7792
for (uint32_t h = 0; h < 16; h++)
7793
{
7794
uint32_t he = (h << 1) | (h >> 3);
7795
he = (he << 3) | (he >> 2);
7796
7797
uint32_t m = (le * 5 + he * 3) / 8;
7798
7799
int err = (int)labs((int)v - (int)m);
7800
if (err < lowest_err)
7801
{
7802
lowest_err = err;
7803
best_l = l;
7804
best_h = h;
7805
}
7806
}
7807
}
7808
7809
g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l;
7810
g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h;
7811
}
7812
7813
for (uint32_t v = 0; v < 256; v++)
7814
{
7815
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7816
7817
for (uint32_t l = 0; l < 16; l++)
7818
{
7819
uint32_t le = (l << 1) | (l >> 3);
7820
le = (le << 3) | (le >> 2);
7821
7822
for (uint32_t h = 0; h < 16; h++)
7823
{
7824
uint32_t he = (h << 1) | (h >> 3);
7825
he = (he << 3) | (he >> 2);
7826
7827
uint32_t m = (le * 5 + he * 3) / 8;
7828
7829
int err = (int)labs((int)v - (int)m);
7830
if (err < lowest_err)
7831
{
7832
lowest_err = err;
7833
best_l = l;
7834
best_h = h;
7835
}
7836
}
7837
}
7838
7839
g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l;
7840
g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h;
7841
}
7842
}
7843
#endif // BASISD_SUPPORT_PVRTC2
7844
7845
//------------------------------------------------------------------------------------------------
7846
7847
// BC7 mode 5 RGB encoder
7848
7849
#if BASISD_SUPPORT_BC7_MODE5
7850
namespace bc7_mode_5_encoder
7851
{
7852
static float g_mode5_rgba_midpoints[128];
7853
7854
void encode_bc7_mode5_init()
7855
{
7856
// Mode 5 endpoint midpoints
7857
for (uint32_t i = 0; i < 128; i++)
7858
{
7859
uint32_t vl = (i << 1);
7860
vl |= (vl >> 7);
7861
float lo = vl / 255.0f;
7862
7863
uint32_t vh = basisu::minimumi(127, i + 1) << 1;
7864
vh |= (vh >> 7);
7865
float hi = vh / 255.0f;
7866
7867
if (i == 127)
7868
g_mode5_rgba_midpoints[i] = 1e+15f;
7869
else
7870
g_mode5_rgba_midpoints[i] = (lo + hi) / 2.0f;
7871
}
7872
}
7873
7874
// Expands a 7-bit value (0..127) to 8 bits by shifting it up one bit and
// copying its top bit into the new LSB.
static inline uint32_t from_7(uint32_t v)
{
	assert(v < 128);

	const uint32_t shifted_up = v << 1;
	const uint32_t replicated_top_bit = v >> 6;
	return shifted_up | replicated_top_bit;
}
7879
7880
static inline int to_7(float c)
7881
{
7882
assert((c >= 0) && (c <= 1.0f));
7883
7884
int vl = (int)(c * 127.0f);
7885
vl += (c > g_mode5_rgba_midpoints[vl]);
7886
return clampi(vl, 0, 127);
7887
}
7888
7889
static inline int to_7(int c8)
7890
{
7891
assert((c8 >= 0) && (c8 <= 255));
7892
7893
float c = (float)c8 * (1.0f / 255.0f);
7894
7895
int vl = (int)(c * 127.0f);
7896
vl += (c > g_mode5_rgba_midpoints[vl]);
7897
return clampi(vl, 0, 127);
7898
}
7899
7900
// This is usable with ASTC as well, which uses the same 2-bit interpolation weights.
7901
static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
7902
{
7903
assert(w < 4);
7904
return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6;
7905
}
7906
7907
static void eval_weights(
7908
const color32 *pPixels, uint8_t* pWeights,
7909
int lr, int lg, int lb,
7910
int hr, int hg, int hb)
7911
{
7912
lr = from_7(lr); lg = from_7(lg); lb = from_7(lb);
7913
hr = from_7(hr); hg = from_7(hg); hb = from_7(hb);
7914
7915
int cr[4], cg[4], cb[4];
7916
for (uint32_t i = 0; i < 4; i++)
7917
{
7918
cr[i] = (uint8_t)bc7_interp2(lr, hr, i);
7919
cg[i] = (uint8_t)bc7_interp2(lg, hg, i);
7920
cb[i] = (uint8_t)bc7_interp2(lb, hb, i);
7921
}
7922
7923
#if 0
7924
for (uint32_t i = 0; i < 16; i++)
7925
{
7926
const int pr = pPixels[i].r, pg = pPixels[i].g, pb = pPixels[i].b;
7927
7928
uint32_t best_err = UINT32_MAX;
7929
uint32_t best_idx = 0;
7930
for (uint32_t j = 0; j < 4; j++)
7931
{
7932
uint32_t e = square(pr - cr[j]) + square(pg - cg[j]) + square(pb - cb[j]);
7933
if (e < best_err)
7934
{
7935
best_err = e;
7936
best_idx = j;
7937
}
7938
7939
pWeights[i] = (uint8_t)best_idx;
7940
}
7941
} // i
7942
#else
7943
int ar = cr[3] - cr[0], ag = cg[3] - cg[0], ab = cb[3] - cb[0];
7944
7945
int dots[4];
7946
for (uint32_t i = 0; i < 4; i++)
7947
dots[i] = (int)cr[i] * ar + (int)cg[i] * ag + (int)cb[i] * ab;
7948
7949
// seems very rare in LDR, so rare that it doesn't matter
7950
//assert(dots[0] <= dots[1]);
7951
//assert(dots[1] <= dots[2]);
7952
//assert(dots[2] <= dots[3]);
7953
7954
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
7955
7956
ar *= 2; ag *= 2; ab *= 2;
7957
7958
for (uint32_t i = 0; i < 16; i += 4)
7959
{
7960
const int d0 = pPixels[i + 0].r * ar + pPixels[i + 0].g * ag + pPixels[i + 0].b * ab;
7961
const int d1 = pPixels[i + 1].r * ar + pPixels[i + 1].g * ag + pPixels[i + 1].b * ab;
7962
const int d2 = pPixels[i + 2].r * ar + pPixels[i + 2].g * ag + pPixels[i + 2].b * ab;
7963
const int d3 = pPixels[i + 3].r * ar + pPixels[i + 3].g * ag + pPixels[i + 3].b * ab;
7964
7965
pWeights[i + 0] = (d0 > t0) + (d0 >= t1) + (d0 >= t2);
7966
pWeights[i + 1] = (d1 > t0) + (d1 >= t1) + (d1 >= t2);
7967
pWeights[i + 2] = (d2 > t0) + (d2 >= t1) + (d2 >= t2);
7968
pWeights[i + 3] = (d3 > t0) + (d3 >= t1) + (d3 >= t2);
7969
}
7970
#endif
7971
}
7972
7973
static void pack_bc7_mode5_rgb_block(
7974
bc7_mode_5* pDst_block,
7975
int lr, int lg, int lb, int hr, int hg, int hb,
7976
const uint8_t* pWeights)
7977
{
7978
assert((lr >= 0) && (lr <= 127));
7979
assert((lg >= 0) && (lg <= 127));
7980
assert((lb >= 0) && (lb <= 127));
7981
assert((hr >= 0) && (hr <= 127));
7982
assert((hg >= 0) && (hg <= 127));
7983
assert((hb >= 0) && (hb <= 127));
7984
7985
pDst_block->m_lo_bits = 0;
7986
7987
uint8_t weight_inv = 0;
7988
if (pWeights[0] & 2)
7989
{
7990
std::swap(lr, hr);
7991
std::swap(lg, hg);
7992
std::swap(lb, hb);
7993
weight_inv = 3;
7994
}
7995
assert((pWeights[0] ^ weight_inv) <= 1);
7996
7997
pDst_block->m_lo.m_mode = 32;
7998
pDst_block->m_lo.m_r0 = lr;
7999
pDst_block->m_lo.m_r1 = hr;
8000
pDst_block->m_lo.m_g0 = lg;
8001
pDst_block->m_lo.m_g1 = hg;
8002
pDst_block->m_lo.m_b0 = lb;
8003
pDst_block->m_lo.m_b1 = hb;
8004
8005
pDst_block->m_lo.m_a0 = 255;
8006
pDst_block->m_lo.m_a1_0 = 63;
8007
8008
uint64_t sel_bits = 3;
8009
uint32_t cur_ofs = 2;
8010
for (uint32_t i = 0; i < 16; i++)
8011
{
8012
assert(pWeights[i] <= 3);
8013
sel_bits |= ((uint64_t)(weight_inv ^ pWeights[i])) << cur_ofs;
8014
cur_ofs += (i ? 2 : 1);
8015
}
8016
8017
pDst_block->m_hi_bits = sel_bits;
8018
}
8019
8020
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
8021
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
8022
static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
8023
8024
static inline bool compute_least_squares_endpoints4_rgb(
8025
const color32 *pColors, const uint8_t* pSelectors,
8026
int& lr, int& lg, int& lb, int& hr, int& hg, int& hb,
8027
int total_r, int total_g, int total_b)
8028
{
8029
uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0;
8030
uint32_t weight_accum = 0;
8031
for (uint32_t i = 0; i < 16; i++)
8032
{
8033
const uint8_t r = pColors[i].r, g = pColors[i].g, b = pColors[i].b;
8034
const uint8_t sel = pSelectors[i];
8035
8036
weight_accum += g_weight_vals4[sel];
8037
uq00_r += sel * r;
8038
uq00_g += sel * g;
8039
uq00_b += sel * b;
8040
}
8041
8042
int q10_r = total_r * 3 - uq00_r;
8043
int q10_g = total_g * 3 - uq00_g;
8044
int q10_b = total_b * 3 - uq00_b;
8045
8046
float z00 = (float)((weight_accum >> 16) & 0xFF);
8047
float z10 = (float)((weight_accum >> 8) & 0xFF);
8048
float z11 = (float)(weight_accum & 0xFF);
8049
float z01 = z10;
8050
8051
float det = z00 * z11 - z01 * z10;
8052
if (fabs(det) < 1e-8f)
8053
return false;
8054
8055
det = (3.0f / 255.0f) / det;
8056
8057
float iz00, iz01, iz10, iz11;
8058
iz00 = z11 * det;
8059
iz01 = -z01 * det;
8060
iz10 = -z10 * det;
8061
iz11 = z00 * det;
8062
8063
float fhr = basisu::clamp(iz00 * (float)uq00_r + iz01 * q10_r, 0.0f, 1.0f);
8064
float flr = basisu::clamp(iz10 * (float)uq00_r + iz11 * q10_r, 0.0f, 1.0f);
8065
8066
float fhg = basisu::clamp(iz00 * (float)uq00_g + iz01 * q10_g, 0.0f, 1.0f);
8067
float flg = basisu::clamp(iz10 * (float)uq00_g + iz11 * q10_g, 0.0f, 1.0f);
8068
8069
float fhb = basisu::clamp(iz00 * (float)uq00_b + iz01 * q10_b, 0.0f, 1.0f);
8070
float flb = basisu::clamp(iz10 * (float)uq00_b + iz11 * q10_b, 0.0f, 1.0f);
8071
8072
lr = to_7(flr); lg = to_7(flg); lb = to_7(flb);
8073
hr = to_7(fhr); hg = to_7(fhg); hb = to_7(fhb);
8074
8075
return true;
8076
}
8077
8078
void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode)
8079
{
8080
assert(g_mode5_rgba_midpoints[1]);
8081
8082
int total_r = 0, total_g = 0, total_b = 0;
8083
8084
int min_r = 255, min_g = 255, min_b = 255;
8085
int max_r = 0, max_g = 0, max_b = 0;
8086
8087
for (uint32_t i = 0; i < 16; i++)
8088
{
8089
int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b;
8090
8091
total_r += r; total_g += g; total_b += b;
8092
8093
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
8094
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
8095
}
8096
8097
if ((min_r == max_r) && (min_g == max_g) && (min_b == max_b))
8098
{
8099
const int lr = g_bc7_m5_equals_1[min_r].m_lo, lg = g_bc7_m5_equals_1[min_g].m_lo, lb = g_bc7_m5_equals_1[min_b].m_lo;
8100
const int hr = g_bc7_m5_equals_1[min_r].m_hi, hg = g_bc7_m5_equals_1[min_g].m_hi, hb = g_bc7_m5_equals_1[min_b].m_hi;
8101
uint8_t solid_weights[16];
8102
memset(solid_weights, 1, 16);
8103
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, solid_weights);
8104
return;
8105
}
8106
8107
int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4;
8108
8109
// covar rows are:
8110
// 0, 1, 2
8111
// 1, 3, 4
8112
// 2, 4, 5
8113
int icov[6] = { 0, 0, 0, 0, 0, 0 };
8114
8115
for (uint32_t i = 0; i < 16; i++)
8116
{
8117
int r = (int)pPixels[i].r - mean_r;
8118
int g = (int)pPixels[i].g - mean_g;
8119
int b = (int)pPixels[i].b - mean_b;
8120
icov[0] += r * r; icov[1] += r * g; icov[2] += r * b;
8121
icov[3] += g * g; icov[4] += g * b;
8122
icov[5] += b * b;
8123
}
8124
8125
int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16
8126
8127
// TODO: Tune this
8128
const int32_t SIMPLE_BLOCK_THRESH = 10 * 16;
8129
8130
if ((!hq_mode) && (block_max_var < SIMPLE_BLOCK_THRESH))
8131
{
8132
const int L = 16, H = 239;
8133
8134
int lr = to_7(lerp_8bit(min_r, max_r, L));
8135
int lg = to_7(lerp_8bit(min_g, max_g, L));
8136
int lb = to_7(lerp_8bit(min_b, max_b, L));
8137
8138
int hr = to_7(lerp_8bit(min_r, max_r, H));
8139
int hg = to_7(lerp_8bit(min_g, max_g, H));
8140
int hb = to_7(lerp_8bit(min_b, max_b, H));
8141
8142
uint8_t cur_weights[16];
8143
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8144
8145
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights);
8146
return;
8147
}
8148
8149
float cov[6];
8150
for (uint32_t i = 0; i < 6; i++)
8151
cov[i] = (float)icov[i];
8152
8153
const float sc = 1.0f / (float)block_max_var;
8154
const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];
8155
8156
const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
8157
const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
8158
const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;
8159
8160
int saxis_r = 306, saxis_g = 601, saxis_b = 117;
8161
8162
float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb));
8163
if (fabs(k) >= basisu::SMALL_FLOAT_VAL)
8164
{
8165
float m = 2048.0f / k;
8166
saxis_r = (int)(alt_xr * m);
8167
saxis_g = (int)(alt_xg * m);
8168
saxis_b = (int)(alt_xb * m);
8169
}
8170
8171
saxis_r = (int)((uint32_t)saxis_r << 4U);
8172
saxis_g = (int)((uint32_t)saxis_g << 4U);
8173
saxis_b = (int)((uint32_t)saxis_b << 4U);
8174
8175
int low_dot = INT_MAX, high_dot = INT_MIN;
8176
8177
for (uint32_t i = 0; i < 16; i += 4)
8178
{
8179
int dot0 = ((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) & ~0xF) + i;
8180
int dot1 = ((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) & ~0xF) + i + 1;
8181
int dot2 = ((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) & ~0xF) + i + 2;
8182
int dot3 = ((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) & ~0xF) + i + 3;
8183
8184
int min_d01 = basisu::minimum(dot0, dot1);
8185
int max_d01 = basisu::maximum(dot0, dot1);
8186
8187
int min_d23 = basisu::minimum(dot2, dot3);
8188
int max_d23 = basisu::maximum(dot2, dot3);
8189
8190
int min_d = basisu::minimum(min_d01, min_d23);
8191
int max_d = basisu::maximum(max_d01, max_d23);
8192
8193
low_dot = basisu::minimum(low_dot, min_d);
8194
high_dot = basisu::maximum(high_dot, max_d);
8195
}
8196
int low_c = low_dot & 15;
8197
int high_c = high_dot & 15;
8198
8199
int lr = to_7(pPixels[low_c].r), lg = to_7(pPixels[low_c].g), lb = to_7(pPixels[low_c].b);
8200
int hr = to_7(pPixels[high_c].r), hg = to_7(pPixels[high_c].g), hb = to_7(pPixels[high_c].b);
8201
8202
uint8_t cur_weights[16];
8203
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8204
8205
if (compute_least_squares_endpoints4_rgb(
8206
pPixels, cur_weights,
8207
lr, lg, lb, hr, hg, hb,
8208
total_r, total_g, total_b))
8209
{
8210
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8211
}
8212
8213
#if 0
8214
lr = 0; lg = 0; lb = 0;
8215
hr = 0; hg = 0; hb = 0;
8216
#endif
8217
8218
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights);
8219
}
8220
8221
} // namespace bc7_mode_5_encoder
8222
8223
#endif // BASISD_SUPPORT_BC7_MODE5
8224
8225
//------------------------------------------------------------------------------------------------
8226
8227
// Default constructor: starts with no global codebook attached and a selector
// history buffer size of zero.
basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder()
	: m_pGlobal_codebook(nullptr)
	, m_selector_history_buf_size(0)
{
}
8232
8233
bool basisu_lowlevel_etc1s_transcoder::decode_palettes(
8234
uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size,
8235
uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size)
8236
{
8237
if (m_pGlobal_codebook)
8238
{
8239
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n");
8240
return false;
8241
}
8242
bitwise_decoder sym_codec;
8243
8244
huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
8245
8246
if (!sym_codec.init(pEndpoints_data, endpoints_data_size))
8247
{
8248
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n");
8249
return false;
8250
}
8251
8252
if (!sym_codec.read_huffman_table(color5_delta_model0))
8253
{
8254
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n");
8255
return false;
8256
}
8257
8258
if (!sym_codec.read_huffman_table(color5_delta_model1))
8259
{
8260
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n");
8261
return false;
8262
}
8263
8264
if (!sym_codec.read_huffman_table(color5_delta_model2))
8265
{
8266
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n");
8267
return false;
8268
}
8269
8270
if (!sym_codec.read_huffman_table(inten_delta_model))
8271
{
8272
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
8273
return false;
8274
}
8275
8276
if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid())
8277
{
8278
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
8279
return false;
8280
}
8281
8282
const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0;
8283
8284
m_local_endpoints.resize(num_endpoints);
8285
8286
color32 prev_color5(16, 16, 16, 0);
8287
uint32_t prev_inten = 0;
8288
8289
for (uint32_t i = 0; i < num_endpoints; i++)
8290
{
8291
uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model);
8292
m_local_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7);
8293
prev_inten = m_local_endpoints[i].m_inten5;
8294
8295
for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++)
8296
{
8297
int delta;
8298
if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI)
8299
delta = sym_codec.decode_huffman(color5_delta_model0);
8300
else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI)
8301
delta = sym_codec.decode_huffman(color5_delta_model1);
8302
else
8303
delta = sym_codec.decode_huffman(color5_delta_model2);
8304
8305
int v = (prev_color5[c] + delta) & 31;
8306
8307
m_local_endpoints[i].m_color5[c] = static_cast<uint8_t>(v);
8308
8309
prev_color5[c] = static_cast<uint8_t>(v);
8310
}
8311
8312
if (endpoints_are_grayscale)
8313
{
8314
m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0];
8315
m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0];
8316
}
8317
}
8318
8319
sym_codec.stop();
8320
8321
m_local_selectors.resize(num_selectors);
8322
8323
if (!sym_codec.init(pSelectors_data, selectors_data_size))
8324
{
8325
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n");
8326
return false;
8327
}
8328
8329
basist::huffman_decoding_table delta_selector_pal_model;
8330
8331
const bool used_global_selector_cb = (sym_codec.get_bits(1) == 1);
8332
8333
if (used_global_selector_cb)
8334
{
8335
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: global selector codebooks are unsupported\n");
8336
return false;
8337
}
8338
else
8339
{
8340
const bool used_hybrid_selector_cb = (sym_codec.get_bits(1) == 1);
8341
8342
if (used_hybrid_selector_cb)
8343
{
8344
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n");
8345
return false;
8346
}
8347
8348
const bool used_raw_encoding = (sym_codec.get_bits(1) == 1);
8349
8350
if (used_raw_encoding)
8351
{
8352
for (uint32_t i = 0; i < num_selectors; i++)
8353
{
8354
for (uint32_t j = 0; j < 4; j++)
8355
{
8356
uint32_t cur_byte = sym_codec.get_bits(8);
8357
8358
for (uint32_t k = 0; k < 4; k++)
8359
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8360
}
8361
8362
m_local_selectors[i].init_flags();
8363
}
8364
}
8365
else
8366
{
8367
if (!sym_codec.read_huffman_table(delta_selector_pal_model))
8368
{
8369
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n");
8370
return false;
8371
}
8372
8373
if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid()))
8374
{
8375
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n");
8376
return false;
8377
}
8378
8379
uint8_t prev_bytes[4] = { 0, 0, 0, 0 };
8380
8381
for (uint32_t i = 0; i < num_selectors; i++)
8382
{
8383
if (!i)
8384
{
8385
for (uint32_t j = 0; j < 4; j++)
8386
{
8387
uint32_t cur_byte = sym_codec.get_bits(8);
8388
prev_bytes[j] = static_cast<uint8_t>(cur_byte);
8389
8390
for (uint32_t k = 0; k < 4; k++)
8391
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8392
}
8393
m_local_selectors[i].init_flags();
8394
continue;
8395
}
8396
8397
for (uint32_t j = 0; j < 4; j++)
8398
{
8399
int delta_byte = sym_codec.decode_huffman(delta_selector_pal_model);
8400
8401
uint32_t cur_byte = delta_byte ^ prev_bytes[j];
8402
prev_bytes[j] = static_cast<uint8_t>(cur_byte);
8403
8404
for (uint32_t k = 0; k < 4; k++)
8405
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8406
}
8407
m_local_selectors[i].init_flags();
8408
}
8409
}
8410
}
8411
8412
sym_codec.stop();
8413
8414
return true;
8415
}
8416
8417
bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size)
8418
{
8419
basist::bitwise_decoder sym_codec;
8420
if (!sym_codec.init(pTable_data, table_data_size))
8421
{
8422
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n");
8423
return false;
8424
}
8425
8426
if (!sym_codec.read_huffman_table(m_endpoint_pred_model))
8427
{
8428
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n");
8429
return false;
8430
}
8431
8432
if (m_endpoint_pred_model.get_code_sizes().size() == 0)
8433
{
8434
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n");
8435
return false;
8436
}
8437
8438
if (!sym_codec.read_huffman_table(m_delta_endpoint_model))
8439
{
8440
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n");
8441
return false;
8442
}
8443
8444
if (m_delta_endpoint_model.get_code_sizes().size() == 0)
8445
{
8446
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n");
8447
return false;
8448
}
8449
8450
if (!sym_codec.read_huffman_table(m_selector_model))
8451
{
8452
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n");
8453
return false;
8454
}
8455
8456
if (m_selector_model.get_code_sizes().size() == 0)
8457
{
8458
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n");
8459
return false;
8460
}
8461
8462
if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model))
8463
{
8464
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n");
8465
return false;
8466
}
8467
8468
if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0)
8469
{
8470
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n");
8471
return false;
8472
}
8473
8474
m_selector_history_buf_size = sym_codec.get_bits(13);
8475
// Check for bogus values.
8476
if (!m_selector_history_buf_size)
8477
{
8478
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 5\n");
8479
return false;
8480
}
8481
8482
sym_codec.stop();
8483
8484
return true;
8485
}
8486
8487
bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
8488
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
8489
basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels, uint32_t decode_flags)
8490
{
8491
// 'pDst_blocks' unused when disabling *all* hardware transcode options
8492
// (and 'bc1_allow_threecolor_blocks' when disabling DXT)
8493
BASISU_NOTE_UNUSED(pDst_blocks);
8494
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
8495
BASISU_NOTE_UNUSED(transcode_alpha);
8496
BASISU_NOTE_UNUSED(pAlpha_blocks);
8497
8498
assert(g_transcoder_initialized);
8499
if (!g_transcoder_initialized)
8500
{
8501
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n");
8502
return false;
8503
}
8504
8505
if (!pState)
8506
pState = &m_def_state;
8507
8508
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
8509
8510
if (!output_row_pitch_in_blocks_or_pixels)
8511
{
8512
if (basis_block_format_is_uncompressed(fmt))
8513
output_row_pitch_in_blocks_or_pixels = orig_width;
8514
else
8515
{
8516
if (fmt == block_format::cFXT1_RGB)
8517
output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
8518
else
8519
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
8520
}
8521
}
8522
8523
if (basis_block_format_is_uncompressed(fmt))
8524
{
8525
if (!output_rows_in_pixels)
8526
output_rows_in_pixels = orig_height;
8527
}
8528
8529
basisu::vector<uint32_t>* pPrev_frame_indices = nullptr;
8530
if (is_video)
8531
{
8532
// TODO: Add check to make sure the caller hasn't tried skipping past p-frames
8533
//const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
8534
//const uint32_t level_index = slice_desc.m_level_index;
8535
8536
if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels)
8537
{
8538
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n");
8539
return false;
8540
}
8541
8542
pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index];
8543
if (pPrev_frame_indices->size() < total_blocks)
8544
pPrev_frame_indices->resize(total_blocks);
8545
}
8546
8547
basist::bitwise_decoder sym_codec;
8548
8549
if (!sym_codec.init(pImage_data, image_data_size))
8550
{
8551
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n");
8552
return false;
8553
}
8554
8555
approx_move_to_front selector_history_buf(m_selector_history_buf_size);
8556
8557
uint32_t cur_selector_rle_count = 0;
8558
8559
decoder_etc_block block;
8560
memset(&block, 0, sizeof(block));
8561
8562
//block.set_flip_bit(true);
8563
// Setting the flip bit to false to be compatible with the Khronos KDFS.
8564
block.set_flip_bit(false);
8565
8566
block.set_diff_bit(true);
8567
8568
// Important: This MUST be freed before this function returns.
8569
void* pPVRTC_work_mem = nullptr;
8570
uint32_t* pPVRTC_endpoints = nullptr;
8571
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
8572
{
8573
pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t)));
8574
if (!pPVRTC_work_mem)
8575
{
8576
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n");
8577
return false;
8578
}
8579
pPVRTC_endpoints = (uint32_t*)&((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y];
8580
}
8581
8582
if (pState->m_block_endpoint_preds[0].size() < num_blocks_x)
8583
{
8584
pState->m_block_endpoint_preds[0].resize(num_blocks_x);
8585
pState->m_block_endpoint_preds[1].resize(num_blocks_x);
8586
}
8587
8588
uint32_t cur_pred_bits = 0;
8589
int prev_endpoint_pred_sym = 0;
8590
int endpoint_pred_repeat_count = 0;
8591
uint32_t prev_endpoint_index = 0;
8592
const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints;
8593
const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors;
8594
if (!endpoints.size() || !selectors.size())
8595
{
8596
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n");
8597
8598
if (pPVRTC_work_mem)
8599
free(pPVRTC_work_mem);
8600
8601
return false;
8602
}
8603
8604
const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size();
8605
const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX;
8606
8607
#if BASISD_SUPPORT_BC7_MODE5
8608
const bool bc7_chroma_filtering = ((decode_flags & cDecodeFlagsNoETC1SChromaFiltering) == 0) &&
8609
((fmt == block_format::cBC7_M5_COLOR) || (fmt == block_format::cBC7));
8610
8611
basisu::vector2D<uint16_t> decoded_endpoints;
8612
if (bc7_chroma_filtering)
8613
{
8614
if (!decoded_endpoints.try_resize(num_blocks_x, num_blocks_y))
8615
{
8616
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: allocation failed\n");
8617
8618
if (pPVRTC_work_mem)
8619
free(pPVRTC_work_mem);
8620
8621
return false;
8622
}
8623
}
8624
#endif
8625
8626
for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
8627
{
8628
const uint32_t cur_block_endpoint_pred_array = block_y & 1;
8629
8630
for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
8631
{
8632
// Decode endpoint index predictor symbols
8633
if ((block_x & 1) == 0)
8634
{
8635
if ((block_y & 1) == 0)
8636
{
8637
if (endpoint_pred_repeat_count)
8638
{
8639
endpoint_pred_repeat_count--;
8640
cur_pred_bits = prev_endpoint_pred_sym;
8641
}
8642
else
8643
{
8644
cur_pred_bits = sym_codec.decode_huffman(m_endpoint_pred_model);
8645
if (cur_pred_bits == ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
8646
{
8647
endpoint_pred_repeat_count = sym_codec.decode_vlc(ENDPOINT_PRED_COUNT_VLC_BITS) + ENDPOINT_PRED_MIN_REPEAT_COUNT - 1;
8648
8649
cur_pred_bits = prev_endpoint_pred_sym;
8650
}
8651
else
8652
{
8653
prev_endpoint_pred_sym = cur_pred_bits;
8654
}
8655
}
8656
8657
pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4);
8658
}
8659
else
8660
{
8661
cur_pred_bits = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits;
8662
}
8663
}
8664
8665
// Decode endpoint index
8666
uint32_t endpoint_index, selector_index = 0;
8667
8668
const uint32_t pred = cur_pred_bits & 3;
8669
cur_pred_bits >>= 2;
8670
8671
if (pred == 0)
8672
{
8673
// Left
8674
if (!block_x)
8675
{
8676
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n");
8677
if (pPVRTC_work_mem)
8678
free(pPVRTC_work_mem);
8679
return false;
8680
}
8681
8682
endpoint_index = prev_endpoint_index;
8683
}
8684
else if (pred == 1)
8685
{
8686
// Upper
8687
if (!block_y)
8688
{
8689
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n");
8690
if (pPVRTC_work_mem)
8691
free(pPVRTC_work_mem);
8692
return false;
8693
}
8694
8695
endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_endpoint_index;
8696
}
8697
else if (pred == 2)
8698
{
8699
if (is_video)
8700
{
8701
assert(pred == CR_ENDPOINT_PRED_INDEX);
8702
endpoint_index = (*pPrev_frame_indices)[block_x + block_y * num_blocks_x];
8703
selector_index = endpoint_index >> 16;
8704
endpoint_index &= 0xFFFFU;
8705
}
8706
else
8707
{
8708
// Upper left
8709
if ((!block_x) || (!block_y))
8710
{
8711
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n");
8712
if (pPVRTC_work_mem)
8713
free(pPVRTC_work_mem);
8714
return false;
8715
}
8716
8717
endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x - 1].m_endpoint_index;
8718
}
8719
}
8720
else
8721
{
8722
// Decode and apply delta
8723
const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model);
8724
8725
endpoint_index = delta_sym + prev_endpoint_index;
8726
if (endpoint_index >= endpoints.size())
8727
endpoint_index -= (int)endpoints.size();
8728
}
8729
8730
pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index;
8731
8732
prev_endpoint_index = endpoint_index;
8733
8734
// Decode selector index
8735
if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX))
8736
{
8737
int selector_sym;
8738
if (cur_selector_rle_count > 0)
8739
{
8740
cur_selector_rle_count--;
8741
8742
selector_sym = (int)selectors.size();
8743
}
8744
else
8745
{
8746
selector_sym = sym_codec.decode_huffman(m_selector_model);
8747
8748
if (selector_sym == static_cast<int>(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX))
8749
{
8750
int run_sym = sym_codec.decode_huffman(m_selector_history_buf_rle_model);
8751
8752
if (run_sym == (SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
8753
cur_selector_rle_count = sym_codec.decode_vlc(7) + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8754
else
8755
cur_selector_rle_count = run_sym + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8756
8757
if (cur_selector_rle_count > total_blocks)
8758
{
8759
// The file is corrupted or we've got a bug.
8760
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n");
8761
if (pPVRTC_work_mem)
8762
free(pPVRTC_work_mem);
8763
return false;
8764
}
8765
8766
selector_sym = (int)selectors.size();
8767
8768
cur_selector_rle_count--;
8769
}
8770
}
8771
8772
if (selector_sym >= (int)selectors.size())
8773
{
8774
assert(m_selector_history_buf_size > 0);
8775
8776
int history_buf_index = selector_sym - (int)selectors.size();
8777
8778
if (history_buf_index >= (int)selector_history_buf.size())
8779
{
8780
// The file is corrupted or we've got a bug.
8781
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n");
8782
if (pPVRTC_work_mem)
8783
free(pPVRTC_work_mem);
8784
return false;
8785
}
8786
8787
selector_index = selector_history_buf[history_buf_index];
8788
8789
if (history_buf_index != 0)
8790
selector_history_buf.use(history_buf_index);
8791
}
8792
else
8793
{
8794
selector_index = selector_sym;
8795
8796
if (m_selector_history_buf_size)
8797
selector_history_buf.add(selector_index);
8798
}
8799
}
8800
8801
if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size()))
8802
{
8803
// The file is corrupted or we've got a bug.
8804
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n");
8805
if (pPVRTC_work_mem)
8806
free(pPVRTC_work_mem);
8807
return false;
8808
}
8809
8810
if (is_video)
8811
(*pPrev_frame_indices)[block_x + block_y * num_blocks_x] = endpoint_index | (selector_index << 16);
8812
8813
#if BASISD_ENABLE_DEBUG_FLAGS
8814
if ((g_debug_flags & cDebugFlagVisCRs) && ((fmt == block_format::cETC1) || (fmt == block_format::cBC1)))
8815
{
8816
if ((is_video) && (pred == 2))
8817
{
8818
decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8819
memset(pDst_block, 0xFF, 8);
8820
continue;
8821
}
8822
}
8823
#endif
8824
8825
const endpoint* pEndpoints = &endpoints[endpoint_index];
8826
const selector* pSelector = &selectors[selector_index];
8827
8828
switch (fmt)
8829
{
8830
case block_format::cETC1:
8831
{
8832
decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8833
8834
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8835
block.set_inten_table(0, pEndpoints->m_inten5);
8836
block.set_inten_table(1, pEndpoints->m_inten5);
8837
8838
pDst_block->m_uint32[0] = block.m_uint32[0];
8839
pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8840
8841
break;
8842
}
8843
case block_format::cBC1:
8844
{
8845
#if BASISD_SUPPORT_DXT1
8846
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8847
#if BASISD_ENABLE_DEBUG_FLAGS
8848
if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints))
8849
convert_etc1s_to_dxt1_vis(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8850
else
8851
#endif
8852
convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8853
#else
8854
assert(0);
8855
#endif
8856
break;
8857
}
8858
case block_format::cBC4:
8859
{
8860
#if BASISD_SUPPORT_DXT5A
8861
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8862
convert_etc1s_to_dxt5a(static_cast<dxt5a_block*>(pDst_block), pEndpoints, pSelector);
8863
#else
8864
assert(0);
8865
#endif
8866
break;
8867
}
8868
case block_format::cPVRTC1_4_RGB:
8869
{
8870
#if BASISD_SUPPORT_PVRTC1
8871
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8872
block.set_inten_table(0, pEndpoints->m_inten5);
8873
block.set_inten_table(1, pEndpoints->m_inten5);
8874
block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8875
8876
((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8877
8878
const color32& base_color = pEndpoints->m_color5;
8879
const uint32_t inten_table = pEndpoints->m_inten5;
8880
8881
const uint32_t low_selector = pSelector->m_lo_selector;
8882
const uint32_t high_selector = pSelector->m_hi_selector;
8883
8884
// Get block's RGB bounding box
8885
color32 block_colors[2];
8886
decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8887
8888
assert(block_colors[0][0] <= block_colors[1][0]);
8889
assert(block_colors[0][1] <= block_colors[1][1]);
8890
assert(block_colors[0][2] <= block_colors[1][2]);
8891
8892
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8893
pvrtc4_block temp;
8894
temp.set_opaque_endpoint_floor(0, block_colors[0]);
8895
temp.set_opaque_endpoint_ceil(1, block_colors[1]);
8896
8897
pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8898
#else
8899
assert(0);
8900
#endif
8901
8902
break;
8903
}
8904
case block_format::cPVRTC1_4_RGBA:
8905
{
8906
#if BASISD_SUPPORT_PVRTC1
8907
assert(pAlpha_blocks);
8908
8909
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8910
block.set_inten_table(0, pEndpoints->m_inten5);
8911
block.set_inten_table(1, pEndpoints->m_inten5);
8912
block.set_raw_selector_bits(pSelector->m_selectors[0], pSelector->m_selectors[1], pSelector->m_selectors[2], pSelector->m_selectors[3]);
8913
8914
((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8915
8916
// Get block's RGBA bounding box
8917
const color32& base_color = pEndpoints->m_color5;
8918
const uint32_t inten_table = pEndpoints->m_inten5;
8919
const uint32_t low_selector = pSelector->m_lo_selector;
8920
const uint32_t high_selector = pSelector->m_hi_selector;
8921
color32 block_colors[2];
8922
decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8923
8924
assert(block_colors[0][0] <= block_colors[1][0]);
8925
assert(block_colors[0][1] <= block_colors[1][1]);
8926
assert(block_colors[0][2] <= block_colors[1][2]);
8927
8928
const uint16_t* pAlpha_block = reinterpret_cast<uint16_t*>(static_cast<uint8_t*>(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t));
8929
8930
const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]];
8931
const selector* pAlpha_selector = &selectors[pAlpha_block[1]];
8932
8933
const color32& alpha_base_color = pAlpha_endpoints->m_color5;
8934
const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5;
8935
const uint32_t alpha_low_selector = pAlpha_selector->m_lo_selector;
8936
const uint32_t alpha_high_selector = pAlpha_selector->m_hi_selector;
8937
uint32_t alpha_block_colors[2];
8938
decoder_etc_block::get_block_colors5_bounds_g(alpha_block_colors, alpha_base_color, alpha_inten_table, alpha_low_selector, alpha_high_selector);
8939
assert(alpha_block_colors[0] <= alpha_block_colors[1]);
8940
block_colors[0].a = (uint8_t)alpha_block_colors[0];
8941
block_colors[1].a = (uint8_t)alpha_block_colors[1];
8942
8943
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8944
pvrtc4_block temp;
8945
temp.set_endpoint_floor(0, block_colors[0]);
8946
temp.set_endpoint_ceil(1, block_colors[1]);
8947
8948
pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8949
#else
8950
assert(0);
8951
#endif
8952
8953
break;
8954
}
8955
case block_format::cBC7: // for more consistency with UASTC
8956
case block_format::cBC7_M5_COLOR:
8957
{
8958
#if BASISD_SUPPORT_BC7_MODE5
8959
if (bc7_chroma_filtering)
8960
{
8961
assert(endpoint_index <= UINT16_MAX);
8962
decoded_endpoints(block_x, block_y) = (uint16_t)endpoint_index;
8963
}
8964
8965
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8966
convert_etc1s_to_bc7_m5_color(pDst_block, pEndpoints, pSelector);
8967
#else
8968
assert(0);
8969
#endif
8970
break;
8971
}
8972
case block_format::cBC7_M5_ALPHA:
8973
{
8974
#if BASISD_SUPPORT_BC7_MODE5
8975
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8976
convert_etc1s_to_bc7_m5_alpha(pDst_block, pEndpoints, pSelector);
8977
#else
8978
assert(0);
8979
#endif
8980
break;
8981
}
8982
case block_format::cETC2_EAC_A8:
8983
{
8984
#if BASISD_SUPPORT_ETC2_EAC_A8
8985
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8986
convert_etc1s_to_etc2_eac_a8(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
8987
#else
8988
assert(0);
8989
#endif
8990
break;
8991
}
8992
case block_format::cASTC_4x4:
8993
{
8994
#if BASISD_SUPPORT_ASTC
8995
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8996
convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]);
8997
#else
8998
assert(0);
8999
#endif
9000
break;
9001
}
9002
case block_format::cATC_RGB:
9003
{
9004
#if BASISD_SUPPORT_ATC
9005
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9006
convert_etc1s_to_atc(pDst_block, pEndpoints, pSelector);
9007
#else
9008
assert(0);
9009
#endif
9010
break;
9011
}
9012
case block_format::cFXT1_RGB:
9013
{
9014
#if BASISD_SUPPORT_FXT1
9015
const uint32_t fxt1_block_x = block_x >> 1;
9016
const uint32_t fxt1_block_y = block_y;
9017
const uint32_t fxt1_subblock = block_x & 1;
9018
9019
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (fxt1_block_x + fxt1_block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9020
9021
convert_etc1s_to_fxt1(pDst_block, pEndpoints, pSelector, fxt1_subblock);
9022
#else
9023
assert(0);
9024
#endif
9025
break;
9026
}
9027
case block_format::cPVRTC2_4_RGB:
9028
{
9029
#if BASISD_SUPPORT_PVRTC2
9030
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9031
convert_etc1s_to_pvrtc2_rgb(pDst_block, pEndpoints, pSelector);
9032
#endif
9033
break;
9034
}
9035
case block_format::cPVRTC2_4_RGBA:
9036
{
9037
#if BASISD_SUPPORT_PVRTC2
9038
assert(transcode_alpha);
9039
9040
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9041
9042
convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]);
9043
#endif
9044
break;
9045
}
9046
case block_format::cIndices:
9047
{
9048
uint16_t* pDst_block = reinterpret_cast<uint16_t *>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
9049
pDst_block[0] = static_cast<uint16_t>(endpoint_index);
9050
pDst_block[1] = static_cast<uint16_t>(selector_index);
9051
break;
9052
}
9053
case block_format::cA32:
9054
{
9055
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9056
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9057
9058
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9059
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9060
9061
int colors[4];
9062
decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9063
9064
if (max_x == 4)
9065
{
9066
for (uint32_t y = 0; y < max_y; y++)
9067
{
9068
const uint32_t s = pSelector->m_selectors[y];
9069
9070
pDst_pixels[3] = static_cast<uint8_t>(colors[s & 3]);
9071
pDst_pixels[3+4] = static_cast<uint8_t>(colors[(s >> 2) & 3]);
9072
pDst_pixels[3+8] = static_cast<uint8_t>(colors[(s >> 4) & 3]);
9073
pDst_pixels[3+12] = static_cast<uint8_t>(colors[(s >> 6) & 3]);
9074
9075
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9076
}
9077
}
9078
else
9079
{
9080
for (uint32_t y = 0; y < max_y; y++)
9081
{
9082
const uint32_t s = pSelector->m_selectors[y];
9083
9084
for (uint32_t x = 0; x < max_x; x++)
9085
pDst_pixels[3 + 4 * x] = static_cast<uint8_t>(colors[(s >> (x * 2)) & 3]);
9086
9087
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9088
}
9089
}
9090
9091
break;
9092
}
9093
case block_format::cRGB32:
9094
{
9095
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9096
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9097
9098
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9099
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9100
9101
color32 colors[4];
9102
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9103
9104
for (uint32_t y = 0; y < max_y; y++)
9105
{
9106
const uint32_t s = pSelector->m_selectors[y];
9107
9108
for (uint32_t x = 0; x < max_x; x++)
9109
{
9110
const color32& c = colors[(s >> (x * 2)) & 3];
9111
9112
pDst_pixels[0 + 4 * x] = c.r;
9113
pDst_pixels[1 + 4 * x] = c.g;
9114
pDst_pixels[2 + 4 * x] = c.b;
9115
}
9116
9117
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9118
}
9119
9120
break;
9121
}
9122
case block_format::cRGBA32:
9123
{
9124
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9125
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9126
9127
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9128
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9129
9130
color32 colors[4];
9131
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9132
9133
for (uint32_t y = 0; y < max_y; y++)
9134
{
9135
const uint32_t s = pSelector->m_selectors[y];
9136
9137
for (uint32_t x = 0; x < max_x; x++)
9138
{
9139
const color32& c = colors[(s >> (x * 2)) & 3];
9140
9141
pDst_pixels[0 + 4 * x] = c.r;
9142
pDst_pixels[1 + 4 * x] = c.g;
9143
pDst_pixels[2 + 4 * x] = c.b;
9144
pDst_pixels[3 + 4 * x] = 255;
9145
}
9146
9147
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9148
}
9149
9150
break;
9151
}
9152
case block_format::cRGB565:
9153
case block_format::cBGR565:
9154
{
9155
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9156
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9157
9158
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9159
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9160
9161
color32 colors[4];
9162
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9163
9164
uint16_t packed_colors[4];
9165
if (fmt == block_format::cRGB565)
9166
{
9167
for (uint32_t i = 0; i < 4; i++)
9168
{
9169
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
9170
if (BASISD_IS_BIG_ENDIAN)
9171
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9172
}
9173
}
9174
else
9175
{
9176
for (uint32_t i = 0; i < 4; i++)
9177
{
9178
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
9179
if (BASISD_IS_BIG_ENDIAN)
9180
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9181
}
9182
}
9183
9184
for (uint32_t y = 0; y < max_y; y++)
9185
{
9186
const uint32_t s = pSelector->m_selectors[y];
9187
9188
for (uint32_t x = 0; x < max_x; x++)
9189
reinterpret_cast<uint16_t *>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9190
9191
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9192
}
9193
9194
break;
9195
}
9196
case block_format::cRGBA4444_COLOR:
9197
{
9198
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9199
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9200
9201
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9202
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9203
9204
color32 colors[4];
9205
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9206
9207
uint16_t packed_colors[4];
9208
for (uint32_t i = 0; i < 4; i++)
9209
{
9210
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4));
9211
}
9212
9213
for (uint32_t y = 0; y < max_y; y++)
9214
{
9215
const uint32_t s = pSelector->m_selectors[y];
9216
9217
for (uint32_t x = 0; x < max_x; x++)
9218
{
9219
uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
9220
if (BASISD_IS_BIG_ENDIAN)
9221
cur = byteswap_uint16(cur);
9222
9223
cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
9224
9225
if (BASISD_IS_BIG_ENDIAN)
9226
cur = byteswap_uint16(cur);
9227
9228
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
9229
}
9230
9231
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9232
}
9233
9234
break;
9235
}
9236
case block_format::cRGBA4444_COLOR_OPAQUE:
9237
{
9238
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9239
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9240
9241
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9242
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9243
9244
color32 colors[4];
9245
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9246
9247
uint16_t packed_colors[4];
9248
for (uint32_t i = 0; i < 4; i++)
9249
{
9250
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
9251
if (BASISD_IS_BIG_ENDIAN)
9252
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9253
}
9254
9255
for (uint32_t y = 0; y < max_y; y++)
9256
{
9257
const uint32_t s = pSelector->m_selectors[y];
9258
9259
for (uint32_t x = 0; x < max_x; x++)
9260
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9261
9262
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9263
}
9264
9265
break;
9266
}
9267
case block_format::cRGBA4444_ALPHA:
9268
{
9269
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9270
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9271
9272
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9273
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9274
9275
color32 colors[4];
9276
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9277
9278
uint16_t packed_colors[4];
9279
for (uint32_t i = 0; i < 4; i++)
9280
{
9281
packed_colors[i] = mul_8(colors[i].g, 15);
9282
if (BASISD_IS_BIG_ENDIAN)
9283
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9284
}
9285
9286
for (uint32_t y = 0; y < max_y; y++)
9287
{
9288
const uint32_t s = pSelector->m_selectors[y];
9289
9290
for (uint32_t x = 0; x < max_x; x++)
9291
{
9292
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9293
}
9294
9295
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9296
}
9297
9298
break;
9299
}
9300
case block_format::cETC2_EAC_R11:
9301
{
9302
#if BASISD_SUPPORT_ETC2_EAC_RG11
9303
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9304
convert_etc1s_to_etc2_eac_r11(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
9305
#else
9306
assert(0);
9307
#endif
9308
break;
9309
}
9310
default:
9311
{
9312
assert(0);
9313
break;
9314
}
9315
}
9316
9317
} // block_x
9318
9319
} // block_y
9320
9321
if (endpoint_pred_repeat_count != 0)
9322
{
9323
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n");
9324
9325
if (pPVRTC_work_mem)
9326
free(pPVRTC_work_mem);
9327
9328
return false;
9329
}
9330
9331
//assert(endpoint_pred_repeat_count == 0);
9332
9333
#if BASISD_SUPPORT_PVRTC1
9334
// PVRTC post process - create per-pixel modulation values.
9335
if (fmt == block_format::cPVRTC1_4_RGB)
9336
fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y);
9337
else if (fmt == block_format::cPVRTC1_4_RGBA)
9338
fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]);
9339
#endif // BASISD_SUPPORT_PVRTC1
9340
9341
#if BASISD_SUPPORT_BC7_MODE5
9342
if (bc7_chroma_filtering)
9343
{
9344
chroma_filter_bc7_mode5(decoded_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, output_row_pitch_in_blocks_or_pixels, &endpoints[0]);
9345
}
9346
#endif
9347
9348
if (pPVRTC_work_mem)
9349
free(pPVRTC_work_mem);
9350
9351
return true;
9352
}
9353
9354
bool basis_validate_output_buffer_size(
9355
basis_tex_format source_format,
9356
transcoder_texture_format target_format,
9357
uint32_t output_blocks_buf_size_in_blocks_or_pixels,
9358
uint32_t orig_width, uint32_t orig_height,
9359
uint32_t output_row_pitch_in_blocks_or_pixels,
9360
uint32_t output_rows_in_pixels)
9361
{
9362
BASISU_NOTE_UNUSED(source_format);
9363
9364
if (basis_transcoder_format_is_uncompressed(target_format))
9365
{
9366
// Assume the output buffer is orig_width by orig_height
9367
if (!output_row_pitch_in_blocks_or_pixels)
9368
output_row_pitch_in_blocks_or_pixels = orig_width;
9369
9370
if (!output_rows_in_pixels)
9371
output_rows_in_pixels = orig_height;
9372
9373
// Now make sure the output buffer is large enough, or we'll overwrite memory.
9374
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
9375
{
9376
BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
9377
return false;
9378
}
9379
}
9380
else
9381
{
9382
const uint32_t dst_block_width = basis_get_block_width(target_format);
9383
const uint32_t dst_block_height = basis_get_block_height(target_format);
9384
//const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format);
9385
9386
// Take into account the destination format's block width/height.
9387
const uint32_t num_dst_blocks_x = (orig_width + dst_block_width - 1) / dst_block_width;
9388
const uint32_t num_dst_blocks_y = (orig_height + dst_block_height - 1) / dst_block_height;
9389
const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y;
9390
9391
assert(total_dst_blocks);
9392
9393
// Note this only computes the # of blocks we will write during transcoding, but for PVRTC1 OpenGL may require more for very small textures.
9394
// basis_compute_transcoded_image_size_in_bytes() may return larger buffers.
9395
if (output_blocks_buf_size_in_blocks_or_pixels < total_dst_blocks)
9396
{
9397
BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels is too small\n");
9398
return false;
9399
}
9400
}
9401
9402
return true;
9403
}
9404
9405
// Computes the size in bytes of an orig_width x orig_height image stored in target_format.
//
// Uncompressed targets are sized as a plain raster (width * bytes per pixel * height).
// PVRTC1 targets are sized with the padding GL asks for on very small textures.
// Every other target is sized as a 2D array of destination blocks.
// Both dimensions are asserted to be non-zero.
uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height)
{
	assert(orig_width && orig_height);

	const uint32_t block_w = basis_get_block_width(target_format);
	const uint32_t block_h = basis_get_block_height(target_format);

	// Raster output: rows of pixels, one row per image line.
	if (basis_transcoder_format_is_uncompressed(target_format))
		return (orig_width * basis_get_uncompressed_bytes_per_pixel(target_format)) * orig_height;

	// Everything below is a block-compressed format.
	const uint32_t block_size_in_bytes = basis_get_bytes_per_block_or_pixel(target_format);

	switch (target_format)
	{
	case transcoder_texture_format::cTFPVRTC1_4_RGB:
	case transcoder_texture_format::cTFPVRTC1_4_RGBA:
	{
		// For PVRTC1, Basis only writes (or requires) total_blocks * bytes_per_block. But GL requires extra padding for very small textures:
		// https://www.khronos.org/registry/OpenGL/extensions/IMG/IMG_texture_compression_pvrtc.txt
		// Round each dimension up to a multiple of 4, then clamp it to at least 8.
		const uint32_t padded_w = std::max(8U, (orig_width + 3U) & ~3U);
		const uint32_t padded_h = std::max(8U, (orig_height + 3U) & ~3U);

		// 4 bits per pixel, rounded up to whole bytes.
		return (padded_w * padded_h * 4 + 7) / 8;
	}
	default:
		break;
	}

	// Number of destination blocks needed to cover the image in each direction (rounded up).
	const uint32_t blocks_across = (orig_width + block_w - 1) / block_w;
	const uint32_t blocks_down = (orig_height + block_h - 1) / block_h;
	const uint32_t block_count = blocks_across * blocks_down;

	assert(block_count);

	return block_count * block_size_in_bytes;
}
9443
9444
bool basisu_lowlevel_etc1s_transcoder::transcode_image(
9445
transcoder_texture_format target_format,
9446
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
9447
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
9448
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
9449
uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length,
9450
uint32_t decode_flags,
9451
bool basis_file_has_alpha_slices,
9452
bool is_video,
9453
uint32_t output_row_pitch_in_blocks_or_pixels,
9454
basisu_transcoder_state* pState,
9455
uint32_t output_rows_in_pixels)
9456
{
9457
if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length)
9458
{
9459
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n");
9460
return false;
9461
}
9462
9463
if (alpha_length)
9464
{
9465
if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length)
9466
{
9467
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n");
9468
return false;
9469
}
9470
}
9471
else
9472
{
9473
assert(!basis_file_has_alpha_slices);
9474
}
9475
9476
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
9477
{
9478
if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
9479
{
9480
// PVRTC1 only supports power of 2 dimensions
9481
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
9482
return false;
9483
}
9484
}
9485
9486
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
9487
{
9488
// Switch to PVRTC1 RGB if the input doesn't have alpha.
9489
target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
9490
}
9491
9492
const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
9493
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
9494
const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
9495
9496
if (!basis_validate_output_buffer_size(basis_tex_format::cETC1S, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
9497
{
9498
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n");
9499
return false;
9500
}
9501
9502
bool status = false;
9503
9504
const uint8_t* pData = pCompressed_data + rgb_offset;
9505
uint32_t data_len = rgb_length;
9506
bool is_alpha_slice = false;
9507
9508
// If the caller wants us to transcode the mip level's alpha data, then use the next slice.
9509
if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
9510
{
9511
pData = pCompressed_data + alpha_offset;
9512
data_len = alpha_length;
9513
is_alpha_slice = true;
9514
}
9515
9516
switch (target_format)
9517
{
9518
case transcoder_texture_format::cTFETC1_RGB:
9519
{
9520
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9521
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9522
9523
if (!status)
9524
{
9525
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
9526
}
9527
break;
9528
}
9529
case transcoder_texture_format::cTFBC1_RGB:
9530
{
9531
#if !BASISD_SUPPORT_DXT1
9532
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n");
9533
return false;
9534
#else
9535
// status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9536
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9537
if (!status)
9538
{
9539
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
9540
}
9541
break;
9542
#endif
9543
}
9544
case transcoder_texture_format::cTFBC4_R:
9545
{
9546
#if !BASISD_SUPPORT_DXT5A
9547
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n");
9548
return false;
9549
#else
9550
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9551
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9552
if (!status)
9553
{
9554
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
9555
}
9556
break;
9557
#endif
9558
}
9559
case transcoder_texture_format::cTFPVRTC1_4_RGB:
9560
{
9561
#if !BASISD_SUPPORT_PVRTC1
9562
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
9563
return false;
9564
#else
9565
// output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
9566
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9567
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9568
if (!status)
9569
{
9570
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n");
9571
}
9572
break;
9573
#endif
9574
}
9575
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
9576
{
9577
#if !BASISD_SUPPORT_PVRTC1
9578
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
9579
return false;
9580
#else
9581
assert(basis_file_has_alpha_slices);
9582
assert(alpha_length);
9583
9584
// Temp buffer to hold alpha block endpoint/selector indices
9585
basisu::vector<uint32_t> temp_block_indices(total_slice_blocks);
9586
9587
// First transcode alpha data to temp buffer
9588
//status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState);
9589
status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0, decode_flags);
9590
if (!status)
9591
{
9592
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n");
9593
}
9594
else
9595
{
9596
// output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
9597
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]);
9598
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0, decode_flags);
9599
if (!status)
9600
{
9601
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n");
9602
}
9603
}
9604
9605
break;
9606
#endif
9607
}
9608
case transcoder_texture_format::cTFBC7_RGBA:
9609
case transcoder_texture_format::cTFBC7_ALT:
9610
{
9611
#if !BASISD_SUPPORT_BC7_MODE5
9612
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n");
9613
return false;
9614
#else
9615
assert(bytes_per_block_or_pixel == 16);
9616
// We used to support transcoding just alpha to BC7 - but is that useful at all?
9617
9618
// First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks.
9619
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9620
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9621
9622
if ((status) && (basis_file_has_alpha_slices))
9623
{
9624
// Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha.
9625
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9626
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9627
}
9628
9629
if (!status)
9630
{
9631
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n");
9632
}
9633
9634
break;
9635
#endif
9636
}
9637
case transcoder_texture_format::cTFETC2_RGBA:
9638
{
9639
#if !BASISD_SUPPORT_ETC2_EAC_A8
9640
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n");
9641
return false;
9642
#else
9643
assert(bytes_per_block_or_pixel == 16);
9644
9645
if (basis_file_has_alpha_slices)
9646
{
9647
// First decode the alpha data
9648
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9649
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9650
}
9651
else
9652
{
9653
//write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
9654
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
9655
status = true;
9656
}
9657
9658
if (status)
9659
{
9660
// Now decode the color data
9661
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9662
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9663
if (!status)
9664
{
9665
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n");
9666
}
9667
}
9668
else
9669
{
9670
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n");
9671
}
9672
break;
9673
#endif
9674
}
9675
case transcoder_texture_format::cTFBC3_RGBA:
9676
{
9677
#if !BASISD_SUPPORT_DXT1
9678
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n");
9679
return false;
9680
#elif !BASISD_SUPPORT_DXT5A
9681
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9682
return false;
9683
#else
9684
assert(bytes_per_block_or_pixel == 16);
9685
9686
// First decode the alpha data
9687
if (basis_file_has_alpha_slices)
9688
{
9689
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9690
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9691
}
9692
else
9693
{
9694
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9695
status = true;
9696
}
9697
9698
if (status)
9699
{
9700
// Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
9701
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState);
9702
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9703
if (!status)
9704
{
9705
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n");
9706
}
9707
}
9708
else
9709
{
9710
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n");
9711
}
9712
9713
break;
9714
#endif
9715
}
9716
case transcoder_texture_format::cTFBC5_RG:
9717
{
9718
#if !BASISD_SUPPORT_DXT5A
9719
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9720
return false;
9721
#else
9722
assert(bytes_per_block_or_pixel == 16);
9723
9724
//bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
9725
// uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
9726
// basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0);
9727
9728
// Decode the R data (actually the green channel of the color data slice in the basis file)
9729
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9730
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9731
if (status)
9732
{
9733
if (basis_file_has_alpha_slices)
9734
{
9735
// Decode the G data (actually the green channel of the alpha data slice in the basis file)
9736
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9737
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9738
if (!status)
9739
{
9740
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n");
9741
}
9742
}
9743
else
9744
{
9745
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9746
status = true;
9747
}
9748
}
9749
else
9750
{
9751
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n");
9752
}
9753
break;
9754
#endif
9755
}
9756
case transcoder_texture_format::cTFASTC_4x4_RGBA:
9757
{
9758
#if !BASISD_SUPPORT_ASTC
9759
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n");
9760
return false;
9761
#else
9762
assert(bytes_per_block_or_pixel == 16);
9763
9764
if (basis_file_has_alpha_slices)
9765
{
9766
// First decode the alpha data to the output (we're using the output texture as a temp buffer here).
9767
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9768
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9769
if (status)
9770
{
9771
// Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and
9772
// transcode both the alpha and color data at the same time to ASTC.
9773
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
9774
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags);
9775
}
9776
}
9777
else
9778
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9779
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9780
9781
if (!status)
9782
{
9783
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n");
9784
}
9785
9786
break;
9787
#endif
9788
}
9789
case transcoder_texture_format::cTFATC_RGB:
9790
{
9791
#if !BASISD_SUPPORT_ATC
9792
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
9793
return false;
9794
#else
9795
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9796
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9797
if (!status)
9798
{
9799
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n");
9800
}
9801
break;
9802
#endif
9803
}
9804
case transcoder_texture_format::cTFATC_RGBA:
9805
{
9806
#if !BASISD_SUPPORT_ATC
9807
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
9808
return false;
9809
#elif !BASISD_SUPPORT_DXT5A
9810
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9811
return false;
9812
#else
9813
assert(bytes_per_block_or_pixel == 16);
9814
9815
// First decode the alpha data
9816
if (basis_file_has_alpha_slices)
9817
{
9818
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9819
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9820
}
9821
else
9822
{
9823
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9824
status = true;
9825
}
9826
9827
if (status)
9828
{
9829
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9830
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9831
if (!status)
9832
{
9833
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n");
9834
}
9835
}
9836
else
9837
{
9838
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n");
9839
}
9840
break;
9841
#endif
9842
}
9843
case transcoder_texture_format::cTFPVRTC2_4_RGB:
9844
{
9845
#if !BASISD_SUPPORT_PVRTC2
9846
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9847
return false;
9848
#else
9849
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9850
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9851
if (!status)
9852
{
9853
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n");
9854
}
9855
break;
9856
#endif
9857
}
9858
case transcoder_texture_format::cTFPVRTC2_4_RGBA:
9859
{
9860
#if !BASISD_SUPPORT_PVRTC2
9861
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9862
return false;
9863
#else
9864
if (basis_file_has_alpha_slices)
9865
{
9866
// First decode the alpha data to the output (we're using the output texture as a temp buffer here).
9867
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9868
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9869
if (!status)
9870
{
9871
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n");
9872
}
9873
else
9874
{
9875
// Now decode the color data and transcode to PVRTC2 RGBA.
9876
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
9877
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags);
9878
}
9879
}
9880
else
9881
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9882
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9883
9884
if (!status)
9885
{
9886
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n");
9887
}
9888
9889
break;
9890
#endif
9891
}
9892
case transcoder_texture_format::cTFRGBA32:
9893
{
9894
// Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9895
9896
// First decode the alpha data
9897
if (basis_file_has_alpha_slices)
9898
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9899
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9900
else
9901
status = true;
9902
9903
if (status)
9904
{
9905
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9906
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9907
if (!status)
9908
{
9909
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n");
9910
}
9911
}
9912
else
9913
{
9914
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n");
9915
}
9916
9917
break;
9918
}
9919
case transcoder_texture_format::cTFRGB565:
9920
case transcoder_texture_format::cTFBGR565:
9921
{
9922
// Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9923
9924
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9925
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9926
if (!status)
9927
{
9928
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n");
9929
}
9930
9931
break;
9932
}
9933
case transcoder_texture_format::cTFRGBA4444:
9934
{
9935
// Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9936
9937
// First decode the alpha data
9938
if (basis_file_has_alpha_slices)
9939
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9940
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9941
else
9942
status = true;
9943
9944
if (status)
9945
{
9946
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9947
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9948
if (!status)
9949
{
9950
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n");
9951
}
9952
}
9953
else
9954
{
9955
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n");
9956
}
9957
9958
break;
9959
}
9960
case transcoder_texture_format::cTFFXT1_RGB:
9961
{
9962
#if !BASISD_SUPPORT_FXT1
9963
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n");
9964
return false;
9965
#else
9966
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9967
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9968
if (!status)
9969
{
9970
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n");
9971
}
9972
break;
9973
#endif
9974
}
9975
case transcoder_texture_format::cTFETC2_EAC_R11:
9976
{
9977
#if !BASISD_SUPPORT_ETC2_EAC_RG11
9978
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9979
return false;
9980
#else
9981
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9982
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9983
if (!status)
9984
{
9985
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n");
9986
}
9987
9988
break;
9989
#endif
9990
}
9991
case transcoder_texture_format::cTFETC2_EAC_RG11:
9992
{
9993
#if !BASISD_SUPPORT_ETC2_EAC_RG11
9994
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9995
return false;
9996
#else
9997
assert(bytes_per_block_or_pixel == 16);
9998
9999
if (basis_file_has_alpha_slices)
10000
{
10001
// First decode the alpha data to G
10002
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10003
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
10004
}
10005
else
10006
{
10007
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels);
10008
status = true;
10009
}
10010
10011
if (status)
10012
{
10013
// Now decode the color data to R
10014
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10015
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
10016
if (!status)
10017
{
10018
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n");
10019
}
10020
}
10021
else
10022
{
10023
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n");
10024
}
10025
10026
break;
10027
#endif
10028
}
10029
default:
10030
{
10031
assert(0);
10032
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n");
10033
break;
10034
}
10035
}
10036
10037
return status;
10038
}
10039
10040
//------------------------------------------------------------------------------------------------
10041
10042
// Default constructor: performs no work of its own beyond default member initialization.
basisu_lowlevel_uastc_ldr_4x4_transcoder::basisu_lowlevel_uastc_ldr_4x4_transcoder() = default;
10045
10046
bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice(
10047
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10048
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10049
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10050
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10051
{
10052
BASISU_NOTE_UNUSED(pState);
10053
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10054
10055
assert(g_transcoder_initialized);
10056
if (!g_transcoder_initialized)
10057
{
10058
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10059
return false;
10060
}
10061
10062
#if BASISD_SUPPORT_UASTC
10063
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
10064
10065
if (!output_row_pitch_in_blocks_or_pixels)
10066
{
10067
if (basis_block_format_is_uncompressed(fmt))
10068
output_row_pitch_in_blocks_or_pixels = orig_width;
10069
else
10070
{
10071
if (fmt == block_format::cFXT1_RGB)
10072
output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
10073
else
10074
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
10075
}
10076
}
10077
10078
if (basis_block_format_is_uncompressed(fmt))
10079
{
10080
if (!output_rows_in_pixels)
10081
output_rows_in_pixels = orig_height;
10082
}
10083
10084
uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks;
10085
if (image_data_size < total_expected_block_bytes)
10086
{
10087
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10088
return false;
10089
}
10090
10091
const uastc_block* pSource_block = reinterpret_cast<const uastc_block *>(pImage_data);
10092
10093
const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0;
10094
const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
10095
10096
bool status = false;
10097
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
10098
{
10099
if (fmt == block_format::cPVRTC1_4_RGBA)
10100
transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality);
10101
else
10102
transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha);
10103
}
10104
else
10105
{
10106
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
10107
{
10108
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
10109
10110
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes)
10111
{
10112
switch (fmt)
10113
{
10114
case block_format::cUASTC_4x4:
10115
{
10116
memcpy(pDst_block, pSource_block, sizeof(uastc_block));
10117
status = true;
10118
break;
10119
}
10120
case block_format::cETC1:
10121
{
10122
if (from_alpha)
10123
status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3);
10124
else
10125
status = transcode_uastc_to_etc1(*pSource_block, pDst_block);
10126
break;
10127
}
10128
case block_format::cETC2_RGBA:
10129
{
10130
status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block);
10131
break;
10132
}
10133
case block_format::cBC1:
10134
{
10135
status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality);
10136
break;
10137
}
10138
case block_format::cBC3:
10139
{
10140
status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality);
10141
break;
10142
}
10143
case block_format::cBC4:
10144
{
10145
if (channel0 < 0)
10146
channel0 = 0;
10147
status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0);
10148
break;
10149
}
10150
case block_format::cBC5:
10151
{
10152
if (channel0 < 0)
10153
channel0 = 0;
10154
if (channel1 < 0)
10155
channel1 = 3;
10156
status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1);
10157
break;
10158
}
10159
case block_format::cBC7:
10160
case block_format::cBC7_M5_COLOR: // for consistently with ETC1S
10161
{
10162
status = transcode_uastc_to_bc7(*pSource_block, pDst_block);
10163
break;
10164
}
10165
case block_format::cASTC_4x4:
10166
{
10167
status = transcode_uastc_to_astc(*pSource_block, pDst_block);
10168
break;
10169
}
10170
case block_format::cETC2_EAC_R11:
10171
{
10172
if (channel0 < 0)
10173
channel0 = 0;
10174
status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0);
10175
break;
10176
}
10177
case block_format::cETC2_EAC_RG11:
10178
{
10179
if (channel0 < 0)
10180
channel0 = 0;
10181
if (channel1 < 0)
10182
channel1 = 3;
10183
status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1);
10184
break;
10185
}
10186
case block_format::cRGBA32:
10187
{
10188
color32 block_pixels[4][4];
10189
status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false);
10190
10191
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
10192
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
10193
10194
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10195
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10196
10197
for (uint32_t y = 0; y < max_y; y++)
10198
{
10199
for (uint32_t x = 0; x < max_x; x++)
10200
{
10201
const color32& c = block_pixels[y][x];
10202
10203
pDst_pixels[0 + 4 * x] = c.r;
10204
pDst_pixels[1 + 4 * x] = c.g;
10205
pDst_pixels[2 + 4 * x] = c.b;
10206
pDst_pixels[3 + 4 * x] = c.a;
10207
}
10208
10209
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
10210
}
10211
10212
break;
10213
}
10214
case block_format::cRGB565:
10215
case block_format::cBGR565:
10216
{
10217
color32 block_pixels[4][4];
10218
status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
10219
10220
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
10221
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
10222
10223
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10224
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10225
10226
for (uint32_t y = 0; y < max_y; y++)
10227
{
10228
for (uint32_t x = 0; x < max_x; x++)
10229
{
10230
const color32& c = block_pixels[y][x];
10231
10232
const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) :
10233
static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31));
10234
10235
pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
10236
pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
10237
}
10238
10239
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
10240
}
10241
10242
break;
10243
}
10244
case block_format::cRGBA4444:
10245
{
10246
color32 block_pixels[4][4];
10247
status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
10248
10249
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
10250
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
10251
10252
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10253
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10254
10255
for (uint32_t y = 0; y < max_y; y++)
10256
{
10257
for (uint32_t x = 0; x < max_x; x++)
10258
{
10259
const color32& c = block_pixels[y][x];
10260
10261
const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15));
10262
10263
pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
10264
pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
10265
}
10266
10267
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
10268
}
10269
break;
10270
}
10271
default:
10272
assert(0);
10273
break;
10274
10275
}
10276
10277
if (!status)
10278
{
10279
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n");
10280
return false;
10281
}
10282
10283
} // block_x
10284
10285
} // block_y
10286
}
10287
10288
return true;
10289
#else
10290
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: UASTC is unsupported\n");
10291
10292
BASISU_NOTE_UNUSED(decode_flags);
10293
BASISU_NOTE_UNUSED(channel0);
10294
BASISU_NOTE_UNUSED(channel1);
10295
BASISU_NOTE_UNUSED(output_rows_in_pixels);
10296
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
10297
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
10298
BASISU_NOTE_UNUSED(fmt);
10299
BASISU_NOTE_UNUSED(image_data_size);
10300
BASISU_NOTE_UNUSED(pImage_data);
10301
BASISU_NOTE_UNUSED(num_blocks_x);
10302
BASISU_NOTE_UNUSED(num_blocks_y);
10303
BASISU_NOTE_UNUSED(pDst_blocks);
10304
10305
return false;
10306
#endif
10307
}
10308
10309
bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image(
10310
transcoder_texture_format target_format,
10311
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
10312
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
10313
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
10314
uint32_t slice_offset, uint32_t slice_length,
10315
uint32_t decode_flags,
10316
bool has_alpha,
10317
bool is_video,
10318
uint32_t output_row_pitch_in_blocks_or_pixels,
10319
basisu_transcoder_state* pState,
10320
uint32_t output_rows_in_pixels,
10321
int channel0, int channel1)
10322
{
10323
BASISU_NOTE_UNUSED(is_video);
10324
BASISU_NOTE_UNUSED(level_index);
10325
10326
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
10327
{
10328
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: source data buffer too small\n");
10329
return false;
10330
}
10331
10332
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
10333
{
10334
if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
10335
{
10336
// PVRTC1 only supports power of 2 dimensions
10337
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
10338
return false;
10339
}
10340
}
10341
10342
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha))
10343
{
10344
// Switch to PVRTC1 RGB if the input doesn't have alpha.
10345
target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
10346
}
10347
10348
const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
10349
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
10350
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
10351
10352
if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
10353
{
10354
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: output buffer size too small\n");
10355
return false;
10356
}
10357
10358
bool status = false;
10359
10360
// UASTC4x4
10361
switch (target_format)
10362
{
10363
case transcoder_texture_format::cTFETC1_RGB:
10364
{
10365
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10366
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1,
10367
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10368
10369
if (!status)
10370
{
10371
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
10372
}
10373
break;
10374
}
10375
case transcoder_texture_format::cTFETC2_RGBA:
10376
{
10377
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10378
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA,
10379
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10380
if (!status)
10381
{
10382
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC2 failed\n");
10383
}
10384
break;
10385
}
10386
case transcoder_texture_format::cTFBC1_RGB:
10387
{
10388
// TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though.
10389
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10390
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1,
10391
bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10392
if (!status)
10393
{
10394
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
10395
}
10396
break;
10397
}
10398
case transcoder_texture_format::cTFBC3_RGBA:
10399
{
10400
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10401
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3,
10402
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10403
if (!status)
10404
{
10405
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC3 failed\n");
10406
}
10407
break;
10408
}
10409
case transcoder_texture_format::cTFBC4_R:
10410
{
10411
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10412
// nullptr, 0,
10413
// ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
10414
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4,
10415
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10416
((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags);
10417
if (!status)
10418
{
10419
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
10420
}
10421
break;
10422
}
10423
case transcoder_texture_format::cTFBC5_RG:
10424
{
10425
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10426
// nullptr, 0,
10427
// 0, 3);
10428
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5,
10429
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10430
0, 3, decode_flags);
10431
if (!status)
10432
{
10433
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC5 failed\n");
10434
}
10435
break;
10436
}
10437
case transcoder_texture_format::cTFBC7_RGBA:
10438
case transcoder_texture_format::cTFBC7_ALT:
10439
{
10440
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10441
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7,
10442
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10443
if (!status)
10444
{
10445
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC7 failed\n");
10446
}
10447
break;
10448
}
10449
case transcoder_texture_format::cTFPVRTC1_4_RGB:
10450
{
10451
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10452
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB,
10453
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10454
if (!status)
10455
{
10456
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n");
10457
}
10458
break;
10459
}
10460
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
10461
{
10462
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10463
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA,
10464
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10465
if (!status)
10466
{
10467
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n");
10468
}
10469
break;
10470
}
10471
case transcoder_texture_format::cTFASTC_4x4_RGBA:
10472
{
10473
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10474
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4,
10475
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10476
if (!status)
10477
{
10478
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n");
10479
}
10480
break;
10481
}
10482
case transcoder_texture_format::cTFATC_RGB:
10483
case transcoder_texture_format::cTFATC_RGBA:
10484
{
10485
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->ATC currently unsupported\n");
10486
return false;
10487
}
10488
case transcoder_texture_format::cTFFXT1_RGB:
10489
{
10490
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n");
10491
return false;
10492
}
10493
case transcoder_texture_format::cTFPVRTC2_4_RGB:
10494
{
10495
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
10496
return false;
10497
}
10498
case transcoder_texture_format::cTFPVRTC2_4_RGBA:
10499
{
10500
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
10501
return false;
10502
}
10503
case transcoder_texture_format::cTFETC2_EAC_R11:
10504
{
10505
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10506
// nullptr, 0,
10507
// ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
10508
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11,
10509
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10510
((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags);
10511
if (!status)
10512
{
10513
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n");
10514
}
10515
break;
10516
}
10517
case transcoder_texture_format::cTFETC2_EAC_RG11:
10518
{
10519
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10520
// nullptr, 0,
10521
// 0, 3);
10522
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11,
10523
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10524
0, 3, decode_flags);
10525
if (!status)
10526
{
10527
BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_ldr_4x4_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n");
10528
}
10529
break;
10530
}
10531
case transcoder_texture_format::cTFRGBA32:
10532
{
10533
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10534
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32,
10535
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10536
if (!status)
10537
{
10538
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n");
10539
}
10540
break;
10541
}
10542
case transcoder_texture_format::cTFRGB565:
10543
{
10544
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10545
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565,
10546
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10547
if (!status)
10548
{
10549
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
10550
}
10551
break;
10552
}
10553
case transcoder_texture_format::cTFBGR565:
10554
{
10555
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10556
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565,
10557
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10558
if (!status)
10559
{
10560
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
10561
}
10562
break;
10563
}
10564
case transcoder_texture_format::cTFRGBA4444:
10565
{
10566
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10567
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444,
10568
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10569
if (!status)
10570
{
10571
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n");
10572
}
10573
break;
10574
}
10575
default:
10576
{
10577
assert(0);
10578
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: Invalid format\n");
10579
break;
10580
}
10581
}
10582
10583
return status;
10584
}
10585
10586
//------------------------------------------------------------------------------------------------
10587
// UASTC HDR 4x4
10588
10589
// Constructor for the UASTC HDR 4x4 low-level transcoder. It performs no work of its own;
// members (if any) are simply default-initialized.
basisu_lowlevel_uastc_hdr_4x4_transcoder::basisu_lowlevel_uastc_hdr_4x4_transcoder() = default;
10592
10593
bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice(
10594
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10595
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10596
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10597
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10598
{
10599
BASISU_NOTE_UNUSED(pState);
10600
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10601
BASISU_NOTE_UNUSED(has_alpha);
10602
BASISU_NOTE_UNUSED(channel0);
10603
BASISU_NOTE_UNUSED(channel1);
10604
BASISU_NOTE_UNUSED(decode_flags);
10605
BASISU_NOTE_UNUSED(orig_width);
10606
BASISU_NOTE_UNUSED(orig_height);
10607
10608
assert(g_transcoder_initialized);
10609
if (!g_transcoder_initialized)
10610
{
10611
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10612
return false;
10613
}
10614
10615
#if BASISD_SUPPORT_UASTC_HDR
10616
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
10617
10618
if (!output_row_pitch_in_blocks_or_pixels)
10619
{
10620
if (basis_block_format_is_uncompressed(fmt))
10621
output_row_pitch_in_blocks_or_pixels = orig_width;
10622
else
10623
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
10624
}
10625
10626
if (basis_block_format_is_uncompressed(fmt))
10627
{
10628
if (!output_rows_in_pixels)
10629
output_rows_in_pixels = orig_height;
10630
}
10631
10632
uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
10633
if (image_data_size < total_expected_block_bytes)
10634
{
10635
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10636
return false;
10637
}
10638
10639
const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
10640
10641
bool status = false;
10642
10643
// TODO: Optimize pure memcpy() case.
10644
10645
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
10646
{
10647
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
10648
10649
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
10650
{
10651
switch (fmt)
10652
{
10653
case block_format::cUASTC_HDR_4x4:
10654
case block_format::cASTC_HDR_4x4:
10655
{
10656
// Nothing to do, UASTC HDR 4x4 is just ASTC.
10657
memcpy(pDst_block, pSource_block, sizeof(uastc_block));
10658
status = true;
10659
break;
10660
}
10661
case block_format::cBC6H:
10662
{
10663
status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
10664
break;
10665
}
10666
case block_format::cRGB_9E5:
10667
{
10668
astc_helpers::log_astc_block log_blk;
10669
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10670
if (status)
10671
{
10672
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
10673
static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
10674
);
10675
10676
uint32_t blk_texels[4][4];
10677
10678
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
10679
10680
if (status)
10681
{
10682
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10683
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10684
10685
for (uint32_t y = 0; y < max_y; y++)
10686
{
10687
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
10688
10689
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
10690
} // y
10691
}
10692
}
10693
10694
break;
10695
}
10696
case block_format::cRGBA_HALF:
10697
{
10698
astc_helpers::log_astc_block log_blk;
10699
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10700
if (status)
10701
{
10702
half_float* pDst_pixels = reinterpret_cast<half_float*>(
10703
static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
10704
);
10705
10706
half_float blk_texels[4][4][4];
10707
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
10708
10709
if (status)
10710
{
10711
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10712
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10713
10714
for (uint32_t y = 0; y < max_y; y++)
10715
{
10716
for (uint32_t x = 0; x < max_x; x++)
10717
{
10718
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
10719
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
10720
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
10721
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
10722
} // x
10723
10724
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
10725
} // y
10726
}
10727
}
10728
10729
break;
10730
}
10731
case block_format::cRGB_HALF:
10732
{
10733
astc_helpers:: log_astc_block log_blk;
10734
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10735
if (status)
10736
{
10737
half_float* pDst_pixels =
10738
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
10739
10740
half_float blk_texels[4][4][4];
10741
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
10742
if (status)
10743
{
10744
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10745
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10746
10747
for (uint32_t y = 0; y < max_y; y++)
10748
{
10749
for (uint32_t x = 0; x < max_x; x++)
10750
{
10751
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
10752
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
10753
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
10754
} // x
10755
10756
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
10757
} // y
10758
}
10759
}
10760
10761
break;
10762
}
10763
default:
10764
assert(0);
10765
break;
10766
10767
}
10768
10769
if (!status)
10770
{
10771
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");
10772
return false;
10773
}
10774
10775
} // block_x
10776
10777
} // block_y
10778
10779
return true;
10780
#else
10781
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
10782
10783
BASISU_NOTE_UNUSED(decode_flags);
10784
BASISU_NOTE_UNUSED(channel0);
10785
BASISU_NOTE_UNUSED(channel1);
10786
BASISU_NOTE_UNUSED(output_rows_in_pixels);
10787
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
10788
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
10789
BASISU_NOTE_UNUSED(fmt);
10790
BASISU_NOTE_UNUSED(image_data_size);
10791
BASISU_NOTE_UNUSED(pImage_data);
10792
BASISU_NOTE_UNUSED(num_blocks_x);
10793
BASISU_NOTE_UNUSED(num_blocks_y);
10794
BASISU_NOTE_UNUSED(pDst_blocks);
10795
10796
return false;
10797
#endif
10798
}
10799
10800
bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image(
10801
transcoder_texture_format target_format,
10802
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
10803
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
10804
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
10805
uint32_t slice_offset, uint32_t slice_length,
10806
uint32_t decode_flags,
10807
bool has_alpha,
10808
bool is_video,
10809
uint32_t output_row_pitch_in_blocks_or_pixels,
10810
basisu_transcoder_state* pState,
10811
uint32_t output_rows_in_pixels,
10812
int channel0, int channel1)
10813
{
10814
BASISU_NOTE_UNUSED(is_video);
10815
BASISU_NOTE_UNUSED(level_index);
10816
BASISU_NOTE_UNUSED(decode_flags);
10817
10818
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
10819
{
10820
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: source data buffer too small\n");
10821
return false;
10822
}
10823
10824
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
10825
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
10826
10827
if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC_HDR_4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
10828
{
10829
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: output buffer size too small\n");
10830
return false;
10831
}
10832
10833
bool status = false;
10834
10835
switch (target_format)
10836
{
10837
case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
10838
{
10839
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
10840
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10841
10842
if (!status)
10843
{
10844
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
10845
}
10846
break;
10847
}
10848
case transcoder_texture_format::cTFBC6H:
10849
{
10850
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
10851
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10852
if (!status)
10853
{
10854
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
10855
}
10856
break;
10857
}
10858
case transcoder_texture_format::cTFRGB_HALF:
10859
{
10860
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
10861
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10862
if (!status)
10863
{
10864
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
10865
}
10866
break;
10867
}
10868
case transcoder_texture_format::cTFRGBA_HALF:
10869
{
10870
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
10871
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10872
if (!status)
10873
{
10874
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
10875
}
10876
break;
10877
}
10878
case transcoder_texture_format::cTFRGB_9E5:
10879
{
10880
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
10881
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10882
if (!status)
10883
{
10884
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
10885
}
10886
break;
10887
}
10888
default:
10889
{
10890
assert(0);
10891
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: Invalid format\n");
10892
break;
10893
}
10894
}
10895
10896
return status;
10897
}
10898
10899
//------------------------------------------------------------------------------------------------
10900
// ASTC 6x6 HDR
10901
10902
// Constructor for the ASTC HDR 6x6 low-level transcoder. It performs no work of its own;
// members (if any) are simply default-initialized.
basisu_lowlevel_astc_hdr_6x6_transcoder::basisu_lowlevel_astc_hdr_6x6_transcoder() = default;
10905
10906
// num_blocks_x/num_blocks_y are source 6x6 blocks
10907
bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice(
10908
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10909
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10910
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10911
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10912
{
10913
BASISU_NOTE_UNUSED(pState);
10914
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10915
BASISU_NOTE_UNUSED(has_alpha);
10916
BASISU_NOTE_UNUSED(channel0);
10917
BASISU_NOTE_UNUSED(channel1);
10918
BASISU_NOTE_UNUSED(decode_flags);
10919
BASISU_NOTE_UNUSED(orig_width);
10920
BASISU_NOTE_UNUSED(orig_height);
10921
10922
assert(g_transcoder_initialized);
10923
if (!g_transcoder_initialized)
10924
{
10925
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10926
return false;
10927
}
10928
10929
#if BASISD_SUPPORT_UASTC_HDR
10930
const uint32_t total_src_blocks = num_blocks_x * num_blocks_y;
10931
10932
const uint32_t output_block_width = get_block_width(fmt);
10933
//const uint32_t output_block_height = get_block_height(fmt);
10934
10935
if (!output_row_pitch_in_blocks_or_pixels)
10936
{
10937
if (basis_block_format_is_uncompressed(fmt))
10938
output_row_pitch_in_blocks_or_pixels = orig_width;
10939
else
10940
output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width;
10941
}
10942
10943
if (basis_block_format_is_uncompressed(fmt))
10944
{
10945
if (!output_rows_in_pixels)
10946
output_rows_in_pixels = orig_height;
10947
}
10948
10949
uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_src_blocks;
10950
if (image_data_size < total_expected_block_bytes)
10951
{
10952
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10953
return false;
10954
}
10955
10956
const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
10957
10958
bool status = false;
10959
10960
half_float unpacked_blocks[12][12][3]; // [y][x][c]
10961
10962
assert(((orig_width + 5) / 6) == num_blocks_x);
10963
assert(((orig_height + 5) / 6) == num_blocks_y);
10964
10965
if (fmt == block_format::cBC6H)
10966
{
10967
const uint32_t num_dst_blocks_x = (orig_width + 3) / 4;
10968
const uint32_t num_dst_blocks_y = (orig_height + 3) / 4;
10969
10970
if (!output_row_pitch_in_blocks_or_pixels)
10971
{
10972
output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x;
10973
}
10974
else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x)
10975
{
10976
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n");
10977
return false;
10978
}
10979
10980
if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block))
10981
{
10982
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n");
10983
return false;
10984
}
10985
10986
fast_bc6h_params bc6h_enc_params;
10987
const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0;
10988
bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0;
10989
10990
for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2)
10991
{
10992
const uint32_t num_inner_blocks_y = basisu::minimum<uint32_t>(2, num_blocks_y - src_block_y);
10993
10994
for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2)
10995
{
10996
const uint32_t num_inner_blocks_x = basisu::minimum<uint32_t>(2, num_blocks_x - src_block_x);
10997
10998
for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++)
10999
{
11000
for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++)
11001
{
11002
const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix);
11003
11004
half_float blk_texels[6][6][4];
11005
11006
astc_helpers::log_astc_block log_blk;
11007
status = astc_helpers::unpack_block(pS, log_blk, 6, 6);
11008
if (!status)
11009
{
11010
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11011
return false;
11012
}
11013
11014
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11015
if (!status)
11016
{
11017
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11018
return false;
11019
}
11020
11021
for (uint32_t y = 0; y < 6; y++)
11022
{
11023
for (uint32_t x = 0; x < 6; x++)
11024
{
11025
unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0];
11026
unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1];
11027
unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2];
11028
11029
} // x
11030
} // y
11031
11032
} // ix
11033
11034
} // iy
11035
11036
const uint32_t dst_x = src_block_x * 6;
11037
assert((dst_x & 3) == 0);
11038
const uint32_t dst_block_x = dst_x >> 2;
11039
11040
const uint32_t dst_y = src_block_y * 6;
11041
assert((dst_y & 3) == 0);
11042
const uint32_t dst_block_y = dst_y >> 2;
11043
11044
const uint32_t num_inner_dst_blocks_x = basisu::minimum<uint32_t>(3, num_dst_blocks_x - dst_block_x);
11045
const uint32_t num_inner_dst_blocks_y = basisu::minimum<uint32_t>(3, num_dst_blocks_y - dst_block_y);
11046
11047
for (uint32_t dy = 0; dy < num_inner_dst_blocks_y; dy++)
11048
{
11049
for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++)
11050
{
11051
bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels;
11052
11053
half_float src_pixels[4][4][3]; // [y][x][c]
11054
11055
for (uint32_t y = 0; y < 4; y++)
11056
{
11057
const uint32_t src_pixel_y = basisu::minimum<uint32_t>(dy * 4 + y, num_inner_blocks_y * 6 - 1);
11058
11059
for (uint32_t x = 0; x < 4; x++)
11060
{
11061
const uint32_t src_pixel_x = basisu::minimum<uint32_t>(dx * 4 + x, num_inner_blocks_x * 6 - 1);
11062
11063
assert((src_pixel_y < 12) && (src_pixel_x < 12));
11064
11065
src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0];
11066
src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1];
11067
src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2];
11068
11069
} // x
11070
} // y
11071
11072
astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params);
11073
11074
} // dx
11075
} // dy
11076
11077
} // block_x
11078
11079
} // block_y
11080
11081
status = true;
11082
}
11083
else
11084
{
11085
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
11086
{
11087
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
11088
11089
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
11090
{
11091
switch (fmt)
11092
{
11093
case block_format::cASTC_HDR_6x6:
11094
{
11095
// Nothing to do, ASTC HDR 6x6 is just ASTC.
11096
// TODO: Optimize this copy
11097
memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block));
11098
status = true;
11099
break;
11100
}
11101
case block_format::cRGB_9E5:
11102
{
11103
astc_helpers::log_astc_block log_blk;
11104
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11105
if (status)
11106
{
11107
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
11108
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
11109
);
11110
11111
uint32_t blk_texels[6][6];
11112
11113
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5);
11114
11115
if (status)
11116
{
11117
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11118
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11119
11120
for (uint32_t y = 0; y < max_y; y++)
11121
{
11122
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
11123
11124
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
11125
} // y
11126
}
11127
}
11128
11129
break;
11130
}
11131
case block_format::cRGBA_HALF:
11132
{
11133
astc_helpers::log_astc_block log_blk;
11134
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11135
if (status)
11136
{
11137
half_float* pDst_pixels = reinterpret_cast<half_float*>(
11138
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
11139
);
11140
11141
half_float blk_texels[6][6][4];
11142
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11143
11144
if (status)
11145
{
11146
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11147
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11148
11149
for (uint32_t y = 0; y < max_y; y++)
11150
{
11151
for (uint32_t x = 0; x < max_x; x++)
11152
{
11153
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
11154
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
11155
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
11156
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
11157
} // x
11158
11159
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
11160
} // y
11161
}
11162
}
11163
11164
break;
11165
}
11166
case block_format::cRGB_HALF:
11167
{
11168
astc_helpers::log_astc_block log_blk;
11169
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11170
if (status)
11171
{
11172
half_float* pDst_pixels =
11173
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
11174
11175
half_float blk_texels[6][6][4];
11176
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11177
if (status)
11178
{
11179
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11180
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11181
11182
for (uint32_t y = 0; y < max_y; y++)
11183
{
11184
for (uint32_t x = 0; x < max_x; x++)
11185
{
11186
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
11187
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
11188
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
11189
} // x
11190
11191
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
11192
} // y
11193
}
11194
}
11195
11196
break;
11197
}
11198
default:
11199
assert(0);
11200
break;
11201
11202
}
11203
11204
if (!status)
11205
{
11206
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11207
return false;
11208
}
11209
11210
} // block_x
11211
11212
} // block_y
11213
}
11214
11215
return true;
11216
#else
11217
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: ASTC HDR is unsupported\n");
11218
11219
BASISU_NOTE_UNUSED(decode_flags);
11220
BASISU_NOTE_UNUSED(channel0);
11221
BASISU_NOTE_UNUSED(channel1);
11222
BASISU_NOTE_UNUSED(output_rows_in_pixels);
11223
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
11224
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
11225
BASISU_NOTE_UNUSED(fmt);
11226
BASISU_NOTE_UNUSED(image_data_size);
11227
BASISU_NOTE_UNUSED(pImage_data);
11228
BASISU_NOTE_UNUSED(num_blocks_x);
11229
BASISU_NOTE_UNUSED(num_blocks_y);
11230
BASISU_NOTE_UNUSED(pDst_blocks);
11231
11232
return false;
11233
#endif
11234
}
11235
11236
bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image(
11237
transcoder_texture_format target_format,
11238
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
11239
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
11240
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
11241
uint32_t slice_offset, uint32_t slice_length,
11242
uint32_t decode_flags,
11243
bool has_alpha,
11244
bool is_video,
11245
uint32_t output_row_pitch_in_blocks_or_pixels,
11246
basisu_transcoder_state* pState,
11247
uint32_t output_rows_in_pixels,
11248
int channel0, int channel1)
11249
{
11250
BASISU_NOTE_UNUSED(is_video);
11251
BASISU_NOTE_UNUSED(level_index);
11252
BASISU_NOTE_UNUSED(decode_flags);
11253
11254
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
11255
{
11256
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: source data buffer too small\n");
11257
return false;
11258
}
11259
11260
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
11261
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
11262
11263
if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
11264
{
11265
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: output buffer size too small\n");
11266
return false;
11267
}
11268
11269
bool status = false;
11270
11271
switch (target_format)
11272
{
11273
case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
11274
{
11275
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6,
11276
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11277
11278
if (!status)
11279
{
11280
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
11281
}
11282
break;
11283
}
11284
case transcoder_texture_format::cTFBC6H:
11285
{
11286
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
11287
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11288
if (!status)
11289
{
11290
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
11291
}
11292
break;
11293
}
11294
case transcoder_texture_format::cTFRGB_HALF:
11295
{
11296
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
11297
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11298
if (!status)
11299
{
11300
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
11301
}
11302
break;
11303
}
11304
case transcoder_texture_format::cTFRGBA_HALF:
11305
{
11306
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
11307
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11308
if (!status)
11309
{
11310
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11311
}
11312
break;
11313
}
11314
case transcoder_texture_format::cTFRGB_9E5:
11315
{
11316
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
11317
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11318
if (!status)
11319
{
11320
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11321
}
11322
break;
11323
}
11324
default:
11325
{
11326
assert(0);
11327
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: Invalid format\n");
11328
break;
11329
}
11330
}
11331
11332
return status;
11333
}
11334
11335
//------------------------------------------------------------------------------------------------
11336
// ASTC 6x6 HDR intermediate
11337
11338
// Default constructor: performs no initialization work of its own.
basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder()
{
}
11341
11342
// num_blocks_x/num_blocks_y are source 6x6 blocks
11343
bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice(
11344
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
11345
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
11346
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
11347
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
11348
{
11349
BASISU_NOTE_UNUSED(pState);
11350
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
11351
BASISU_NOTE_UNUSED(has_alpha);
11352
BASISU_NOTE_UNUSED(channel0);
11353
BASISU_NOTE_UNUSED(channel1);
11354
BASISU_NOTE_UNUSED(decode_flags);
11355
BASISU_NOTE_UNUSED(orig_width);
11356
BASISU_NOTE_UNUSED(orig_height);
11357
11358
assert(g_transcoder_initialized);
11359
if (!g_transcoder_initialized)
11360
{
11361
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder not globally initialized.\n");
11362
return false;
11363
}
11364
11365
#if BASISD_SUPPORT_UASTC_HDR
11366
11367
// TODO: Optimize this
11368
11369
basisu::vector2D<astc_helpers::astc_block> decoded_blocks;
11370
uint32_t dec_width = 0, dec_height = 0;
11371
bool dec_status = astc_6x6_hdr::decode_6x6_hdr(pImage_data, image_data_size, decoded_blocks, dec_width, dec_height);
11372
if (!dec_status)
11373
{
11374
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: decode_6x6_hdr() failed.\n");
11375
return false;
11376
}
11377
11378
if ((dec_width != orig_width) || (dec_height != orig_height) ||
11379
(decoded_blocks.get_width() != num_blocks_x) || (decoded_blocks.get_height() != num_blocks_y))
11380
{
11381
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: unexpected decoded width/height\n");
11382
return false;
11383
}
11384
11385
//const uint32_t total_src_blocks = num_blocks_x * num_blocks_y;
11386
11387
const uint32_t output_block_width = get_block_width(fmt);
11388
//const uint32_t output_block_height = get_block_height(fmt);
11389
11390
if (!output_row_pitch_in_blocks_or_pixels)
11391
{
11392
if (basis_block_format_is_uncompressed(fmt))
11393
output_row_pitch_in_blocks_or_pixels = orig_width;
11394
else
11395
output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width;
11396
}
11397
11398
if (basis_block_format_is_uncompressed(fmt))
11399
{
11400
if (!output_rows_in_pixels)
11401
output_rows_in_pixels = orig_height;
11402
}
11403
11404
const astc_blk* pSource_block = (const astc_blk *)decoded_blocks.get_ptr();
11405
11406
bool status = false;
11407
11408
half_float unpacked_blocks[12][12][3]; // [y][x][c]
11409
11410
assert(((orig_width + 5) / 6) == num_blocks_x);
11411
assert(((orig_height + 5) / 6) == num_blocks_y);
11412
11413
if (fmt == block_format::cBC6H)
11414
{
11415
const uint32_t num_dst_blocks_x = (orig_width + 3) / 4;
11416
const uint32_t num_dst_blocks_y = (orig_height + 3) / 4;
11417
11418
if (!output_row_pitch_in_blocks_or_pixels)
11419
{
11420
output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x;
11421
}
11422
else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x)
11423
{
11424
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n");
11425
return false;
11426
}
11427
11428
if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block))
11429
{
11430
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n");
11431
return false;
11432
}
11433
11434
fast_bc6h_params bc6h_enc_params;
11435
const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0;
11436
bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0;
11437
11438
for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2)
11439
{
11440
const uint32_t num_inner_blocks_y = basisu::minimum<uint32_t>(2, num_blocks_y - src_block_y);
11441
11442
for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2)
11443
{
11444
const uint32_t num_inner_blocks_x = basisu::minimum<uint32_t>(2, num_blocks_x - src_block_x);
11445
11446
for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++)
11447
{
11448
for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++)
11449
{
11450
const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix);
11451
11452
half_float blk_texels[6][6][4];
11453
11454
astc_helpers::log_astc_block log_blk;
11455
status = astc_helpers::unpack_block(pS, log_blk, 6, 6);
11456
if (!status)
11457
{
11458
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11459
return false;
11460
}
11461
11462
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11463
if (!status)
11464
{
11465
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11466
return false;
11467
}
11468
11469
for (uint32_t y = 0; y < 6; y++)
11470
{
11471
for (uint32_t x = 0; x < 6; x++)
11472
{
11473
unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0];
11474
unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1];
11475
unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2];
11476
} // x
11477
} // y
11478
11479
} // ix
11480
11481
} // iy
11482
11483
const uint32_t dst_x = src_block_x * 6;
11484
assert((dst_x & 3) == 0);
11485
const uint32_t dst_block_x = dst_x >> 2;
11486
11487
const uint32_t dst_y = src_block_y * 6;
11488
assert((dst_y & 3) == 0);
11489
const uint32_t dst_block_y = dst_y >> 2;
11490
11491
const uint32_t num_inner_dst_blocks_x = basisu::minimum<uint32_t>(3, num_dst_blocks_x - dst_block_x);
11492
const uint32_t num_inner_dst_blocks_y = basisu::minimum<uint32_t>(3, num_dst_blocks_y - dst_block_y);
11493
11494
for (uint32_t dy = 0; dy < num_inner_dst_blocks_y; dy++)
11495
{
11496
for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++)
11497
{
11498
bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels;
11499
11500
half_float src_pixels[4][4][3]; // [y][x][c]
11501
11502
for (uint32_t y = 0; y < 4; y++)
11503
{
11504
const uint32_t src_pixel_y = basisu::minimum<uint32_t>(dy * 4 + y, num_inner_blocks_y * 6 - 1);
11505
11506
for (uint32_t x = 0; x < 4; x++)
11507
{
11508
const uint32_t src_pixel_x = basisu::minimum<uint32_t>(dx * 4 + x, num_inner_blocks_x * 6 - 1);
11509
11510
assert((src_pixel_y < 12) && (src_pixel_x < 12));
11511
11512
src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0];
11513
src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1];
11514
src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2];
11515
11516
} // x
11517
} // y
11518
11519
astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params);
11520
11521
} // dx
11522
} // dy
11523
11524
} // block_x
11525
11526
} // block_y
11527
11528
status = true;
11529
}
11530
else
11531
{
11532
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
11533
{
11534
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
11535
11536
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
11537
{
11538
switch (fmt)
11539
{
11540
case block_format::cASTC_HDR_6x6:
11541
{
11542
// Nothing to do, ASTC HDR 6x6 is just ASTC.
11543
// TODO: Optimize this copy
11544
memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block));
11545
status = true;
11546
break;
11547
}
11548
case block_format::cRGB_9E5:
11549
{
11550
astc_helpers::log_astc_block log_blk;
11551
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11552
if (status)
11553
{
11554
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
11555
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
11556
);
11557
11558
uint32_t blk_texels[6][6];
11559
11560
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5);
11561
11562
if (status)
11563
{
11564
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11565
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11566
11567
for (uint32_t y = 0; y < max_y; y++)
11568
{
11569
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
11570
11571
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
11572
} // y
11573
}
11574
}
11575
11576
break;
11577
}
11578
case block_format::cRGBA_HALF:
11579
{
11580
astc_helpers::log_astc_block log_blk;
11581
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11582
if (status)
11583
{
11584
half_float* pDst_pixels = reinterpret_cast<half_float*>(
11585
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
11586
);
11587
11588
half_float blk_texels[6][6][4];
11589
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11590
11591
if (status)
11592
{
11593
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11594
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11595
11596
for (uint32_t y = 0; y < max_y; y++)
11597
{
11598
for (uint32_t x = 0; x < max_x; x++)
11599
{
11600
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
11601
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
11602
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
11603
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
11604
} // x
11605
11606
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
11607
} // y
11608
}
11609
}
11610
11611
break;
11612
}
11613
case block_format::cRGB_HALF:
11614
{
11615
astc_helpers::log_astc_block log_blk;
11616
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11617
if (status)
11618
{
11619
half_float* pDst_pixels =
11620
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
11621
11622
half_float blk_texels[6][6][4];
11623
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11624
if (status)
11625
{
11626
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11627
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11628
11629
for (uint32_t y = 0; y < max_y; y++)
11630
{
11631
for (uint32_t x = 0; x < max_x; x++)
11632
{
11633
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
11634
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
11635
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
11636
} // x
11637
11638
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
11639
} // y
11640
}
11641
}
11642
11643
break;
11644
}
11645
default:
11646
assert(0);
11647
break;
11648
11649
}
11650
11651
if (!status)
11652
{
11653
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11654
return false;
11655
}
11656
11657
} // block_x
11658
11659
} // block_y
11660
}
11661
11662
return true;
11663
#else
11664
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: ASTC HDR is unsupported\n");
11665
11666
BASISU_NOTE_UNUSED(decode_flags);
11667
BASISU_NOTE_UNUSED(channel0);
11668
BASISU_NOTE_UNUSED(channel1);
11669
BASISU_NOTE_UNUSED(output_rows_in_pixels);
11670
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
11671
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
11672
BASISU_NOTE_UNUSED(fmt);
11673
BASISU_NOTE_UNUSED(image_data_size);
11674
BASISU_NOTE_UNUSED(pImage_data);
11675
BASISU_NOTE_UNUSED(num_blocks_x);
11676
BASISU_NOTE_UNUSED(num_blocks_y);
11677
BASISU_NOTE_UNUSED(pDst_blocks);
11678
11679
return false;
11680
#endif
11681
}
11682
11683
bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image(
11684
transcoder_texture_format target_format,
11685
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
11686
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
11687
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
11688
uint32_t slice_offset, uint32_t slice_length,
11689
uint32_t decode_flags,
11690
bool has_alpha,
11691
bool is_video,
11692
uint32_t output_row_pitch_in_blocks_or_pixels,
11693
basisu_transcoder_state* pState,
11694
uint32_t output_rows_in_pixels,
11695
int channel0, int channel1)
11696
{
11697
BASISU_NOTE_UNUSED(is_video);
11698
BASISU_NOTE_UNUSED(level_index);
11699
BASISU_NOTE_UNUSED(decode_flags);
11700
11701
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
11702
{
11703
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: source data buffer too small\n");
11704
return false;
11705
}
11706
11707
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
11708
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
11709
11710
if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
11711
{
11712
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: output buffer size too small\n");
11713
return false;
11714
}
11715
11716
bool status = false;
11717
11718
switch (target_format)
11719
{
11720
case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
11721
{
11722
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6,
11723
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11724
11725
if (!status)
11726
{
11727
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
11728
}
11729
break;
11730
}
11731
case transcoder_texture_format::cTFBC6H:
11732
{
11733
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
11734
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11735
if (!status)
11736
{
11737
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
11738
}
11739
break;
11740
}
11741
case transcoder_texture_format::cTFRGB_HALF:
11742
{
11743
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
11744
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11745
if (!status)
11746
{
11747
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
11748
}
11749
break;
11750
}
11751
case transcoder_texture_format::cTFRGBA_HALF:
11752
{
11753
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
11754
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11755
if (!status)
11756
{
11757
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11758
}
11759
break;
11760
}
11761
case transcoder_texture_format::cTFRGB_9E5:
11762
{
11763
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
11764
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1 , decode_flags);
11765
if (!status)
11766
{
11767
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11768
}
11769
break;
11770
}
11771
default:
11772
{
11773
assert(0);
11774
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: Invalid format\n");
11775
break;
11776
}
11777
}
11778
11779
return status;
11780
}
11781
11782
//------------------------------------------------------------------------------------------------
11783
11784
// Constructs a transcoder that is not yet ready to transcode (m_ready_to_transcode starts out false).
basisu_transcoder::basisu_transcoder()
	: m_ready_to_transcode(false)
{
}
11788
11789
bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const
11790
{
11791
if (!validate_header(pData, data_size))
11792
return false;
11793
11794
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11795
11796
#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
11797
if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16)
11798
{
11799
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n");
11800
return false;
11801
}
11802
11803
if (full_validation)
11804
{
11805
if (crc16(reinterpret_cast<const uint8_t*>(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16)
11806
{
11807
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n");
11808
return false;
11809
}
11810
}
11811
#endif
11812
11813
return true;
11814
}
11815
11816
bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const
11817
{
11818
if (data_size <= sizeof(basis_file_header))
11819
return false;
11820
11821
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11822
11823
if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
11824
{
11825
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
11826
return false;
11827
}
11828
11829
uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
11830
if (data_size < expected_file_size)
11831
{
11832
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n");
11833
return false;
11834
}
11835
11836
if ((!pHeader->m_total_slices) || (!pHeader->m_total_images))
11837
{
11838
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n");
11839
return false;
11840
}
11841
11842
if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
11843
((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
11844
)
11845
{
11846
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
11847
return false;
11848
}
11849
11850
return true;
11851
}
11852
11853
bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const
11854
{
11855
if (data_size <= sizeof(basis_file_header))
11856
{
11857
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n");
11858
return false;
11859
}
11860
11861
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11862
11863
if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
11864
{
11865
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
11866
return false;
11867
}
11868
11869
uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
11870
if (data_size < expected_file_size)
11871
{
11872
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n");
11873
return false;
11874
}
11875
11876
if ((!pHeader->m_total_images) || (!pHeader->m_total_slices))
11877
{
11878
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n");
11879
return false;
11880
}
11881
11882
if (pHeader->m_total_images > pHeader->m_total_slices)
11883
{
11884
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n");
11885
return false;
11886
}
11887
11888
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
11889
{
11890
if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
11891
{
11892
if (pHeader->m_total_slices & 1)
11893
{
11894
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n");
11895
return false;
11896
}
11897
}
11898
11899
// This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too.
11900
if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0)
11901
{
11902
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
11903
return false;
11904
}
11905
}
11906
else
11907
{
11908
if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0)
11909
{
11910
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
11911
return false;
11912
}
11913
}
11914
11915
if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
11916
((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
11917
)
11918
{
11919
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
11920
return false;
11921
}
11922
11923
return true;
11924
}
11925
11926
// Returns the texture type stored in the file header.
// If the header fails quick validation, or the stored type is not below cBASISTexTypeTotal,
// cBASISTexType2DArray is returned as the fallback value.
basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const
{
	if (!validate_header_quick(pData, data_size))
	{
		BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n");
		return cBASISTexType2DArray;
	}

	const uint8_t raw_type = static_cast<uint8_t>(static_cast<const basis_file_header*>(pData)->m_tex_type);
	const basis_texture_type tex_type = static_cast<basis_texture_type>(raw_type);

	if (tex_type < cBASISTexTypeTotal)
		return tex_type;

	BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n");
	return cBASISTexType2DArray;
}
11946
11947
bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const
11948
{
11949
if (!validate_header_quick(pData, data_size))
11950
{
11951
BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n");
11952
return false;
11953
}
11954
11955
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
11956
11957
userdata0 = pHeader->m_userdata0;
11958
userdata1 = pHeader->m_userdata1;
11959
return true;
11960
}
11961
11962
uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const
11963
{
11964
if (!validate_header_quick(pData, data_size))
11965
{
11966
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
11967
return 0;
11968
}
11969
11970
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
11971
11972
return pHeader->m_total_images;
11973
}
11974
11975
// Returns the texture format stored in the file header.
// basis_tex_format::cETC1S is returned as the fallback value if the header fails quick validation.
basis_tex_format basisu_transcoder::get_basis_tex_format(const void* pData, uint32_t data_size) const
{
	if (!validate_header_quick(pData, data_size))
	{
		BASISU_DEVEL_ERROR("basisu_transcoder::get_basis_tex_format: header validation failed\n");
		return basis_tex_format::cETC1S;
	}

	const uint32_t raw_format = static_cast<const basis_file_header*>(pData)->m_tex_format;
	return static_cast<basis_tex_format>(raw_format);
}
11987
11988
bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const
11989
{
11990
if (!validate_header_quick(pData, data_size))
11991
{
11992
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n");
11993
return false;
11994
}
11995
11996
int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
11997
if (slice_index < 0)
11998
{
11999
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n");
12000
return false;
12001
}
12002
12003
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12004
12005
if (image_index >= pHeader->m_total_images)
12006
{
12007
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
12008
return false;
12009
}
12010
12011
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12012
12013
uint32_t total_levels = 1;
12014
for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
12015
if (pSlice_descs[i].m_image_index == image_index)
12016
total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
12017
else
12018
break;
12019
12020
if (total_levels > 16)
12021
{
12022
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
12023
return false;
12024
}
12025
12026
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12027
12028
image_info.m_image_index = image_index;
12029
image_info.m_total_levels = total_levels;
12030
12031
image_info.m_alpha_flag = false;
12032
12033
// For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
12034
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12035
image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12036
else
12037
image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12038
12039
image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12040
12041
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12042
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12043
12044
image_info.m_width = slice_desc.m_num_blocks_x * block_width;
12045
image_info.m_height = slice_desc.m_num_blocks_y * block_height;
12046
image_info.m_orig_width = slice_desc.m_orig_width;
12047
image_info.m_orig_height = slice_desc.m_orig_height;
12048
image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
12049
image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
12050
image_info.m_block_width = block_width;
12051
image_info.m_block_height = block_height;
12052
image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
12053
image_info.m_first_slice_index = slice_index;
12054
12055
return true;
12056
}
12057
12058
uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const
12059
{
12060
if (!validate_header_quick(pData, data_size))
12061
{
12062
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n");
12063
return false;
12064
}
12065
12066
int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
12067
if (slice_index < 0)
12068
{
12069
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n");
12070
return false;
12071
}
12072
12073
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12074
12075
if (image_index >= pHeader->m_total_images)
12076
{
12077
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n");
12078
return false;
12079
}
12080
12081
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12082
12083
uint32_t total_levels = 1;
12084
for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
12085
if (pSlice_descs[i].m_image_index == image_index)
12086
total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
12087
else
12088
break;
12089
12090
const uint32_t cMaxSupportedLevels = 16;
12091
if (total_levels > cMaxSupportedLevels)
12092
{
12093
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n");
12094
return false;
12095
}
12096
12097
return total_levels;
12098
}
12099
12100
bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const
12101
{
12102
if (!validate_header_quick(pData, data_size))
12103
{
12104
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n");
12105
return false;
12106
}
12107
12108
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12109
if (slice_index < 0)
12110
{
12111
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n");
12112
return false;
12113
}
12114
12115
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12116
12117
if (image_index >= pHeader->m_total_images)
12118
{
12119
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n");
12120
return false;
12121
}
12122
12123
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12124
12125
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12126
12127
orig_width = slice_desc.m_orig_width;
12128
orig_height = slice_desc.m_orig_height;
12129
total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
12130
12131
return true;
12132
}
12133
12134
bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const
12135
{
12136
if (!validate_header_quick(pData, data_size))
12137
{
12138
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n");
12139
return false;
12140
}
12141
12142
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12143
if (slice_index < 0)
12144
{
12145
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n");
12146
return false;
12147
}
12148
12149
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12150
12151
if (image_index >= pHeader->m_total_images)
12152
{
12153
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n");
12154
return false;
12155
}
12156
12157
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12158
12159
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12160
12161
image_info.m_image_index = image_index;
12162
image_info.m_level_index = level_index;
12163
12164
// For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
12165
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12166
image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12167
else
12168
image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12169
12170
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12171
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12172
12173
image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12174
image_info.m_width = slice_desc.m_num_blocks_x * block_width;
12175
image_info.m_height = slice_desc.m_num_blocks_y * block_height;
12176
image_info.m_orig_width = slice_desc.m_orig_width;
12177
image_info.m_orig_height = slice_desc.m_orig_height;
12178
image_info.m_block_width = block_width;
12179
image_info.m_block_height = block_height;
12180
image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
12181
image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
12182
image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
12183
image_info.m_first_slice_index = slice_index;
12184
12185
image_info.m_rgb_file_ofs = slice_desc.m_file_ofs;
12186
image_info.m_rgb_file_len = slice_desc.m_file_size;
12187
image_info.m_alpha_file_ofs = 0;
12188
image_info.m_alpha_file_len = 0;
12189
12190
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12191
{
12192
if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
12193
{
12194
assert((slice_index + 1) < (int)pHeader->m_total_slices);
12195
image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs;
12196
image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size;
12197
}
12198
}
12199
12200
return true;
12201
}
12202
12203
bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const
12204
{
12205
if (!validate_file_checksums(pData, data_size, false))
12206
{
12207
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n");
12208
return false;
12209
}
12210
12211
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12212
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12213
12214
file_info.m_version = pHeader->m_ver;
12215
12216
file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc);
12217
12218
file_info.m_total_selectors = pHeader->m_total_selectors;
12219
file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs;
12220
file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size;
12221
12222
file_info.m_total_endpoints = pHeader->m_total_endpoints;
12223
file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs;
12224
file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size;
12225
12226
file_info.m_tables_ofs = pHeader->m_tables_file_ofs;
12227
file_info.m_tables_size = pHeader->m_tables_file_size;
12228
12229
file_info.m_tex_format = static_cast<basis_tex_format>(static_cast<int>(pHeader->m_tex_format));
12230
12231
file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S);
12232
12233
file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0;
12234
file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12235
12236
const uint32_t total_slices = pHeader->m_total_slices;
12237
12238
file_info.m_slice_info.resize(total_slices);
12239
12240
file_info.m_slices_size = 0;
12241
12242
file_info.m_tex_type = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
12243
12244
if (file_info.m_tex_type > cBASISTexTypeTotal)
12245
{
12246
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n");
12247
return false;
12248
}
12249
12250
file_info.m_us_per_frame = pHeader->m_us_per_frame;
12251
file_info.m_userdata0 = pHeader->m_userdata0;
12252
file_info.m_userdata1 = pHeader->m_userdata1;
12253
12254
file_info.m_image_mipmap_levels.resize(0);
12255
file_info.m_image_mipmap_levels.resize(pHeader->m_total_images);
12256
12257
file_info.m_total_images = pHeader->m_total_images;
12258
12259
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12260
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12261
file_info.m_block_width = block_width;
12262
file_info.m_block_height = block_height;
12263
12264
for (uint32_t i = 0; i < total_slices; i++)
12265
{
12266
file_info.m_slices_size += pSlice_descs[i].m_file_size;
12267
12268
basisu_slice_info& slice_info = file_info.m_slice_info[i];
12269
12270
slice_info.m_orig_width = pSlice_descs[i].m_orig_width;
12271
slice_info.m_orig_height = pSlice_descs[i].m_orig_height;
12272
slice_info.m_width = pSlice_descs[i].m_num_blocks_x * block_width;
12273
slice_info.m_height = pSlice_descs[i].m_num_blocks_y * block_height;
12274
slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x;
12275
slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y;
12276
slice_info.m_block_width = block_width;
12277
slice_info.m_block_height = block_height;
12278
slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y;
12279
slice_info.m_compressed_size = pSlice_descs[i].m_file_size;
12280
slice_info.m_slice_index = i;
12281
slice_info.m_image_index = pSlice_descs[i].m_image_index;
12282
slice_info.m_level_index = pSlice_descs[i].m_level_index;
12283
slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16;
12284
slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0;
12285
slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12286
12287
if (pSlice_descs[i].m_image_index >= pHeader->m_total_images)
12288
{
12289
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n");
12290
return false;
12291
}
12292
12293
file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum<uint32_t>(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1);
12294
12295
if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16)
12296
{
12297
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n");
12298
return false;
12299
}
12300
}
12301
12302
return true;
12303
}
12304
12305
bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size)
12306
{
12307
if (!validate_header_quick(pData, data_size))
12308
{
12309
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n");
12310
return false;
12311
}
12312
12313
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12314
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12315
12316
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12317
{
12318
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
12319
{
12320
m_lowlevel_etc1s_decoder.clear();
12321
}
12322
12323
if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook)
12324
{
12325
if (!m_lowlevel_etc1s_decoder.get_global_codebooks())
12326
{
12327
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n");
12328
return false;
12329
}
12330
if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size())
12331
{
12332
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n");
12333
return false;
12334
}
12335
if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) ||
12336
(m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors))
12337
{
12338
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n");
12339
return false;
12340
}
12341
if (!pHeader->m_tables_file_size)
12342
{
12343
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n");
12344
return false;
12345
}
12346
if (pHeader->m_tables_file_ofs > data_size)
12347
{
12348
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n");
12349
return false;
12350
}
12351
if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
12352
{
12353
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n");
12354
return false;
12355
}
12356
}
12357
else
12358
{
12359
if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size)
12360
{
12361
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n");
12362
return false;
12363
}
12364
12365
if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size))
12366
{
12367
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n");
12368
return false;
12369
}
12370
12371
if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs))
12372
{
12373
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n");
12374
return false;
12375
}
12376
12377
if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs))
12378
{
12379
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
12380
return false;
12381
}
12382
12383
if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
12384
{
12385
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
12386
return false;
12387
}
12388
12389
if (!m_lowlevel_etc1s_decoder.decode_palettes(
12390
pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size,
12391
pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size))
12392
{
12393
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n");
12394
return false;
12395
}
12396
}
12397
12398
if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size))
12399
{
12400
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n");
12401
return false;
12402
}
12403
}
12404
else
12405
{
12406
// Nothing special to do for UASTC/UASTC HDR.
12407
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
12408
{
12409
m_lowlevel_etc1s_decoder.clear();
12410
}
12411
}
12412
12413
m_ready_to_transcode = true;
12414
12415
return true;
12416
}
12417
12418
bool basisu_transcoder::stop_transcoding()
12419
{
12420
m_lowlevel_etc1s_decoder.clear();
12421
12422
m_ready_to_transcode = false;
12423
12424
return true;
12425
}
12426
12427
bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt,
12428
uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const
12429
{
12430
if (!m_ready_to_transcode)
12431
{
12432
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n");
12433
return false;
12434
}
12435
12436
if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
12437
{
12438
// TODO: Not yet supported
12439
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
12440
return false;
12441
}
12442
12443
if (!validate_header_quick(pData, data_size))
12444
{
12445
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n");
12446
return false;
12447
}
12448
12449
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12450
12451
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12452
12453
if (slice_index >= pHeader->m_total_slices)
12454
{
12455
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n");
12456
return false;
12457
}
12458
12459
const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index];
12460
12461
if (basis_block_format_is_uncompressed(fmt))
12462
{
12463
// Assume the output buffer is orig_width by orig_height
12464
if (!output_row_pitch_in_blocks_or_pixels)
12465
output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width;
12466
12467
if (!output_rows_in_pixels)
12468
output_rows_in_pixels = slice_desc.m_orig_height;
12469
12470
// Now make sure the output buffer is large enough, or we'll overwrite memory.
12471
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
12472
{
12473
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
12474
return false;
12475
}
12476
}
12477
else if (fmt == block_format::cFXT1_RGB)
12478
{
12479
const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8;
12480
const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4;
12481
const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
12482
12483
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
12484
{
12485
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
12486
return false;
12487
}
12488
}
12489
else if (fmt == block_format::cASTC_HDR_6x6)
12490
{
12491
const uint32_t num_blocks_6x6_x = (slice_desc.m_orig_width + 5) / 6;
12492
const uint32_t num_blocks_6x6_y = (slice_desc.m_orig_height + 5) / 6;
12493
const uint32_t total_blocks_6x6 = num_blocks_6x6_x * num_blocks_6x6_y;
12494
12495
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6)
12496
{
12497
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6\n");
12498
return false;
12499
}
12500
}
12501
else
12502
{
12503
// must be a 4x4 pixel block format
12504
const uint32_t num_blocks_4x4_x = (slice_desc.m_orig_width + 3) / 4;
12505
const uint32_t num_blocks_4x4_y = (slice_desc.m_orig_height + 3) / 4;
12506
const uint32_t total_4x4_blocks = num_blocks_4x4_x * num_blocks_4x4_y;
12507
12508
if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks)
12509
{
12510
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n");
12511
return false;
12512
}
12513
}
12514
12515
if ((pHeader->m_tex_format == (uint32_t)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (uint32_t)basis_tex_format::cUASTC4x4))
12516
{
12517
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
12518
{
12519
if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4)))
12520
{
12521
// PVRTC1 only supports power of 2 dimensions
12522
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n");
12523
return false;
12524
}
12525
}
12526
}
12527
12528
if (slice_desc.m_file_ofs > data_size)
12529
{
12530
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n");
12531
return false;
12532
}
12533
12534
const uint32_t data_size_left = data_size - slice_desc.m_file_ofs;
12535
if (data_size_left < slice_desc.m_file_size)
12536
{
12537
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n");
12538
return false;
12539
}
12540
12541
if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6)
12542
{
12543
return m_lowlevel_astc_6x6_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12544
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12545
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12546
output_rows_in_pixels, channel0, channel1, decode_flags);
12547
}
12548
else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
12549
{
12550
return m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12551
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12552
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12553
output_rows_in_pixels, channel0, channel1, decode_flags);
12554
}
12555
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
12556
{
12557
return m_lowlevel_uastc_4x4_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12558
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12559
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12560
output_rows_in_pixels, channel0, channel1, decode_flags);
12561
}
12562
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12563
{
12564
return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12565
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12566
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12567
output_rows_in_pixels, channel0, channel1, decode_flags);
12568
}
12569
else
12570
{
12571
return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12572
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12573
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12574
(decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels);
12575
}
12576
}
12577
12578
int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
12579
{
12580
BASISU_NOTE_UNUSED(data_size);
12581
12582
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12583
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12584
12585
// For very large basis files this search could be painful
12586
// TODO: Binary search this
12587
for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
12588
{
12589
const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter];
12590
if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
12591
return slice_iter;
12592
}
12593
12594
BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n");
12595
12596
return -1;
12597
}
12598
12599
int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const
12600
{
12601
if (!validate_header_quick(pData, data_size))
12602
{
12603
BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n");
12604
return false;
12605
}
12606
12607
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12608
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12609
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
12610
12611
// For very large basis files this search could be painful
12612
// TODO: Binary search this
12613
for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
12614
{
12615
const basis_slice_desc& slice_desc = pSlice_descs[slice_iter];
12616
if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
12617
{
12618
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12619
{
12620
const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12621
if (slice_alpha == alpha_data)
12622
return slice_iter;
12623
}
12624
else
12625
{
12626
return slice_iter;
12627
}
12628
}
12629
}
12630
12631
BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n");
12632
12633
return -1;
12634
}
12635
12636
void basisu_transcoder::write_opaque_alpha_blocks(
12637
uint32_t num_blocks_x, uint32_t num_blocks_y,
12638
void* pOutput_blocks, block_format fmt,
12639
uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels)
12640
{
12641
// 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused
12642
// when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A
12643
BASISU_NOTE_UNUSED(num_blocks_y);
12644
BASISU_NOTE_UNUSED(pOutput_blocks);
12645
BASISU_NOTE_UNUSED(block_stride_in_bytes);
12646
12647
if (!output_row_pitch_in_blocks_or_pixels)
12648
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
12649
12650
if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11))
12651
{
12652
#if BASISD_SUPPORT_ETC2_EAC_A8
12653
eac_block blk;
12654
blk.m_base = 255;
12655
blk.m_multiplier = 1;
12656
blk.m_table = 13;
12657
12658
// Selectors are all 4's
12659
memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
12660
12661
for (uint32_t y = 0; y < num_blocks_y; y++)
12662
{
12663
uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
12664
for (uint32_t x = 0; x < num_blocks_x; x++)
12665
{
12666
memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
12667
dst_ofs += block_stride_in_bytes;
12668
}
12669
}
12670
#endif
12671
}
12672
else if (fmt == block_format::cBC4)
12673
{
12674
#if BASISD_SUPPORT_DXT5A
12675
dxt5a_block blk;
12676
blk.m_endpoints[0] = 255;
12677
blk.m_endpoints[1] = 255;
12678
memset(blk.m_selectors, 0, sizeof(blk.m_selectors));
12679
12680
for (uint32_t y = 0; y < num_blocks_y; y++)
12681
{
12682
uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
12683
for (uint32_t x = 0; x < num_blocks_x; x++)
12684
{
12685
memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
12686
dst_ofs += block_stride_in_bytes;
12687
}
12688
}
12689
#endif
12690
}
12691
}
12692
12693
bool basisu_transcoder::transcode_image_level(
12694
const void* pData, uint32_t data_size,
12695
uint32_t image_index, uint32_t level_index,
12696
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
12697
transcoder_texture_format fmt,
12698
uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const
12699
{
12700
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt);
12701
12702
if (!m_ready_to_transcode)
12703
{
12704
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n");
12705
return false;
12706
}
12707
12708
//const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
12709
12710
if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
12711
{
12712
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
12713
// TODO: Not yet supported
12714
return false;
12715
}
12716
12717
if (!validate_header_quick(pData, data_size))
12718
{
12719
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n");
12720
return false;
12721
}
12722
12723
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12724
12725
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12726
12727
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
12728
12729
const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12730
12731
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12732
if (slice_index < 0)
12733
{
12734
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n");
12735
// Unable to find the requested image/level
12736
return false;
12737
}
12738
12739
if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
12740
{
12741
// Switch to PVRTC1 RGB if the input doesn't have alpha.
12742
fmt = transcoder_texture_format::cTFPVRTC1_4_RGB;
12743
}
12744
12745
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12746
{
12747
if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha)
12748
{
12749
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n");
12750
12751
// The first slice shouldn't have alpha data in a properly formed basis file
12752
return false;
12753
}
12754
12755
if (basis_file_has_alpha_slices)
12756
{
12757
// The alpha data should immediately follow the color data, and have the same resolution.
12758
if ((slice_index + 1U) >= pHeader->m_total_slices)
12759
{
12760
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n");
12761
// basis file is missing the alpha slice
12762
return false;
12763
}
12764
12765
// Basic sanity checks
12766
if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0)
12767
{
12768
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n");
12769
// This slice should have alpha data
12770
return false;
12771
}
12772
12773
if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y))
12774
{
12775
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n");
12776
// Alpha slice should have been the same res as the color slice
12777
return false;
12778
}
12779
}
12780
}
12781
12782
bool status = false;
12783
12784
if ((pHeader->m_tex_format == (int)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4))
12785
{
12786
// Only do this on 4x4 LDR formats that supports transcoding to PVRTC1.
12787
const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y;
12788
12789
if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks))
12790
{
12791
// The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves.
12792
// For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8.
12793
// However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory.
12794
memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
12795
}
12796
}
12797
12798
if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6)
12799
{
12800
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12801
12802
// Use the container independent image transcode method.
12803
status = m_lowlevel_astc_6x6_hdr_decoder.transcode_image(fmt,
12804
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12805
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12806
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12807
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12808
}
12809
else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
12810
{
12811
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12812
12813
// Use the container independent image transcode method.
12814
status = m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_image(fmt,
12815
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12816
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12817
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12818
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12819
}
12820
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
12821
{
12822
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12823
12824
// Use the container independent image transcode method.
12825
status = m_lowlevel_uastc_4x4_hdr_decoder.transcode_image(fmt,
12826
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12827
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12828
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12829
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12830
}
12831
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12832
{
12833
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12834
12835
// Use the container independent image transcode method.
12836
status = m_lowlevel_uastc_decoder.transcode_image(fmt,
12837
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12838
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12839
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12840
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12841
}
12842
else
12843
{
12844
// ETC1S
12845
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12846
const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr;
12847
12848
assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0);
12849
12850
if (pAlpha_slice_desc)
12851
{
12852
// Basic sanity checks
12853
assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0);
12854
assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x);
12855
assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y);
12856
assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index);
12857
}
12858
12859
// Use the container independent image transcode method.
12860
status = m_lowlevel_etc1s_decoder.transcode_image(fmt,
12861
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12862
(const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12863
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12864
(pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U,
12865
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12866
12867
} // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12868
12869
if (!status)
12870
{
12871
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n");
12872
}
12873
else
12874
{
12875
//BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n");
12876
}
12877
12878
return status;
12879
}
12880
12881
// Returns the size in bytes of one output unit of 'fmt': 8 or 16 for the block formats listed
// below, and the per-pixel size for the uncompressed formats (RGBA32/RGB_9E5: 4, 16-bit
// formats: 2, half-float formats: 3 or 4 half_floats).
// Returns 0 (after asserting and logging a devel error) for an unrecognized format.
uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt)
{
	switch (fmt)
	{
	case transcoder_texture_format::cTFETC1_RGB:
	case transcoder_texture_format::cTFBC1_RGB:
	case transcoder_texture_format::cTFBC4_R:
	case transcoder_texture_format::cTFPVRTC1_4_RGB:
	case transcoder_texture_format::cTFPVRTC1_4_RGBA:
	case transcoder_texture_format::cTFATC_RGB:
	case transcoder_texture_format::cTFPVRTC2_4_RGB:
	case transcoder_texture_format::cTFPVRTC2_4_RGBA:
	case transcoder_texture_format::cTFETC2_EAC_R11:
		return 8;
	case transcoder_texture_format::cTFBC7_RGBA:
	case transcoder_texture_format::cTFBC7_ALT:
	case transcoder_texture_format::cTFBC6H:
	case transcoder_texture_format::cTFETC2_RGBA:
	case transcoder_texture_format::cTFBC3_RGBA:
	case transcoder_texture_format::cTFBC5_RG:
	case transcoder_texture_format::cTFASTC_4x4_RGBA:
	case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
	case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
	case transcoder_texture_format::cTFATC_RGBA:
	case transcoder_texture_format::cTFFXT1_RGB:
	case transcoder_texture_format::cTFETC2_EAC_RG11:
		return 16;
	case transcoder_texture_format::cTFRGBA32:
	case transcoder_texture_format::cTFRGB_9E5:
		return sizeof(uint32_t);
	case transcoder_texture_format::cTFRGB565:
	case transcoder_texture_format::cTFBGR565:
	case transcoder_texture_format::cTFRGBA4444:
		return sizeof(uint16_t);
	case transcoder_texture_format::cTFRGB_HALF:
		return sizeof(half_float) * 3;
	case transcoder_texture_format::cTFRGBA_HALF:
		return sizeof(half_float) * 4;
	default:
		assert(0);
		// Report this function's own name so the log points at the right call site.
		BASISU_DEVEL_ERROR("basis_get_bytes_per_block_or_pixel: Invalid fmt\n");
		break;
	}
	return 0;
}
12926
12927
// Returns a static, human-readable name for the transcoder texture format 'fmt'.
// cTFBC7_ALT shares the name "BC7_RGBA" with cTFBC7_RGBA.
// Returns "" (after asserting and logging a devel error) for an unrecognized format.
const char* basis_get_format_name(transcoder_texture_format fmt)
{
	switch (fmt)
	{
	case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB";
	case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB";
	case transcoder_texture_format::cTFBC4_R: return "BC4_R";
	case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB";
	case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
	case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA";
	case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA";
	case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA";
	case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
	case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
	case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
	case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_4X4_RGBA";
	case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return "ASTC_HDR_6X6_RGBA";
	case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
	case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
	case transcoder_texture_format::cTFRGBA32: return "RGBA32";
	case transcoder_texture_format::cTFRGB565: return "RGB565";
	case transcoder_texture_format::cTFBGR565: return "BGR565";
	case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
	case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
	case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
	case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
	case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
	case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
	case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
	case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
	case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
	case transcoder_texture_format::cTFBC6H: return "BC6H";
	default:
		assert(0);
		// Report this function's own name so the log points at the right call site.
		BASISU_DEVEL_ERROR("basis_get_format_name: Invalid fmt\n");
		break;
	}
	return "";
}
12966
12967
// Returns a static, human-readable name for the low-level block format 'fmt'.
// Only the formats listed below have names; any other value asserts, logs a devel error
// and returns "".
const char* basis_get_block_format_name(block_format fmt)
{
	switch (fmt)
	{
	case block_format::cETC1: return "ETC1";
	case block_format::cBC1: return "BC1";
	case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB";
	case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
	case block_format::cBC7: return "BC7";
	case block_format::cETC2_RGBA: return "ETC2_RGBA";
	case block_format::cBC3: return "BC3";
	case block_format::cASTC_4x4: return "ASTC_4x4";
	case block_format::cATC_RGB: return "ATC_RGB";
	case block_format::cRGBA32: return "RGBA32";
	case block_format::cRGB565: return "RGB565";
	case block_format::cBGR565: return "BGR565";
	case block_format::cRGBA4444: return "RGBA4444";
	case block_format::cRGBA_HALF: return "RGBA_HALF";
	case block_format::cRGB_HALF: return "RGB_HALF";
	case block_format::cRGB_9E5: return "RGB_9E5";
	case block_format::cUASTC_4x4: return "UASTC_4x4";
	case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
	case block_format::cBC6H: return "BC6H";
	case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
	case block_format::cASTC_HDR_6x6: return "ASTC_HDR_6x6";
	case block_format::cFXT1_RGB: return "FXT1_RGB";
	case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
	case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
	case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11";
	case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11";
	default:
		assert(0);
		// Report this function's own name so the log points at the right call site.
		BASISU_DEVEL_ERROR("basis_get_block_format_name: Invalid fmt\n");
		break;
	}
	return "";
}
13004
13005
// Maps a basis texture type to a static display string.
// Unknown values assert, log a devel error and yield "".
const char* basis_get_texture_type_name(basis_texture_type tex_type)
{
	const char* pName = nullptr;

	switch (tex_type)
	{
	case cBASISTexType2D:
		pName = "2D";
		break;
	case cBASISTexType2DArray:
		pName = "2D array";
		break;
	case cBASISTexTypeCubemapArray:
		pName = "cubemap array";
		break;
	case cBASISTexTypeVideoFrames:
		pName = "video";
		break;
	case cBASISTexTypeVolume:
		pName = "3D";
		break;
	default:
		break;
	}

	if (pName)
		return pName;

	// Not one of the known texture types.
	assert(0);
	BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n");
	return "";
}
13021
13022
// Returns true if the transcoder texture format 'fmt' is one of the alpha-capable formats
// listed below, false for everything else.
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
{
	// TODO: Technically ASTC HDR does support alpha, but our ASTC HDR encoders don't yet support it. Unsure what to do here.
	bool has_alpha = false;

	switch (fmt)
	{
	case transcoder_texture_format::cTFETC2_RGBA:
	case transcoder_texture_format::cTFBC3_RGBA:
	case transcoder_texture_format::cTFASTC_4x4_RGBA:
	// The two ASTC HDR formats technically support alpha, but we currently don't exploit that in our encoders.
	case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
	case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
	case transcoder_texture_format::cTFBC7_RGBA:
	case transcoder_texture_format::cTFBC7_ALT:
	case transcoder_texture_format::cTFPVRTC1_4_RGBA:
	case transcoder_texture_format::cTFPVRTC2_4_RGBA:
	case transcoder_texture_format::cTFATC_RGBA:
	case transcoder_texture_format::cTFRGBA32:
	case transcoder_texture_format::cTFRGBA4444:
	case transcoder_texture_format::cTFRGBA_HALF:
		has_alpha = true;
		break;
	default:
		break;
	}

	return has_alpha;
}
13046
13047
// Returns true if 'fmt' is one of the HDR transcoder output formats:
// ASTC HDR 4x4/6x6, BC6H, RGBA_HALF, RGB_HALF or RGB_9E5.
bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
{
	using tf = transcoder_texture_format;

	return (fmt == tf::cTFASTC_HDR_4x4_RGBA) ||
		(fmt == tf::cTFASTC_HDR_6x6_RGBA) ||
		(fmt == tf::cTFBC6H) ||
		(fmt == tf::cTFRGBA_HALF) ||
		(fmt == tf::cTFRGB_HALF) ||
		(fmt == tf::cTFRGB_9E5);
}
13063
13064
// Maps a transcoder texture format to the corresponding basisu::texture_format.
// Both BC7 variants map to cBC7 and both PVRTC2 variants map to cPVRTC2_4_RGBA.
// Unknown values assert, log a devel error and yield cInvalidTextureFormat.
basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt)
{
	using src = transcoder_texture_format;
	using dst = basisu::texture_format;

	switch (fmt)
	{
	case src::cTFETC1_RGB:          return dst::cETC1;
	case src::cTFBC1_RGB:           return dst::cBC1;
	case src::cTFBC4_R:             return dst::cBC4;
	case src::cTFPVRTC1_4_RGB:      return dst::cPVRTC1_4_RGB;
	case src::cTFPVRTC1_4_RGBA:     return dst::cPVRTC1_4_RGBA;
	case src::cTFBC7_RGBA:          return dst::cBC7;
	case src::cTFBC7_ALT:           return dst::cBC7;
	case src::cTFETC2_RGBA:         return dst::cETC2_RGBA;
	case src::cTFBC3_RGBA:          return dst::cBC3;
	case src::cTFBC5_RG:            return dst::cBC5;
	case src::cTFASTC_4x4_RGBA:     return dst::cASTC_LDR_4x4;
	case src::cTFASTC_HDR_4x4_RGBA: return dst::cASTC_HDR_4x4;
	case src::cTFASTC_HDR_6x6_RGBA: return dst::cASTC_HDR_6x6;
	case src::cTFBC6H:              return dst::cBC6HUnsigned;
	case src::cTFATC_RGB:           return dst::cATC_RGB;
	case src::cTFATC_RGBA:          return dst::cATC_RGBA_INTERPOLATED_ALPHA;
	case src::cTFRGBA32:            return dst::cRGBA32;
	case src::cTFRGB565:            return dst::cRGB565;
	case src::cTFBGR565:            return dst::cBGR565;
	case src::cTFRGBA4444:          return dst::cRGBA4444;
	case src::cTFRGBA_HALF:         return dst::cRGBA_HALF;
	case src::cTFRGB_9E5:           return dst::cRGB_9E5;
	case src::cTFRGB_HALF:          return dst::cRGB_HALF;
	case src::cTFFXT1_RGB:          return dst::cFXT1_RGB;
	case src::cTFPVRTC2_4_RGB:      return dst::cPVRTC2_4_RGBA;
	case src::cTFPVRTC2_4_RGBA:     return dst::cPVRTC2_4_RGBA;
	case src::cTFETC2_EAC_R11:      return dst::cETC2_R11_EAC;
	case src::cTFETC2_EAC_RG11:     return dst::cETC2_RG11_EAC;
	default:
		break;
	}

	// Only reached for a format with no mapping above.
	assert(0);
	BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
	return dst::cInvalidTextureFormat;
}
13103
13104
// Returns true if 'tex_type' is one of the uncompressed (per-pixel) transcoder output formats:
// RGBA32, RGB565, BGR565, RGBA4444, RGB_HALF, RGBA_HALF or RGB_9E5.
bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type)
{
	using tf = transcoder_texture_format;

	return (tex_type == tf::cTFRGBA32) ||
		(tex_type == tf::cTFRGB565) ||
		(tex_type == tf::cTFBGR565) ||
		(tex_type == tf::cTFRGBA4444) ||
		(tex_type == tf::cTFRGB_HALF) ||
		(tex_type == tf::cTFRGBA_HALF) ||
		(tex_type == tf::cTFRGB_9E5);
}
13121
13122
// Returns true if the low-level block format 'blk_fmt' is one of the uncompressed
// formats listed below, false for everything else.
bool basis_block_format_is_uncompressed(block_format blk_fmt)
{
	bool is_uncompressed = false;

	switch (blk_fmt)
	{
	case block_format::cRGB32:
	case block_format::cRGBA32:
	case block_format::cA32:
	case block_format::cRGB565:
	case block_format::cBGR565:
	case block_format::cRGBA4444:
	case block_format::cRGBA4444_COLOR:
	case block_format::cRGBA4444_ALPHA:
	case block_format::cRGBA4444_COLOR_OPAQUE:
	case block_format::cRGBA_HALF:
	case block_format::cRGB_HALF:
	case block_format::cRGB_9E5:
		is_uncompressed = true;
		break;
	default:
		break;
	}

	return is_uncompressed;
}
13144
13145
// Returns the per-pixel size in bytes of an uncompressed transcoder output format,
// or 0 if 'fmt' is not one of the uncompressed formats handled below.
uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt)
{
	uint32_t bytes_per_pixel = 0;

	switch (fmt)
	{
	// One 32-bit word per pixel.
	case transcoder_texture_format::cTFRGBA32:
	case transcoder_texture_format::cTFRGB_9E5:
		bytes_per_pixel = sizeof(uint32_t);
		break;
	// One 16-bit word per pixel.
	case transcoder_texture_format::cTFRGB565:
	case transcoder_texture_format::cTFBGR565:
	case transcoder_texture_format::cTFRGBA4444:
		bytes_per_pixel = sizeof(uint16_t);
		break;
	// Three half floats per pixel.
	case transcoder_texture_format::cTFRGB_HALF:
		bytes_per_pixel = sizeof(half_float) * 3;
		break;
	// Four half floats per pixel.
	case transcoder_texture_format::cTFRGBA_HALF:
		bytes_per_pixel = sizeof(half_float) * 4;
		break;
	default:
		break;
	}

	return bytes_per_pixel;
}
13165
13166
// Returns the block width in pixels reported for a transcoder output format:
// 8 for FXT1, 6 for ASTC HDR 6x6, and 4 for every other format.
uint32_t basis_get_block_width(transcoder_texture_format tex_type)
{
	if (tex_type == transcoder_texture_format::cTFFXT1_RGB)
		return 8;

	if (tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA)
		return 6;

	return 4;
}
13179
13180
// Returns the block height in pixels reported for a transcoder output format:
// 6 for ASTC HDR 6x6, and 4 for every other format.
uint32_t basis_get_block_height(transcoder_texture_format tex_type)
{
	const bool is_6x6 = (tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA);
	return is_6x6 ? 6 : 4;
}
13191
13192
// Returns the block width in pixels of a basis source texture format:
// 6 for the two ASTC HDR 6x6 formats, and 4 for every other format.
uint32_t basis_tex_format_get_block_width(basis_tex_format fmt)
{
	const bool is_6x6 =
		(fmt == basis_tex_format::cASTC_HDR_6x6) ||
		(fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);

	return is_6x6 ? 6 : 4;
}
13204
13205
// Returns the block height in pixels of a basis source texture format:
// 6 for the two ASTC HDR 6x6 formats, and 4 for every other format.
uint32_t basis_tex_format_get_block_height(basis_tex_format fmt)
{
	const bool is_6x6 =
		(fmt == basis_tex_format::cASTC_HDR_6x6) ||
		(fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);

	return is_6x6 ? 6 : 4;
}
13217
13218
// Returns true if the basis source texture format is one of the HDR formats:
// UASTC HDR 4x4, ASTC HDR 6x6 or ASTC HDR 6x6 intermediate.
bool basis_tex_format_is_hdr(basis_tex_format fmt)
{
	return (fmt == basis_tex_format::cUASTC_HDR_4x4) ||
		(fmt == basis_tex_format::cASTC_HDR_6x6) ||
		(fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);
}
13231
13232
// Returns true if a file with source texture format 'fmt' can be transcoded to the output
// format 'tex_type' in this build.
//
// - ASTC HDR 6x6 (plain or intermediate) and UASTC HDR 4x4 sources: only their own ASTC HDR
//   format, BC6H, RGBA_HALF, RGB_HALF and RGB_9E5, and only when BASISD_SUPPORT_UASTC_HDR is set.
// - UASTC LDR 4x4 sources: everything except the formats explicitly rejected below, and only
//   when BASISD_SUPPORT_UASTC is set.
// - Anything else is treated as ETC1S: ETC1 and the LDR uncompressed formats always, other
//   targets depending on the matching BASISD_SUPPORT_* switch.
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
{
	using tf = transcoder_texture_format;

	const bool src_is_astc_hdr_6x6 = (fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);

	if (src_is_astc_hdr_6x6)
	{
		// RDO UASTC HDR 6x6, or our custom intermediate format
#if BASISD_SUPPORT_UASTC_HDR
		if ((tex_type == tf::cTFASTC_HDR_6x6_RGBA) ||
			(tex_type == tf::cTFBC6H) ||
			(tex_type == tf::cTFRGBA_HALF) ||
			(tex_type == tf::cTFRGB_HALF) ||
			(tex_type == tf::cTFRGB_9E5))
		{
			return true;
		}
#endif
	}
	else if (fmt == basis_tex_format::cUASTC_HDR_4x4)
	{
		// UASTC HDR 4x4
#if BASISD_SUPPORT_UASTC_HDR
		if ((tex_type == tf::cTFASTC_HDR_4x4_RGBA) ||
			(tex_type == tf::cTFBC6H) ||
			(tex_type == tf::cTFRGBA_HALF) ||
			(tex_type == tf::cTFRGB_HALF) ||
			(tex_type == tf::cTFRGB_9E5))
		{
			return true;
		}
#endif
	}
	else if (fmt == basis_tex_format::cUASTC4x4)
	{
		// UASTC LDR 4x4
#if BASISD_SUPPORT_UASTC
		switch (tex_type)
		{
			// These niche formats aren't currently supported for UASTC - everything else is.
		case tf::cTFPVRTC2_4_RGB:
		case tf::cTFPVRTC2_4_RGBA:
		case tf::cTFATC_RGB:
		case tf::cTFATC_RGBA:
		case tf::cTFFXT1_RGB:
			// UASTC LDR doesn't support transcoding to HDR formats
		case tf::cTFASTC_HDR_4x4_RGBA:
		case tf::cTFASTC_HDR_6x6_RGBA:
		case tf::cTFBC6H:
		case tf::cTFRGBA_HALF:
		case tf::cTFRGB_HALF:
		case tf::cTFRGB_9E5:
			return false;
		default:
			return true;
		}
#endif
	}
	else
	{
		// ETC1S
		// Every label compiled in below shares the single 'return true'.
		switch (tex_type)
		{
			// ETC1 and uncompressed are always supported.
		case tf::cTFETC1_RGB:
		case tf::cTFRGBA32:
		case tf::cTFRGB565:
		case tf::cTFBGR565:
		case tf::cTFRGBA4444:
#if BASISD_SUPPORT_DXT1
		case tf::cTFBC1_RGB:
#endif
#if BASISD_SUPPORT_DXT5A
		case tf::cTFBC4_R:
		case tf::cTFBC5_RG:
#endif
#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A
		case tf::cTFBC3_RGBA:
#endif
#if BASISD_SUPPORT_PVRTC1
		case tf::cTFPVRTC1_4_RGB:
		case tf::cTFPVRTC1_4_RGBA:
#endif
#if BASISD_SUPPORT_BC7_MODE5
		case tf::cTFBC7_RGBA:
		case tf::cTFBC7_ALT:
#endif
#if BASISD_SUPPORT_ETC2_EAC_A8
		case tf::cTFETC2_RGBA:
#endif
#if BASISD_SUPPORT_ASTC
		case tf::cTFASTC_4x4_RGBA:
#endif
#if BASISD_SUPPORT_ATC
		case tf::cTFATC_RGB:
		case tf::cTFATC_RGBA:
#endif
#if BASISD_SUPPORT_FXT1
		case tf::cTFFXT1_RGB:
#endif
#if BASISD_SUPPORT_PVRTC2
		case tf::cTFPVRTC2_4_RGB:
		case tf::cTFPVRTC2_4_RGBA:
#endif
#if BASISD_SUPPORT_ETC2_EAC_RG11
		case tf::cTFETC2_EAC_R11:
		case tf::cTFETC2_EAC_RG11:
#endif
			return true;
		default:
			break;
		}
	}

	return false;
}
13362
13363
// ------------------------------------------------------------------------------------------------------
13364
// UASTC LDR 4x4
13365
// ------------------------------------------------------------------------------------------------------
13366
13367
#if BASISD_SUPPORT_UASTC
13368
const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] =
13369
{
13370
{ 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false },
13371
{ 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true },
13372
{ 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true },
13373
{ 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true },
13374
{ 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false },
13375
{ 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false },
13376
{ 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true },
13377
{ 33, 694, true }, { 52, 524, true }
13378
};
13379
13380
const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] =
13381
{
13382
{ 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 },
13383
{ 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 },
13384
{ 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 },
13385
{ 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 },
13386
{ 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 }
13387
};
13388
13389
const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] =
13390
{
13391
{ 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 },
13392
{ 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 },
13393
{ 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 }
13394
};
13395
13396
const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
13397
13398
const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
13399
13400
// Collapses a 3-subset partition index p (0..2) into a 2-subset index (0 or 1).
// k (0..5) selects the mapping: k >> 1 chooses which two of the three subsets are merged into
// subset 0, and the low bit of k inverts the resulting 2-subset index.
uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k)
{
	assert(k < 6);

	const uint32_t merge_mode = k >> 1;
	if (merge_mode == 0)
	{
		// Subsets 0 and 1 merge; subset 2 stands alone.
		p = (p > 1) ? 1 : 0;
	}
	else if (merge_mode == 1)
	{
		// Subset 0 stands alone; subsets 1 and 2 merge.
		p = (p != 0) ? 1 : 0;
	}
	else if (merge_mode == 2)
	{
		// Subsets 0 and 2 merge; subset 1 stands alone.
		p = ((p != 0) && (p != 2)) ? 1 : 0;
	}

	// Odd k flips the two output subsets.
	return (k & 1) ? (1 - p) : p;
}
13428
13429
static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
13430
13431
const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] =
13432
{
13433
{ 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 },
13434
{ 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 },
13435
{ 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 },
13436
{ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 },
13437
{ 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 },
13438
{ 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 },
13439
{ 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 },
13440
{ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 }
13441
};
13442
13443
const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] =
13444
{
13445
{ 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 },
13446
{ 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 },
13447
{ 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 }
13448
};
13449
13450
const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] =
13451
{
13452
{ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 },
13453
{ 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 },
13454
{ 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 },
13455
{ 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 },
13456
{ 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }
13457
};
13458
13459
const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] =
13460
{
13461
{ 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 },
13462
{ 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 },
13463
{ 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 },
13464
{ 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 }
13465
};
13466
13467
const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] =
13468
{
13469
{ 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 }
13470
};
13471
13472
const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] =
13473
{
13474
{ 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 }
13475
};
13476
13477
const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] =
13478
{
13479
{ 0x1, 4 },
13480
{ 0x35, 6 },
13481
{ 0x1D, 5 },
13482
{ 0x3, 5 },
13483
13484
{ 0x13, 5 },
13485
{ 0xB, 5 },
13486
{ 0x1B, 5 },
13487
{ 0x7, 5 },
13488
13489
{ 0x17, 5 },
13490
{ 0xF, 5 },
13491
{ 0x2, 3 },
13492
{ 0x0, 2 },
13493
13494
{ 0x6, 3 },
13495
{ 0x1F, 5 },
13496
{ 0xD, 5 },
13497
{ 0x5, 7 },
13498
13499
{ 0x15, 6 },
13500
{ 0x25, 6 },
13501
{ 0x9, 4 },
13502
{ 0x45, 7 } // future expansion
13503
};
13504
13505
// If g_uastc_mode_huff_codes[] changes this table must be updated!
13506
static const uint8_t g_uastc_huff_modes[128] =
13507
{
13508
11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,
13509
19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13
13510
};
13511
13512
const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 };
13513
const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 };
13514
const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 };
13515
const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 };
13516
const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 };
13517
const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 };
13518
const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
13519
const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
13520
const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
13521
const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 };
13522
const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
13523
const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 };
13524
const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 };
13525
13526
// bits, trits, quints
13527
const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] =
13528
{
13529
{ 1, 0, 0 }, // 0-1 0
13530
{ 0, 1, 0 }, // 0-2 1
13531
{ 2, 0, 0 }, // 0-3 2
13532
{ 0, 0, 1 }, // 0-4 3
13533
13534
{ 1, 1, 0 }, // 0-5 4
13535
{ 3, 0, 0 }, // 0-7 5
13536
{ 1, 0, 1 }, // 0-9 6
13537
{ 2, 1, 0 }, // 0-11 7
13538
13539
{ 4, 0, 0 }, // 0-15 8
13540
{ 2, 0, 1 }, // 0-19 9
13541
{ 3, 1, 0 }, // 0-23 10
13542
{ 5, 0, 0 }, // 0-31 11
13543
13544
{ 3, 0, 1 }, // 0-39 12
13545
{ 4, 1, 0 }, // 0-47 13
13546
{ 6, 0, 0 }, // 0-63 14
13547
{ 4, 0, 1 }, // 0-79 15
13548
13549
{ 5, 1, 0 }, // 0-95 16
13550
{ 7, 0, 0 }, // 0-127 17
13551
{ 5, 0, 1 }, // 0-159 18
13552
{ 6, 1, 0 }, // 0-191 19
13553
13554
{ 8, 0, 0 }, // 0-255 20
13555
};
13556
13557
int astc_get_levels(int range)
13558
{
13559
assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES);
13560
return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0];
13561
}
13562
13563
// g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[]
13564
astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index]
13565
13566
// Taken right from the ASTC spec.
13567
static struct
13568
{
13569
const char* m_pB_str;
13570
uint32_t m_c;
13571
} g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] =
13572
{
13573
{ "", 0 },
13574
{ "", 0 },
13575
{ "", 0 },
13576
{ "", 0 },
13577
{ "000000000", 204, }, // 0-5
13578
{ "", 0 },
13579
{ "000000000", 113, }, // 0-9
13580
{ "b000b0bb0", 93 }, // 0-11
13581
{ "", 0 },
13582
{ "b0000bb00", 54 }, // 0-19
13583
{ "cb000cbcb", 44 }, // 0-23
13584
{ "", 0 },
13585
{ "cb0000cbc", 26 }, // 0-39
13586
{ "dcb000dcb", 22 }, // 0-47
13587
{ "", 0 },
13588
{ "dcb0000dc", 13 }, // 0-79
13589
{ "edcb000ed", 11 }, // 0-95
13590
{ "", 0 },
13591
{ "edcb0000e", 6 }, // 0-159
13592
{ "fedcb000f", 5 }, // 0-191
13593
{ "", 0 },
13594
};
13595
13596
bool astc_is_valid_endpoint_range(uint32_t range)
13597
{
13598
if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0))
13599
return true;
13600
13601
return g_astc_endpoint_unquant_params[range].m_c != 0;
13602
}
13603
13604
uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range)
13605
{
13606
assert(range < BC7ENC_TOTAL_ASTC_RANGES);
13607
13608
const uint32_t bits = g_astc_bise_range_table[range][0];
13609
const uint32_t trits = g_astc_bise_range_table[range][1];
13610
const uint32_t quints = g_astc_bise_range_table[range][2];
13611
13612
uint32_t val = 0;
13613
if ((!trits) && (!quints))
13614
{
13615
assert(!packed_trits && !packed_quints);
13616
13617
int bits_left = 8;
13618
while (bits_left > 0)
13619
{
13620
uint32_t v = packed_bits;
13621
13622
int n = basisu::minimumi(bits_left, bits);
13623
if (n < (int)bits)
13624
v >>= (bits - n);
13625
13626
assert(v < (1U << n));
13627
13628
val |= (v << (bits_left - n));
13629
bits_left -= n;
13630
}
13631
}
13632
else
13633
{
13634
const uint32_t A = (packed_bits & 1) ? 511 : 0;
13635
const uint32_t C = g_astc_endpoint_unquant_params[range].m_c;
13636
const uint32_t D = trits ? packed_trits : packed_quints;
13637
13638
assert(C);
13639
13640
uint32_t B = 0;
13641
for (uint32_t i = 0; i < 9; i++)
13642
{
13643
B <<= 1;
13644
13645
char c = g_astc_endpoint_unquant_params[range].m_pB_str[i];
13646
if (c != '0')
13647
{
13648
c -= 'a';
13649
B |= ((packed_bits >> c) & 1);
13650
}
13651
}
13652
13653
val = D * C + B;
13654
val = val ^ A;
13655
val = (A & 0x80) | (val >> 2);
13656
}
13657
13658
return val;
13659
}
13660
13661
uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range)
13662
{
13663
assert(range < BC7ENC_TOTAL_ASTC_RANGES);
13664
assert(packed_val < (uint32_t)astc_get_levels(range));
13665
13666
const uint32_t bits = g_astc_bise_range_table[range][0];
13667
const uint32_t trits = g_astc_bise_range_table[range][1];
13668
const uint32_t quints = g_astc_bise_range_table[range][2];
13669
13670
if ((!trits) && (!quints))
13671
return unquant_astc_endpoint(packed_val, 0, 0, range);
13672
else if (trits)
13673
return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range);
13674
else
13675
return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range);
13676
}
13677
13678
// BC7 - Various BC7 tables/helpers
13679
const uint32_t g_bc7_weights1[2] = { 0, 64 };
13680
const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 };
13681
const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
13682
const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
13683
const uint32_t g_astc_weights4[16] = { 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 };
13684
const uint32_t g_astc_weights5[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 };
13685
const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 };
13686
13687
const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
13688
13689
const uint8_t g_bc7_partition2[64 * 16] =
13690
{
13691
0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
13692
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
13693
0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
13694
0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
13695
0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
13696
0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
13697
0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
13698
0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
13699
};
13700
13701
const uint8_t g_bc7_partition3[64 * 16] =
13702
{
13703
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
13704
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
13705
0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
13706
0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
13707
0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
13708
0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
13709
0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
13710
0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
13711
};
13712
13713
const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 };
13714
13715
const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =
13716
{
13717
3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3
13718
};
13719
13720
const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =
13721
{
13722
15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8
13723
};
13724
13725
const uint8_t g_bc7_num_subsets[8] = { 3, 2, 3, 2, 1, 1, 1, 2 };
13726
const uint8_t g_bc7_partition_bits[8] = { 4, 6, 6, 6, 0, 0, 0, 6 };
13727
const uint8_t g_bc7_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };
13728
13729
const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 };
13730
const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 };
13731
const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 };
13732
const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 };
13733
13734
const uint8_t g_bc7_alpha_index_bitcount[8] = { 0, 0, 0, 0, 3, 2, 4, 2 };
13735
13736
endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit]
13737
endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c]
13738
13739
static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs)
13740
{
13741
assert((num_bits <= 32) && (val < (1ULL << num_bits)));
13742
while (num_bits)
13743
{
13744
const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits);
13745
pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7));
13746
val >>= n;
13747
num_bits -= n;
13748
*pCur_ofs += n;
13749
}
13750
assert(*pCur_ofs <= 128);
13751
}
13752
13753
// TODO: Optimize this.
13754
void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults)
13755
{
13756
const uint32_t best_mode = pResults->m_mode;
13757
13758
const uint32_t total_subsets = g_bc7_num_subsets[best_mode];
13759
const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode];
13760
//const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode];
13761
//const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1;
13762
13763
const uint8_t* pPartition;
13764
if (total_subsets == 1)
13765
pPartition = &g_bc7_partition1[0];
13766
else if (total_subsets == 2)
13767
pPartition = &g_bc7_partition2[pResults->m_partition * 16];
13768
else
13769
pPartition = &g_bc7_partition3[pResults->m_partition * 16];
13770
13771
uint8_t color_selectors[16];
13772
memcpy(color_selectors, pResults->m_selectors, 16);
13773
13774
uint8_t alpha_selectors[16];
13775
memcpy(alpha_selectors, pResults->m_alpha_selectors, 16);
13776
13777
color_quad_u8 low[3], high[3];
13778
memcpy(low, pResults->m_low, sizeof(low));
13779
memcpy(high, pResults->m_high, sizeof(high));
13780
13781
uint32_t pbits[3][2];
13782
memcpy(pbits, pResults->m_pbits, sizeof(pbits));
13783
13784
int anchor[3] = { -1, -1, -1 };
13785
13786
for (uint32_t k = 0; k < total_subsets; k++)
13787
{
13788
uint32_t anchor_index = 0;
13789
if (k)
13790
{
13791
if ((total_subsets == 3) && (k == 1))
13792
anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition];
13793
else if ((total_subsets == 3) && (k == 2))
13794
anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition];
13795
else
13796
anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition];
13797
}
13798
13799
anchor[k] = anchor_index;
13800
13801
const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector);
13802
const uint32_t num_color_indices = 1 << color_index_bits;
13803
13804
if (color_selectors[anchor_index] & (num_color_indices >> 1))
13805
{
13806
for (uint32_t i = 0; i < 16; i++)
13807
if (pPartition[i] == k)
13808
color_selectors[i] = (uint8_t)((num_color_indices - 1) - color_selectors[i]);
13809
13810
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13811
{
13812
for (uint32_t q = 0; q < 3; q++)
13813
{
13814
uint8_t t = low[k].m_c[q];
13815
low[k].m_c[q] = high[k].m_c[q];
13816
high[k].m_c[q] = t;
13817
}
13818
}
13819
else
13820
{
13821
color_quad_u8 tmp = low[k];
13822
low[k] = high[k];
13823
high[k] = tmp;
13824
}
13825
13826
if (!g_bc7_mode_has_shared_p_bits[best_mode])
13827
{
13828
uint32_t t = pbits[k][0];
13829
pbits[k][0] = pbits[k][1];
13830
pbits[k][1] = t;
13831
}
13832
}
13833
13834
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13835
{
13836
const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
13837
const uint32_t num_alpha_indices = 1 << alpha_index_bits;
13838
13839
if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1))
13840
{
13841
for (uint32_t i = 0; i < 16; i++)
13842
if (pPartition[i] == k)
13843
alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]);
13844
13845
uint8_t t = low[k].m_c[3];
13846
low[k].m_c[3] = high[k].m_c[3];
13847
high[k].m_c[3] = t;
13848
}
13849
}
13850
}
13851
13852
uint8_t* pBlock_bytes = (uint8_t*)(pBlock);
13853
memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE);
13854
13855
uint32_t cur_bit_ofs = 0;
13856
bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs);
13857
13858
if ((best_mode == 4) || (best_mode == 5))
13859
bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs);
13860
13861
if (best_mode == 4)
13862
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs);
13863
13864
if (total_partitions > 1)
13865
bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs);
13866
13867
const uint32_t total_comps = (best_mode >= 4) ? 4 : 3;
13868
for (uint32_t comp = 0; comp < total_comps; comp++)
13869
{
13870
for (uint32_t subset = 0; subset < total_subsets; subset++)
13871
{
13872
bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
13873
bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
13874
}
13875
}
13876
13877
if (g_bc7_mode_has_p_bits[best_mode])
13878
{
13879
for (uint32_t subset = 0; subset < total_subsets; subset++)
13880
{
13881
bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs);
13882
if (!g_bc7_mode_has_shared_p_bits[best_mode])
13883
bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs);
13884
}
13885
}
13886
13887
for (uint32_t y = 0; y < 4; y++)
13888
{
13889
for (uint32_t x = 0; x < 4; x++)
13890
{
13891
int idx = x + y * 4;
13892
13893
uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector);
13894
13895
if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
13896
n--;
13897
13898
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs);
13899
}
13900
}
13901
13902
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13903
{
13904
for (uint32_t y = 0; y < 4; y++)
13905
{
13906
for (uint32_t x = 0; x < 4; x++)
13907
{
13908
int idx = x + y * 4;
13909
13910
uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
13911
13912
if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
13913
n--;
13914
13915
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs);
13916
}
13917
}
13918
}
13919
13920
assert(cur_bit_ofs == 128);
13921
}
13922
13923
// ASTC
13924
// ORs a code of up to 9 bits into the byte buffer at pDst, starting at bit_offset (LSB first),
// then advances bit_offset by codesize. A codesize of 0 is a no-op.
// At most two consecutive bytes are touched; the destination bits must already be zero.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize)
{
	assert(codesize <= 9);

	if (!codesize)
		return;

	const uint32_t bit_in_byte = bit_offset & 7;
	const uint32_t first_byte = bit_offset >> 3;
	const uint32_t shifted_code = code << bit_in_byte;

	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst) + first_byte;
	pBytes[0] |= (uint8_t)shifted_code;

	// Only spill into the next byte when the code does not fit in what is left of this one.
	if (codesize > (8 - bit_in_byte))
		pBytes[1] |= (uint8_t)(shifted_code >> 8);

	bit_offset += codesize;
}
13943
13944
void pack_astc_solid_block(void* pDst_block, const color32& color)
13945
{
13946
uint32_t r = color[0], g = color[1], b = color[2];
13947
uint32_t a = color[3];
13948
13949
uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
13950
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
13951
13952
pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
13953
13954
pOutput[1] = 0xffffffff;
13955
pOutput[2] = 0;
13956
pOutput[3] = 0;
13957
13958
int bit_pos = 64;
13959
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, r | (r << 8), 16);
13960
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, g | (g << 8), 16);
13961
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, b | (b << 8), 16);
13962
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, a | (a << 8), 16);
13963
}
13964
13965
// See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation
13966
#ifdef _DEBUG
13967
// 32-bit integer mixing function used to derive the pseudo-random seeds for ASTC partition
// pattern generation. All arithmetic wraps modulo 2^32.
static inline uint32_t astc_hash52(uint32_t v)
{
	uint32_t h = v;

	h = h ^ (h >> 15);
	h = h - (h << 17);
	h = h + (h << 7);
	h = h + (h << 4);

	h = h ^ (h >> 5);
	h = h + (h << 16);
	h = h ^ (h >> 7);
	h = h ^ (h >> 3);

	h = h ^ (h << 6);
	h = h ^ (h >> 17);

	return h;
}
13975
13976
int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
13977
{
13978
if (small_block)
13979
{
13980
x <<= 1; y <<= 1; z <<= 1;
13981
}
13982
seed += (partitioncount - 1) * 1024;
13983
uint32_t rnum = astc_hash52(seed);
13984
uint8_t seed1 = rnum & 0xF;
13985
uint8_t seed2 = (rnum >> 4) & 0xF;
13986
uint8_t seed3 = (rnum >> 8) & 0xF;
13987
uint8_t seed4 = (rnum >> 12) & 0xF;
13988
uint8_t seed5 = (rnum >> 16) & 0xF;
13989
uint8_t seed6 = (rnum >> 20) & 0xF;
13990
uint8_t seed7 = (rnum >> 24) & 0xF;
13991
uint8_t seed8 = (rnum >> 28) & 0xF;
13992
uint8_t seed9 = (rnum >> 18) & 0xF;
13993
uint8_t seed10 = (rnum >> 22) & 0xF;
13994
uint8_t seed11 = (rnum >> 26) & 0xF;
13995
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
13996
13997
seed1 *= seed1; seed2 *= seed2;
13998
seed3 *= seed3; seed4 *= seed4;
13999
seed5 *= seed5; seed6 *= seed6;
14000
seed7 *= seed7; seed8 *= seed8;
14001
seed9 *= seed9; seed10 *= seed10;
14002
seed11 *= seed11; seed12 *= seed12;
14003
14004
int sh1, sh2, sh3;
14005
if (seed & 1)
14006
{
14007
sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5);
14008
}
14009
else
14010
{
14011
sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5);
14012
}
14013
sh3 = (seed & 0x10) ? sh1 : sh2;
14014
14015
seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
14016
seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
14017
seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
14018
14019
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
14020
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
14021
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
14022
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
14023
14024
a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
14025
14026
if (partitioncount < 4) d = 0;
14027
if (partitioncount < 3) c = 0;
14028
14029
if (a >= b && a >= c && a >= d)
14030
return 0;
14031
else if (b >= c && b >= d)
14032
return 1;
14033
else if (c >= d)
14034
return 2;
14035
else
14036
return 3;
14037
}
14038
#endif
14039
14040
// Quint packing table used by astc_encode_quints(). It is indexed by q0 + 5*q1 + 25*q2, where
// q0..q2 are the three quint values (0-4) of a group, and yields the packed code whose bits 0-6
// are interleaved with the plain bits of the three values. Each row below covers one value of q2.
static const uint8_t g_astc_quint_encode[125] =
{
    0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6,
    32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, 58, 59, 60, 37, 45, 53, 61, 14,
    64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22,
    96, 97, 98, 99, 100, 104, 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30,
    102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, 126, 127, 94, 95, 62, 39, 47, 55, 63, 31
};
14047
14048
// Encodes 3 values to output, usable for any range that uses quints and bits
14049
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
14050
{
14051
// First extract the quints and the bits from the 3 input values
14052
int quints = 0, bits[3];
14053
const uint32_t bit_mask = (1 << n) - 1;
14054
for (int i = 0; i < 3; i++)
14055
{
14056
static const int s_muls[3] = { 1, 5, 25 };
14057
14058
const int t = pValues[i] >> n;
14059
14060
quints += t * s_muls[i];
14061
bits[i] = pValues[i] & bit_mask;
14062
}
14063
14064
// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
14065
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
14066
14067
assert(quints < 125);
14068
const int T = g_astc_quint_encode[quints];
14069
14070
// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
14071
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
14072
(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
14073
}
14074
14075
// Packs values using ASTC's BISE to output buffer.
14076
static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range)
14077
{
14078
uint32_t temp[5] = { 0, 0, 0, 0, 0 };
14079
14080
const int num_bits = g_astc_bise_range_table[range][0];
14081
14082
int group_size = 0;
14083
if (g_astc_bise_range_table[range][1])
14084
group_size = 5;
14085
else if (g_astc_bise_range_table[range][2])
14086
group_size = 3;
14087
14088
if (group_size)
14089
{
14090
// Range has trits or quints - pack each group of 5 or 3 values
14091
const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
14092
14093
for (int group_index = 0; group_index < total_groups; group_index++)
14094
{
14095
uint8_t vals[5] = { 0, 0, 0, 0, 0 };
14096
14097
const int limit = basisu::minimum(group_size, num_vals - group_index * group_size);
14098
for (int i = 0; i < limit; i++)
14099
vals[i] = pSrc_vals[group_index * group_size + i];
14100
14101
if (group_size == 5)
14102
astc_encode_trits(temp, vals, bit_pos, num_bits);
14103
else
14104
astc_encode_quints(temp, vals, bit_pos, num_bits);
14105
}
14106
}
14107
else
14108
{
14109
for (int i = 0; i < num_vals; i++)
14110
astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
14111
}
14112
14113
pDst[0] |= temp[0]; pDst[1] |= temp[1];
14114
pDst[2] |= temp[2]; pDst[3] |= temp[3];
14115
}
14116
14117
// Bit widths of the fixed-size ASTC header fields written by pack_astc_block().
const uint32_t ASTC_BLOCK_MODE_BITS = 11;       // block mode; the bit position right after it is where PART is written
const uint32_t ASTC_PART_BITS = 2;              // PART field, holds (subsets - 1)
const uint32_t ASTC_CEM_BITS = 4;               // color endpoint mode
const uint32_t ASTC_PARTITION_INDEX_BITS = 10;  // partition seed, written only for multi-subset blocks
const uint32_t ASTC_CCS_BITS = 2;               // color component selector, written only for dual plane blocks
14122
14123
// ASTC block mode value for each UASTC mode, indexed by UASTC mode. pack_astc_block() stores the
// selected value in the first two bytes of the output block.
const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] =
{
    0x242, 0x42, 0x53, 0x42, 0x42, 0x53,     // modes 0-5
    0x442, 0x42, 0, 0x42, 0x242, 0x442,      // modes 6-11
    0x53, 0x441, 0x42, 0x242, 0x42, 0x442,   // modes 12-17
    0x253                                    // mode 18
};
14124
14125
bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode)
14126
{
14127
assert(uastc_mode < TOTAL_UASTC_MODES);
14128
uint8_t* pDst_bytes = reinterpret_cast<uint8_t*>(pDst);
14129
14130
const int total_weights = pBlock->m_dual_plane ? 32 : 16;
14131
14132
// Set mode bits - see Table 146-147
14133
uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode];
14134
pDst_bytes[0] = (uint8_t)mode;
14135
pDst_bytes[1] = (uint8_t)(mode >> 8);
14136
14137
memset(pDst_bytes + 2, 0, 16 - 2);
14138
14139
int bit_pos = ASTC_BLOCK_MODE_BITS;
14140
14141
// We only support 1-5 bit weight indices
14142
assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]);
14143
const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0];
14144
14145
// See table 143 - PART
14146
astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS);
14147
14148
if (pBlock->m_subsets == 1)
14149
astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS);
14150
else
14151
{
14152
// See table 145
14153
astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS);
14154
14155
// Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM
14156
astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2);
14157
}
14158
14159
if (pBlock->m_dual_plane)
14160
{
14161
const int total_weight_bits = total_weights * bits_per_weight;
14162
14163
// See Illegal Encodings 23.24
14164
// https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings
14165
assert((total_weight_bits >= 24) && (total_weight_bits <= 96));
14166
14167
int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS;
14168
astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS);
14169
}
14170
14171
const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets;
14172
assert(num_cem_pairs <= 9);
14173
14174
astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]);
14175
14176
// Write the weight bits in reverse bit order.
14177
switch (bits_per_weight)
14178
{
14179
case 1:
14180
{
14181
const uint32_t N = 1;
14182
for (int i = 0; i < total_weights; i++)
14183
{
14184
const uint32_t ofs = 128 - N - i;
14185
assert((ofs >> 3) < 16);
14186
pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
14187
}
14188
break;
14189
}
14190
case 2:
14191
{
14192
const uint32_t N = 2;
14193
for (int i = 0; i < total_weights; i++)
14194
{
14195
static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 };
14196
const uint32_t ofs = 128 - N - (i * N);
14197
assert((ofs >> 3) < 16);
14198
pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7));
14199
}
14200
break;
14201
}
14202
case 3:
14203
{
14204
const uint32_t N = 3;
14205
for (int i = 0; i < total_weights; i++)
14206
{
14207
static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
14208
14209
const uint32_t ofs = 128 - N - (i * N);
14210
const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7);
14211
14212
uint32_t index = ofs >> 3;
14213
assert(index < 16);
14214
pDst_bytes[index++] |= rev & 0xFF;
14215
if (index < 16)
14216
pDst_bytes[index++] |= (rev >> 8);
14217
}
14218
break;
14219
}
14220
case 4:
14221
{
14222
const uint32_t N = 4;
14223
for (int i = 0; i < total_weights; i++)
14224
{
14225
static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
14226
const int ofs = 128 - N - (i * N);
14227
assert(ofs >= 0 && (ofs >> 3) < 16);
14228
pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7));
14229
}
14230
break;
14231
}
14232
case 5:
14233
{
14234
const uint32_t N = 5;
14235
for (int i = 0; i < total_weights; i++)
14236
{
14237
static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 };
14238
14239
const uint32_t ofs = 128 - N - (i * N);
14240
const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7);
14241
14242
uint32_t index = ofs >> 3;
14243
assert(index < 16);
14244
pDst_bytes[index++] |= rev & 0xFF;
14245
if (index < 16)
14246
pDst_bytes[index++] |= (rev >> 8);
14247
}
14248
14249
break;
14250
}
14251
default:
14252
assert(0);
14253
break;
14254
}
14255
14256
return true;
14257
}
14258
14259
const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern)
14260
{
14261
const uint8_t* pSubset_anchor_indices = g_zero_pattern;
14262
pPartition_pattern = g_zero_pattern;
14263
14264
if (subsets >= 2)
14265
{
14266
if (subsets == 3)
14267
{
14268
pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
14269
pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0];
14270
}
14271
else if (mode == 7)
14272
{
14273
pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
14274
pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0];
14275
}
14276
else
14277
{
14278
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
14279
pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0];
14280
}
14281
}
14282
14283
return pSubset_anchor_indices;
14284
}
14285
14286
// Reads the single bit at bit_offset from pBuf (bit 0 is the least significant bit of byte 0)
// and advances bit_offset by one. Returns 0 or 1.
static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset)
{
    const uint32_t bit = (pBuf[bit_offset >> 3] >> (bit_offset & 7)) & 1;
    bit_offset += 1;
    return bit;
}
14292
14293
// Reads codesize (0-9) bits starting at bit_offset from pBuf, least significant bit first, and
// advances bit_offset by codesize. Returns 0 without touching bit_offset when codesize is 0.
// Offsets of 112 and above always use the byte-wise path; lower offsets may use a 16-bit load.
// The byte-wise path reads the following byte only when the requested bits cross a byte boundary.
static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
{
    assert(codesize <= 9);
    if (!codesize)
        return 0;

    const uint32_t byte_index = bit_offset >> 3U;
    const uint32_t byte_bit_offset = bit_offset & 7U;
    const uint32_t mask = (1U << codesize) - 1U;

#if !(BASISD_IS_BIG_ENDIAN) && (BASISD_USE_UNALIGNED_WORD_READS)
    if (bit_offset < 112)
    {
        // Fast path: fetch 16 bits at once. memcpy keeps the load well-defined for any alignment
        // (a cast to const uint16_t* is not) and compiles to a single load on the targets that enable this path.
        uint16_t w;
        memcpy(&w, &pBuf[byte_index], sizeof(w));
        bit_offset += codesize;
        return (w >> byte_bit_offset) & mask;
    }
#endif

    // Portable path: take what the first byte offers, then top up from the next byte if needed.
    uint32_t bits = pBuf[byte_index] >> byte_bit_offset;
    if ((byte_bit_offset + codesize) > 8U)
        bits |= ((uint32_t)pBuf[byte_index + 1]) << (8U - byte_bit_offset);

    bit_offset += codesize;

    return bits & mask;
}
14322
14323
// Reads codesize (0-64) bits starting at bit_offset from pBuf, least significant bit first, one
// byte-sized piece at a time, and advances bit_offset by codesize.
inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
{
    assert(codesize <= 64U);

    uint64_t result = 0;
    uint32_t bits_done = 0;

    while (bits_done < codesize)
    {
        // Take whatever is left in the current byte, but no more than is still wanted.
        const uint32_t byte_bit_offset = bit_offset & 7U;
        const uint32_t bits_in_byte = 8U - byte_bit_offset;
        const uint32_t bits_wanted = codesize - bits_done;
        const uint32_t bits_to_read = (bits_wanted < bits_in_byte) ? bits_wanted : bits_in_byte;

        const uint32_t piece = (pBuf[bit_offset >> 3U] >> byte_bit_offset) & ((1U << bits_to_read) - 1U);
        result |= ((uint64_t)piece << bits_done);

        bits_done += bits_to_read;
        bit_offset += bits_to_read;
    }

    return result;
}
14345
14346
// Variant of read_bits1_to_9() for callers that guarantee bit_offset < 112 (asserted), so the
// 16-bit load needs no range check. Reads codesize (0-9) bits, least significant bit first, and
// advances bit_offset by codesize. Returns 0 without touching bit_offset when codesize is 0.
static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
{
    assert(codesize <= 9);
    if (!codesize)
        return 0;
    assert(bit_offset < 112);

    const uint32_t byte_index = bit_offset >> 3U;
    const uint32_t byte_bit_offset = bit_offset & 7U;
    const uint32_t mask = (1U << codesize) - 1U;

    bit_offset += codesize;

#if !(BASISD_IS_BIG_ENDIAN) && (BASISD_USE_UNALIGNED_WORD_READS)
    // Fetch 16 bits at once. memcpy keeps the load well-defined for any alignment (a cast to
    // const uint16_t* is not) and compiles to a single load on the targets that enable this path.
    uint16_t w;
    memcpy(&w, &pBuf[byte_index], sizeof(w));
    return (w >> byte_bit_offset) & mask;
#else
    // Portable path: take what the first byte offers, then top up from the next byte if needed.
    uint32_t bits = pBuf[byte_index] >> byte_bit_offset;
    if ((byte_bit_offset + codesize) > 8U)
        bits |= ((uint32_t)pBuf[byte_index + 1]) << (8U - byte_bit_offset);
    return bits & mask;
#endif
}
14378
14379
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
14380
{
14381
//memset(&unpacked, 0, sizeof(unpacked));
14382
14383
#if 0
14384
uint8_t table[128];
14385
memset(table, 0xFF, sizeof(table));
14386
14387
{
14388
for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++)
14389
{
14390
const uint32_t code = g_uastc_mode_huff_codes[mode][0];
14391
const uint32_t codesize = g_uastc_mode_huff_codes[mode][1];
14392
14393
table[code] = mode;
14394
14395
uint32_t bits_left = 7 - codesize;
14396
for (uint32_t i = 0; i < (1 << bits_left); i++)
14397
table[code | (i << codesize)] = mode;
14398
}
14399
14400
for (uint32_t i = 0; i < 128; i++)
14401
printf("%u,", table[i]);
14402
exit(0);
14403
}
14404
#endif
14405
14406
const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127];
14407
if (mode >= (int)TOTAL_UASTC_MODES)
14408
return false;
14409
14410
unpacked.m_mode = mode;
14411
unpacked.m_common_pattern = 0;
14412
14413
uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
14414
14415
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
14416
{
14417
unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14418
unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14419
unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14420
unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14421
14422
if (read_hints)
14423
{
14424
unpacked.m_etc1_flip = false;
14425
unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
14426
unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14427
unpacked.m_etc1_inten1 = 0;
14428
unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
14429
unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14430
unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14431
unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14432
unpacked.m_etc1_bias = 0;
14433
unpacked.m_etc2_hints = 0;
14434
}
14435
14436
return true;
14437
}
14438
14439
if (read_hints)
14440
{
14441
if (g_uastc_mode_has_bc1_hint0[mode])
14442
unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0;
14443
else
14444
unpacked.m_bc1_hint0 = false;
14445
14446
if (g_uastc_mode_has_bc1_hint1[mode])
14447
unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0;
14448
else
14449
unpacked.m_bc1_hint1 = false;
14450
14451
unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0;
14452
unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
14453
unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14454
unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14455
14456
if (g_uastc_mode_has_etc1_bias[mode])
14457
unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14458
else
14459
unpacked.m_etc1_bias = 0;
14460
14461
if (g_uastc_mode_has_alpha[mode])
14462
{
14463
unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14464
//assert(unpacked.m_etc2_hints > 0);
14465
}
14466
else
14467
unpacked.m_etc2_hints = 0;
14468
}
14469
else
14470
bit_ofs += g_uastc_mode_total_hint_bits[mode];
14471
14472
uint32_t subsets = 1;
14473
switch (mode)
14474
{
14475
case 2:
14476
case 4:
14477
case 7:
14478
case 9:
14479
case 16:
14480
unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14481
subsets = 2;
14482
break;
14483
case 3:
14484
unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4);
14485
subsets = 3;
14486
break;
14487
default:
14488
break;
14489
}
14490
14491
uint32_t part_seed = 0;
14492
switch (mode)
14493
{
14494
case 2:
14495
case 4:
14496
case 9:
14497
case 16:
14498
if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2)
14499
return false;
14500
14501
part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc;
14502
break;
14503
case 3:
14504
if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3)
14505
return false;
14506
14507
part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc;
14508
break;
14509
case 7:
14510
if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS)
14511
return false;
14512
14513
part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2;
14514
break;
14515
default:
14516
break;
14517
}
14518
14519
uint32_t total_planes = 1;
14520
switch (mode)
14521
{
14522
case 6:
14523
case 11:
14524
case 13:
14525
unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
14526
total_planes = 2;
14527
break;
14528
case 17:
14529
unpacked.m_astc.m_ccs = 3;
14530
total_planes = 2;
14531
break;
14532
default:
14533
break;
14534
}
14535
14536
unpacked.m_astc.m_dual_plane = (total_planes == 2);
14537
14538
unpacked.m_astc.m_subsets = subsets;
14539
unpacked.m_astc.m_partition_seed = part_seed;
14540
14541
const uint32_t total_comps = g_uastc_mode_comps[mode];
14542
14543
const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
14544
14545
unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode];
14546
14547
const uint32_t total_values = total_comps * 2 * subsets;
14548
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
14549
14550
const uint32_t cem = g_uastc_mode_cem[mode];
14551
unpacked.m_astc.m_cem = cem;
14552
14553
const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
14554
const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
14555
const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];
14556
14557
uint32_t total_tqs = 0;
14558
uint32_t bundle_size = 0, mul = 0;
14559
if (ep_trits)
14560
{
14561
total_tqs = (total_values + 4) / 5;
14562
bundle_size = 5;
14563
mul = 3;
14564
}
14565
else if (ep_quints)
14566
{
14567
total_tqs = (total_values + 2) / 3;
14568
bundle_size = 3;
14569
mul = 5;
14570
}
14571
14572
uint32_t tq_values[8];
14573
for (uint32_t i = 0; i < total_tqs; i++)
14574
{
14575
uint32_t num_bits = ep_trits ? 8 : 7;
14576
if (i == (total_tqs - 1))
14577
{
14578
uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size;
14579
if (ep_trits)
14580
{
14581
switch (num_remaining)
14582
{
14583
case 1: num_bits = 2; break;
14584
case 2: num_bits = 4; break;
14585
case 3: num_bits = 5; break;
14586
case 4: num_bits = 7; break;
14587
default: break;
14588
}
14589
}
14590
else if (ep_quints)
14591
{
14592
switch (num_remaining)
14593
{
14594
case 1: num_bits = 3; break;
14595
case 2: num_bits = 5; break;
14596
default: break;
14597
}
14598
}
14599
}
14600
14601
tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits);
14602
} // i
14603
14604
uint32_t accum = 0;
14605
uint32_t accum_remaining = 0;
14606
uint32_t next_tq_index = 0;
14607
14608
for (uint32_t i = 0; i < total_values; i++)
14609
{
14610
uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits);
14611
14612
if (total_tqs)
14613
{
14614
if (!accum_remaining)
14615
{
14616
assert(next_tq_index < total_tqs);
14617
accum = tq_values[next_tq_index++];
14618
accum_remaining = bundle_size;
14619
}
14620
14621
// TODO: Optimize with tables
14622
uint32_t v = accum % mul;
14623
accum /= mul;
14624
accum_remaining--;
14625
14626
value |= (v << ep_bits);
14627
}
14628
14629
unpacked.m_astc.m_endpoints[i] = (uint8_t)value;
14630
}
14631
14632
const uint8_t* pPartition_pattern;
14633
const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern);
14634
14635
#ifdef _DEBUG
14636
for (uint32_t i = 0; i < 16; i++)
14637
assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
14638
14639
for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
14640
{
14641
uint32_t anchor_index = 0;
14642
14643
for (uint32_t i = 0; i < 16; i++)
14644
{
14645
if (pPartition_pattern[i] == subset_index)
14646
{
14647
anchor_index = i;
14648
break;
14649
}
14650
}
14651
14652
assert(pSubset_anchor_indices[subset_index] == anchor_index);
14653
}
14654
#endif
14655
14656
#if 0
14657
const uint32_t total_planes_shift = total_planes - 1;
14658
for (uint32_t i = 0; i < 16 * total_planes; i++)
14659
{
14660
uint32_t num_bits = weight_bits;
14661
for (uint32_t s = 0; s < subsets; s++)
14662
{
14663
if (pSubset_anchor_indices[s] == (i >> total_planes_shift))
14664
{
14665
num_bits--;
14666
break;
14667
}
14668
}
14669
14670
unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits);
14671
}
14672
#endif
14673
14674
if (mode == 18)
14675
{
14676
// Mode 18 is the only mode with more than 64 weight bits.
14677
for (uint32_t i = 0; i < 16; i++)
14678
unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1));
14679
}
14680
else
14681
{
14682
// All other modes have <= 64 weight bits.
14683
uint64_t bits;
14684
14685
// Read the weight bits
14686
if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
14687
bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum<int>(64, 128 - (int)bit_ofs));
14688
else
14689
{
14690
bits = blk.m_dwords[2];
14691
bits |= (((uint64_t)blk.m_dwords[3]) << 32U);
14692
14693
if (bit_ofs >= 64U)
14694
bits >>= (bit_ofs - 64U);
14695
else
14696
{
14697
assert(bit_ofs >= 56U);
14698
14699
uint32_t bits_needed = 64U - bit_ofs;
14700
bits <<= bits_needed;
14701
bits |= (blk.m_bytes[7] >> (8U - bits_needed));
14702
}
14703
}
14704
14705
bit_ofs = 0;
14706
14707
const uint32_t mask = (1U << weight_bits) - 1U;
14708
const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U;
14709
14710
if (total_planes == 2)
14711
{
14712
// Dual plane modes always have a single subset, and the first 2 weights are anchors.
14713
14714
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14715
bit_ofs += (weight_bits - 1);
14716
14717
unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14718
bit_ofs += (weight_bits - 1);
14719
14720
for (uint32_t i = 2; i < 32; i++)
14721
{
14722
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14723
bit_ofs += weight_bits;
14724
}
14725
}
14726
else
14727
{
14728
if (subsets == 1)
14729
{
14730
// Specialize the single subset case.
14731
if (weight_bits == 4)
14732
{
14733
assert(bit_ofs == 0);
14734
14735
// Specialize the most common case: 4-bit weights.
14736
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7);
14737
unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15);
14738
unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15);
14739
unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15);
14740
14741
unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15);
14742
unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15);
14743
unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15);
14744
unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15);
14745
14746
unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15);
14747
unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15);
14748
unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15);
14749
unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15);
14750
14751
unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15);
14752
unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15);
14753
unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15);
14754
unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15);
14755
}
14756
else
14757
{
14758
// First weight is always an anchor.
14759
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14760
bit_ofs += (weight_bits - 1);
14761
14762
for (uint32_t i = 1; i < 16; i++)
14763
{
14764
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14765
bit_ofs += weight_bits;
14766
}
14767
}
14768
}
14769
else
14770
{
14771
const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2];
14772
14773
for (uint32_t i = 0; i < 16; i++)
14774
{
14775
if ((i == a0) || (i == a1) || (i == a2))
14776
{
14777
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14778
bit_ofs += (weight_bits - 1);
14779
}
14780
else
14781
{
14782
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14783
bit_ofs += weight_bits;
14784
}
14785
}
14786
}
14787
}
14788
}
14789
14790
if ((blue_contract_check) && (total_comps >= 3))
14791
{
14792
// We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care.
14793
bool invert_subset[3] = { false, false, false };
14794
bool any_flag = false;
14795
14796
for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
14797
{
14798
const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant +
14799
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant +
14800
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant;
14801
14802
const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant +
14803
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant +
14804
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant;
14805
14806
if (s1 < s0)
14807
{
14808
for (uint32_t c = 0; c < total_comps; c++)
14809
std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]);
14810
14811
invert_subset[subset_index] = true;
14812
any_flag = true;
14813
}
14814
}
14815
14816
if (any_flag)
14817
{
14818
const uint32_t weight_mask = (1 << weight_bits) - 1;
14819
14820
for (uint32_t i = 0; i < 16; i++)
14821
{
14822
uint32_t subset = pPartition_pattern[i];
14823
14824
if (invert_subset[subset])
14825
{
14826
unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]);
14827
14828
if (total_planes == 2)
14829
unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]);
14830
}
14831
}
14832
}
14833
}
14834
14835
return true;
14836
}
14837
14838
// Weight tables indexed by the number of weight bits (1-5). Entry 0 is null, so a weight-bit
// count of 0 has no table.
static const uint32_t* g_astc_weight_tables[6] =
{
    nullptr,
    g_bc7_weights1,
    g_bc7_weights2,
    g_bc7_weights3,
    g_astc_weights4,
    g_astc_weights5
};
14839
14840
bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb)
14841
{
14842
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
14843
{
14844
for (uint32_t i = 0; i < 16; i++)
14845
pPixels[i] = solid_color;
14846
return true;
14847
}
14848
14849
color32 endpoints[3][2];
14850
14851
const uint32_t total_subsets = g_uastc_mode_subsets[mode];
14852
const uint32_t total_comps = basisu::minimum<uint32_t>(4U, g_uastc_mode_comps[mode]);
14853
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
14854
const uint32_t total_planes = g_uastc_mode_planes[mode];
14855
const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
14856
const uint32_t weight_levels = 1 << weight_bits;
14857
14858
for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
14859
{
14860
if (total_comps == 2)
14861
{
14862
const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant;
14863
const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant;
14864
14865
const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant;
14866
const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 1]].m_unquant;
14867
14868
endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al);
14869
endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah);
14870
}
14871
else
14872
{
14873
for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++)
14874
{
14875
endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant;
14876
endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant;
14877
}
14878
for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++)
14879
{
14880
endpoints[subset_index][0][comp_index] = 255;
14881
endpoints[subset_index][1][comp_index] = 255;
14882
}
14883
}
14884
}
14885
14886
color32 block_colors[3][32];
14887
14888
const uint32_t* pWeights = g_astc_weight_tables[weight_bits];
14889
14890
for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
14891
{
14892
for (uint32_t l = 0; l < weight_levels; l++)
14893
{
14894
if (total_comps == 2)
14895
{
14896
const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb);
14897
const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb);
14898
14899
block_colors[subset_index][l].set(lc, lc, lc, ac);
14900
}
14901
else
14902
{
14903
uint32_t comp_index;
14904
for (comp_index = 0; comp_index < total_comps; comp_index++)
14905
block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb);
14906
14907
for (; comp_index < 4; comp_index++)
14908
block_colors[subset_index][l][comp_index] = 255;
14909
}
14910
}
14911
}
14912
14913
const uint8_t* pPartition_pattern = g_zero_pattern;
14914
14915
if (total_subsets >= 2)
14916
{
14917
if (total_subsets == 3)
14918
pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
14919
else if (mode == 7)
14920
pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
14921
else
14922
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
14923
14924
#ifdef _DEBUG
14925
for (uint32_t i = 0; i < 16; i++)
14926
{
14927
assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true));
14928
}
14929
#endif
14930
}
14931
14932
if (total_planes == 1)
14933
{
14934
if (total_subsets == 1)
14935
{
14936
for (uint32_t i = 0; i < 16; i++)
14937
{
14938
assert(astc.m_weights[i] < weight_levels);
14939
pPixels[i] = block_colors[0][astc.m_weights[i]];
14940
}
14941
}
14942
else
14943
{
14944
for (uint32_t i = 0; i < 16; i++)
14945
{
14946
assert(astc.m_weights[i] < weight_levels);
14947
pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]];
14948
}
14949
}
14950
}
14951
else
14952
{
14953
assert(total_subsets == 1);
14954
14955
for (uint32_t i = 0; i < 16; i++)
14956
{
14957
const uint32_t subset_index = 0; // pPartition_pattern[i];
14958
14959
const uint32_t weight_index0 = astc.m_weights[i * 2];
14960
const uint32_t weight_index1 = astc.m_weights[i * 2 + 1];
14961
14962
assert(weight_index0 < weight_levels && weight_index1 < weight_levels);
14963
14964
color32& c = pPixels[i];
14965
for (uint32_t comp = 0; comp < 4; comp++)
14966
{
14967
if ((int)comp == astc.m_ccs)
14968
c[comp] = block_colors[subset_index][weight_index1][comp];
14969
else
14970
c[comp] = block_colors[subset_index][weight_index0][comp];
14971
}
14972
}
14973
}
14974
14975
return true;
14976
}
14977
14978
bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb)
14979
{
14980
return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb);
14981
}
14982
14983
bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb)
14984
{
14985
unpacked_uastc_block unpacked_blk;
14986
14987
if (!unpack_uastc(blk, unpacked_blk, false, false))
14988
return false;
14989
14990
return unpack_uastc(unpacked_blk, pPixels, srgb);
14991
}
14992
14993
// Determines the best shared pbits to use to encode xl/xh
14994
static void determine_shared_pbits(
14995
uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
14996
color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
14997
{
14998
const uint32_t total_bits = comp_bits + 1;
14999
assert(total_bits >= 4 && total_bits <= 8);
15000
15001
const int iscalep = (1 << total_bits) - 1;
15002
const float scalep = (float)iscalep;
15003
15004
float best_err = 1e+9f;
15005
15006
for (int p = 0; p < 2; p++)
15007
{
15008
color_quad_u8 xMinColor, xMaxColor;
15009
for (uint32_t c = 0; c < 4; c++)
15010
{
15011
xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15012
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15013
}
15014
15015
color_quad_u8 scaledLow, scaledHigh;
15016
15017
for (uint32_t i = 0; i < 4; i++)
15018
{
15019
scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
15020
scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
15021
assert(scaledLow.m_c[i] <= 255);
15022
15023
scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
15024
scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
15025
assert(scaledHigh.m_c[i] <= 255);
15026
}
15027
15028
float err = 0;
15029
for (uint32_t i = 0; i < total_comps; i++)
15030
err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]);
15031
15032
if (err < best_err)
15033
{
15034
best_err = err;
15035
best_pbits[0] = p;
15036
best_pbits[1] = p;
15037
for (uint32_t j = 0; j < 4; j++)
15038
{
15039
bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1;
15040
bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1;
15041
}
15042
}
15043
}
15044
}
15045
15046
// Determines the best unique pbits to use to encode xl/xh
15047
static void determine_unique_pbits(
15048
uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
15049
color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
15050
{
15051
const uint32_t total_bits = comp_bits + 1;
15052
const int iscalep = (1 << total_bits) - 1;
15053
const float scalep = (float)iscalep;
15054
15055
float best_err0 = 1e+9f;
15056
float best_err1 = 1e+9f;
15057
15058
for (int p = 0; p < 2; p++)
15059
{
15060
color_quad_u8 xMinColor, xMaxColor;
15061
15062
for (uint32_t c = 0; c < 4; c++)
15063
{
15064
xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15065
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15066
}
15067
15068
color_quad_u8 scaledLow, scaledHigh;
15069
for (uint32_t i = 0; i < 4; i++)
15070
{
15071
scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
15072
scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
15073
assert(scaledLow.m_c[i] <= 255);
15074
15075
scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
15076
scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
15077
assert(scaledHigh.m_c[i] <= 255);
15078
}
15079
15080
float err0 = 0, err1 = 0;
15081
for (uint32_t i = 0; i < total_comps; i++)
15082
{
15083
err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f);
15084
err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f);
15085
}
15086
15087
if (err0 < best_err0)
15088
{
15089
best_err0 = err0;
15090
best_pbits[0] = p;
15091
15092
bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1;
15093
bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1;
15094
bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1;
15095
bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1;
15096
}
15097
15098
if (err1 < best_err1)
15099
{
15100
best_err1 = err1;
15101
best_pbits[1] = p;
15102
15103
bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1;
15104
bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1;
15105
bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1;
15106
bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1;
15107
}
15108
}
15109
}
15110
15111
bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst)
15112
{
15113
unpacked_uastc_block unpacked_src_blk;
15114
if (!unpack_uastc(src_blk, unpacked_src_blk, true, false))
15115
return false;
15116
15117
bool success = false;
15118
if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
15119
{
15120
pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color);
15121
success = true;
15122
}
15123
else
15124
{
15125
success = pack_astc_block(static_cast<uint32_t*>(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode);
15126
}
15127
15128
return success;
15129
}
15130
15131
bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk)
15132
{
15133
memset(&dst_blk, 0, sizeof(dst_blk));
15134
15135
const uint32_t mode = unpacked_src_blk.m_mode;
15136
15137
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
15138
const uint32_t total_comps = g_uastc_mode_comps[mode];
15139
15140
switch (mode)
15141
{
15142
case 0:
15143
case 5:
15144
case 10:
15145
case 12:
15146
case 14:
15147
case 15:
15148
case 18:
15149
{
15150
// MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB
15151
// MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB
15152
// MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6
15153
// MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6
15154
// MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6
15155
// MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6
15156
// MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6
15157
dst_blk.m_mode = 6;
15158
15159
float xl[4], xh[4];
15160
if (total_comps == 2)
15161
{
15162
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
15163
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
15164
15165
xl[1] = xl[0];
15166
xh[1] = xh[0];
15167
15168
xl[2] = xl[0];
15169
xh[2] = xh[0];
15170
15171
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
15172
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
15173
}
15174
else
15175
{
15176
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
15177
xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
15178
xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f;
15179
15180
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
15181
xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
15182
xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f;
15183
15184
if (total_comps == 4)
15185
{
15186
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f;
15187
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f;
15188
}
15189
else
15190
{
15191
xl[3] = 1.0f;
15192
xh[3] = 1.0f;
15193
}
15194
}
15195
15196
uint32_t best_pbits[2];
15197
color_quad_u8 bestMinColor, bestMaxColor;
15198
determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15199
15200
dst_blk.m_low[0] = bestMinColor;
15201
dst_blk.m_high[0] = bestMaxColor;
15202
15203
if (total_comps == 3)
15204
{
15205
dst_blk.m_low[0].m_c[3] = 127;
15206
dst_blk.m_high[0].m_c[3] = 127;
15207
}
15208
15209
dst_blk.m_pbits[0][0] = best_pbits[0];
15210
dst_blk.m_pbits[0][1] = best_pbits[1];
15211
15212
if (mode == 18)
15213
{
15214
const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 };
15215
for (uint32_t i = 0; i < 16; i++)
15216
dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15217
}
15218
else if (mode == 14)
15219
{
15220
const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 };
15221
for (uint32_t i = 0; i < 16; i++)
15222
dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15223
}
15224
else if ((mode == 5) || (mode == 12))
15225
{
15226
const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 };
15227
for (uint32_t i = 0; i < 16; i++)
15228
dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15229
}
15230
else
15231
{
15232
for (uint32_t i = 0; i < 16; i++)
15233
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15234
}
15235
15236
break;
15237
}
15238
case 1:
15239
{
15240
// DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3
15241
// Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables.
15242
dst_blk.m_mode = 3;
15243
15244
float xl[4], xh[4];
15245
xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f;
15246
xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f;
15247
xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f;
15248
xl[3] = 1.0f;
15249
15250
xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f;
15251
xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f;
15252
xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f;
15253
xh[3] = 1.0f;
15254
15255
uint32_t best_pbits[2];
15256
color_quad_u8 bestMinColor, bestMaxColor;
15257
memset(&bestMinColor, 0, sizeof(bestMinColor));
15258
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15259
determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15260
15261
for (uint32_t i = 0; i < 3; i++)
15262
{
15263
dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i];
15264
dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i];
15265
dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i];
15266
dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i];
15267
}
15268
dst_blk.m_pbits[0][0] = best_pbits[0];
15269
dst_blk.m_pbits[0][1] = best_pbits[1];
15270
dst_blk.m_pbits[1][0] = best_pbits[0];
15271
dst_blk.m_pbits[1][1] = best_pbits[1];
15272
15273
for (uint32_t i = 0; i < 16; i++)
15274
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15275
15276
break;
15277
}
15278
case 2:
15279
{
15280
// 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1
15281
dst_blk.m_mode = 1;
15282
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15283
15284
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15285
15286
float xl[4], xh[4];
15287
xl[3] = 1.0f;
15288
xh[3] = 1.0f;
15289
15290
for (uint32_t subset = 0; subset < 2; subset++)
15291
{
15292
for (uint32_t i = 0; i < 3; i++)
15293
{
15294
uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6];
15295
v = (v << 4) | v;
15296
xl[i] = v / 255.0f;
15297
15298
v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1];
15299
v = (v << 4) | v;
15300
xh[i] = v / 255.0f;
15301
}
15302
15303
uint32_t best_pbits[2] = { 0, 0 };
15304
color_quad_u8 bestMinColor, bestMaxColor;
15305
memset(&bestMinColor, 0, sizeof(bestMinColor));
15306
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15307
determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15308
15309
const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
15310
15311
for (uint32_t i = 0; i < 3; i++)
15312
{
15313
dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
15314
dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
15315
}
15316
15317
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15318
} // subset
15319
15320
for (uint32_t i = 0; i < 16; i++)
15321
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15322
15323
break;
15324
}
15325
case 3:
15326
{
15327
// DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2
15328
dst_blk.m_mode = 2;
15329
dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7;
15330
15331
const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm;
15332
15333
for (uint32_t subset = 0; subset < 3; subset++)
15334
{
15335
for (uint32_t comp = 0; comp < 3; comp++)
15336
{
15337
uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant;
15338
uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant;
15339
15340
// TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion.
15341
lo = (lo * 31 + 127) / 255;
15342
hi = (hi * 31 + 127) / 255;
15343
15344
const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset];
15345
15346
dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo;
15347
dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi;
15348
}
15349
}
15350
15351
for (uint32_t i = 0; i < 16; i++)
15352
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15353
15354
break;
15355
}
15356
case 4:
15357
{
15358
// 4. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3
15359
dst_blk.m_mode = 3;
15360
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15361
15362
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15363
15364
float xl[4], xh[4];
15365
xl[3] = 1.0f;
15366
xh[3] = 1.0f;
15367
15368
for (uint32_t subset = 0; subset < 2; subset++)
15369
{
15370
for (uint32_t i = 0; i < 3; i++)
15371
{
15372
xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f;
15373
xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f;
15374
}
15375
15376
uint32_t best_pbits[2] = { 0, 0 };
15377
color_quad_u8 bestMinColor, bestMaxColor;
15378
memset(&bestMinColor, 0, sizeof(bestMinColor));
15379
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15380
determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15381
15382
const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
15383
15384
for (uint32_t i = 0; i < 3; i++)
15385
{
15386
dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
15387
dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
15388
}
15389
dst_blk.m_low[bc7_subset_index].m_c[3] = 127;
15390
dst_blk.m_high[bc7_subset_index].m_c[3] = 127;
15391
15392
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15393
dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
15394
15395
} // subset
15396
15397
for (uint32_t i = 0; i < 16; i++)
15398
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15399
15400
break;
15401
}
15402
case 6:
15403
case 11:
15404
case 13:
15405
case 17:
15406
{
15407
// MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB
15408
// MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5
15409
// MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5
15410
// MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5
15411
dst_blk.m_mode = 5;
15412
dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3;
15413
15414
if (total_comps == 2)
15415
{
15416
assert(unpacked_src_blk.m_astc.m_ccs == 3);
15417
15418
dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255);
15419
dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255);
15420
15421
dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0];
15422
dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0];
15423
15424
dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0];
15425
dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0];
15426
15427
dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant);
15428
dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant);
15429
}
15430
else
15431
{
15432
for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++)
15433
{
15434
uint32_t bc7_comp = astc_comp;
15435
// ASTC and BC7 handle dual plane component rotations differently:
15436
// ASTC: 2nd plane separately interpolates the CCS channel.
15437
// BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel.
15438
if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs)
15439
bc7_comp = 3;
15440
else if (astc_comp == 3)
15441
bc7_comp = unpacked_src_blk.m_astc.m_ccs;
15442
15443
uint32_t l = 255, h = 255;
15444
if (astc_comp < total_comps)
15445
{
15446
l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant;
15447
h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant;
15448
}
15449
15450
if (bc7_comp < 3)
15451
{
15452
l = (l * 127 + 127) / 255;
15453
h = (h * 127 + 127) / 255;
15454
}
15455
15456
dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l;
15457
dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h;
15458
}
15459
}
15460
15461
if (mode == 13)
15462
{
15463
for (uint32_t i = 0; i < 16; i++)
15464
{
15465
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0;
15466
dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 3 : 0;
15467
}
15468
}
15469
else
15470
{
15471
for (uint32_t i = 0; i < 16; i++)
15472
{
15473
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2];
15474
dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1];
15475
}
15476
}
15477
15478
break;
15479
}
15480
case 7:
15481
{
15482
// DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2
15483
dst_blk.m_mode = 2;
15484
dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73;
15485
15486
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k;
15487
15488
for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
15489
{
15490
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k);
15491
15492
for (uint32_t c = 0; c < 3; c++)
15493
{
15494
dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255;
15495
dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255;
15496
}
15497
}
15498
15499
for (uint32_t i = 0; i < 16; i++)
15500
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15501
15502
break;
15503
}
15504
case UASTC_MODE_INDEX_SOLID_COLOR:
15505
{
15506
// Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6)
15507
const color32& solid_color = unpacked_src_blk.m_solid_color;
15508
15509
uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error +
15510
g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error;
15511
15512
uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error +
15513
g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error;
15514
15515
if (best_err0 > 0 && best_err1 > 0)
15516
{
15517
dst_blk.m_mode = 5;
15518
15519
for (uint32_t c = 0; c < 3; c++)
15520
{
15521
dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo;
15522
dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi;
15523
}
15524
15525
memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16);
15526
15527
dst_blk.m_low[0].m_c[3] = solid_color.c[3];
15528
dst_blk.m_high[0].m_c[3] = solid_color.c[3];
15529
15530
//memset(dst_blk.m_alpha_selectors, 0, 16);
15531
}
15532
else
15533
{
15534
dst_blk.m_mode = 6;
15535
15536
uint32_t best_p = 0;
15537
if (best_err1 < best_err0)
15538
best_p = 1;
15539
15540
for (uint32_t c = 0; c < 4; c++)
15541
{
15542
dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo;
15543
dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi;
15544
}
15545
15546
dst_blk.m_pbits[0][0] = best_p;
15547
dst_blk.m_pbits[0][1] = best_p;
15548
memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16);
15549
}
15550
15551
break;
15552
}
15553
case 9:
15554
case 16:
15555
{
15556
// 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7
15557
// 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7
15558
15559
dst_blk.m_mode = 7;
15560
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15561
15562
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15563
15564
for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++)
15565
{
15566
float xl[4], xh[4];
15567
15568
if (total_comps == 2)
15569
{
15570
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f;
15571
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f;
15572
15573
xl[1] = xl[0];
15574
xh[1] = xh[0];
15575
15576
xl[2] = xl[0];
15577
xh[2] = xh[0];
15578
15579
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f;
15580
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f;
15581
}
15582
else
15583
{
15584
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f;
15585
xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f;
15586
xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f;
15587
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f;
15588
15589
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f;
15590
xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f;
15591
xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f;
15592
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f;
15593
}
15594
15595
uint32_t best_pbits[2] = { 0, 0 };
15596
color_quad_u8 bestMinColor, bestMaxColor;
15597
memset(&bestMinColor, 0, sizeof(bestMinColor));
15598
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15599
determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15600
15601
const uint32_t bc7_subset_index = invert_partition ? (1 - astc_subset) : astc_subset;
15602
15603
dst_blk.m_low[bc7_subset_index] = bestMinColor;
15604
dst_blk.m_high[bc7_subset_index] = bestMaxColor;
15605
15606
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15607
dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
15608
} // astc_subset
15609
15610
for (uint32_t i = 0; i < 16; i++)
15611
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15612
15613
break;
15614
}
15615
default:
15616
return false;
15617
}
15618
15619
return true;
15620
}
15621
15622
bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk)
15623
{
15624
unpacked_uastc_block unpacked_src_blk;
15625
if (!unpack_uastc(src_blk, unpacked_src_blk, false, false))
15626
return false;
15627
15628
return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk);
15629
}
15630
15631
bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst)
15632
{
15633
bc7_optimization_results temp;
15634
if (!transcode_uastc_to_bc7(src_blk, temp))
15635
return false;
15636
15637
encode_bc7_block(pDst, &temp);
15638
return true;
15639
}
15640
15641
// Nudges the R, G and B components of a quantized ETC1 base color according to a bias code.
//   block_color - quantized base color; components 0..2 are expected to lie in [0, limit]
//   bias        - bias code. The codes listed in the switch select a hand-picked delta that may
//                 depend on the channel and/or on which subblock is being processed; every other
//                 code is read as three base-3 digits (one per channel) mapping to -1, 0 or +1.
//   limit       - largest valid component value (the caller in this file passes 31 or 15)
//   subblock    - 0 for the first subblock, non-zero for the second
// Returns the biased color. Alpha is passed through from block_color unchanged.
color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock)
{
	// Divisors that extract the base-3 digit of `bias` belonging to each channel.
	static const int s_divs[3] = { 1, 3, 9 };

	// Start from the input so the alpha channel, which the loop below never writes, is returned
	// with a defined value rather than whatever a default-constructed color32 would hold.
	// NOTE(review): assumes color32's default constructor does not initialize its members - confirm.
	color32 result(block_color);

	for (uint32_t c = 0; c < 3; c++)
	{
		int delta = 0;

		switch (bias)
		{
		// Single channel, first subblock only: -1 on R/G/B.
		case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break;
		case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break;
		case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break;

		// Single channel, first subblock only: +1 on R/G/B.
		case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break;
		case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break;
		case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break;

		// Single channel, second subblock only: -1 on R/G/B.
		case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break;
		case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break;
		case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break;

		// Single channel, second subblock only: +1 on R/G/B.
		case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break;
		case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break;
		case 8: delta = subblock ? ((c == 2) ? 1 : 0) : 0; break;

		// All channels, both subblocks.
		case 10: delta = -2; break;

		// All channels, delta depends only on the subblock.
		case 27: delta = subblock ? 0 : -1; break;
		case 28: delta = subblock ? -1 : 1; break;
		case 29: delta = subblock ? 1 : 0; break;
		case 30: delta = subblock ? -1 : 0; break;
		case 31: delta = subblock ? 0 : 1; break;

		default:
			// Base-3 digit for this channel, shifted from {0,1,2} to {-1,0,+1}. The cast makes the
			// subtraction signed instead of relying on unsigned wraparound.
			delta = (int)((bias / (uint32_t)s_divs[c]) % 3u) - 1;
			break;
		}

		int v = block_color[c];
		if (v == 0)
		{
			// At the lower bound the delta range is shifted upwards so the result stays >= 0.
			if (delta == -2)
				v += 3;
			else
				v += delta + 1;
		}
		else if (v == (int)limit)
		{
			// At the upper bound the delta range is shifted downwards so the result stays <= limit.
			v += (delta - 1);
		}
		else
		{
			v += delta;

			// If the step left the valid range, apply it in the opposite direction instead.
			if ((v < 0) || (v > (int)limit))
				v = (v - delta) - delta;
		}

		assert(v >= 0);
		assert(v <= (int)limit);

		result[c] = (uint8_t)v;
	}

	return result;
}
15709
15710
// Chooses a selector for each pixel of subblocks [first_subblock, last_subblock) of dst_blk and
// writes the two resulting 16-bit selector planes to bytes 4..7 of the block.
// pSource_pixels is read as a 4x4 row-major pixel array. A pixel is classified by comparing its
// luma against the three sums of neighbouring block color lumas.
// Selector bits of subblocks outside the given range are written as zero.
static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock)
{
	// Indexed by the number of thresholds the pixel's luma falls below (0..3); yields the 2-bit
	// value stored for that pixel.
	static const uint8_t s_tran[4] = { 1, 0, 2, 3 };

	uint16_t lsb_plane = 0;
	uint16_t msb_plane = 0;

	for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++)
	{
		color32 block_colors[4];
		dst_blk.get_block_colors(block_colors, subblock);

		uint32_t block_luma[4];
		for (uint32_t k = 0; k < 4; k++)
			block_luma[k] = block_colors[k][0] * 54 + block_colors[k][1] * 183 + block_colors[k][2] * 19;

		// Pixel luma below uses exactly doubled weights (108/366/38), so comparing it against the
		// sum of two block lumas compares the pixel against their midpoint.
		const uint32_t thresh01 = block_luma[0] + block_luma[1];
		const uint32_t thresh12 = block_luma[1] + block_luma[2];
		const uint32_t thresh23 = block_luma[2] + block_luma[3];

		const bool flipped = dst_blk.get_flip_bit() ? true : false;

		// X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3
		// Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3
		// i.e. the selector bit for pixel (x, y) lives at bit index x * 4 + y.
		for (uint32_t j = 0; j < 8; j++)
		{
			// A subblock is 2 rows of 4 pixels when flipped, 2 columns of 4 pixels otherwise.
			const uint32_t major = subblock * 2 + (j >> 2);
			const uint32_t minor = j & 3;
			const uint32_t px = flipped ? minor : major;
			const uint32_t py = flipped ? major : minor;

			const color32& src = pSource_pixels[px + py * 4];
			const uint32_t l = src[0] * 108 + src[1] * 366 + src[2] * 38;

			const uint32_t t = s_tran[(l < thresh01) + (l < thresh12) + (l < thresh23)];

			const uint32_t ofs = px * 4 + py;
			assert(ofs < 16);
			lsb_plane |= ((t & 1) << ofs);
			msb_plane |= ((t >> 1) << ofs);
		}
	}

	// Each plane is stored high byte first; the MSB plane occupies bytes 4..5, the LSB plane bytes 6..7.
	dst_blk.m_bytes[4] = (uint8_t)(msb_plane >> 8);
	dst_blk.m_bytes[5] = (uint8_t)(msb_plane);
	dst_blk.m_bytes[6] = (uint8_t)(lsb_plane >> 8);
	dst_blk.m_bytes[7] = (uint8_t)(lsb_plane);
}
15781
15782
// The 4 selector bytes (block bytes 4..7) of a solid-color ETC1 block, indexed by the selector
// value that every pixel shares.
static const uint8_t s_etc1_solid_selectors[4][4] =
{
	{ 255, 255, 255, 255 },
	{ 255, 255, 0, 0 },
	{ 0, 0, 0, 0 },
	{ 0, 0, 255, 255 }
};
15783
15784
// Position of a pixel inside a 4x4 block.
struct etc_coord2
{
	uint8_t m_x; // column
	uint8_t m_y; // row
};
15788
15789
// [flip][subblock][pixel_index]
15790
const etc_coord2 g_etc1_pixel_coords[2][2][8] =
15791
{
15792
{
15793
{
15794
{ 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
15795
{ 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
15796
},
15797
{
15798
{ 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
15799
{ 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
15800
}
15801
},
15802
{
15803
{
15804
{ 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
15805
{ 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
15806
},
15807
{
15808
{ 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
15809
{ 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
15810
},
15811
}
15812
};
15813
15814
// Transcodes an already-unpacked UASTC block to an 8-byte ETC1 block, driven by the
// ETC1 hint fields carried in the UASTC block (m_etc1_flip, m_etc1_diff, m_etc1_inten0/1,
// m_etc1_bias and, for solid-color blocks, m_etc1_r/g/b and m_etc1_selector).
//
//   unpacked_src_blk - the unpacked UASTC block (hints + mode).
//   block_pixels     - the block's decoded pixels, indexed [y][x]. Not read for solid-color blocks.
//   pDst             - destination; written as a decoder_etc_block.
void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
{
	decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);

	if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
	{
		// Solid color: the flip bit stays 0 and both subblocks use the same intensity table (inten0).
		dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2));

		if (unpacked_src_blk.m_etc1_diff)
		{
			// Differential mode: 5-bit base color in the top bits, delta of 0.
			dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3);
			dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3);
			dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3);
		}
		else
		{
			// Individual mode: the same 4-bit color replicated into both nibbles.
			dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4));
			dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4));
			dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4));
		}

		// Every pixel gets the single hinted selector.
		memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4);

		return;
	}

	const bool flip = unpacked_src_blk.m_etc1_flip != 0;
	const bool diff = unpacked_src_blk.m_etc1_diff != 0;

	dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2));

	// Largest base color component value: 5 bits in differential mode, 4 bits in individual mode.
	const uint32_t limit = diff ? 31 : 15;

	color32 block_colors[2];

	for (uint32_t subset = 0; subset < 2; subset++)
	{
		// Sum the 8 pixels of this subblock per channel.
		uint32_t avg_color[3] = { 0, 0, 0 };

		for (uint32_t j = 0; j < 8; j++)
		{
			const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j];
			const color32& px = block_pixels[c.m_y][c.m_x];

			avg_color[0] += px.r;
			avg_color[1] += px.g;
			avg_color[2] += px.b;
		}

		// Scale the 8-pixel sum from [0, 8*255] to [0, limit] with rounding (1020 is half of 8*255).
		for (uint32_t comp = 0; comp < 3; comp++)
			block_colors[subset][comp] = (uint8_t)((avg_color[comp] * limit + 1020) / (8 * 255));
		block_colors[subset][3] = 0;

		if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode])
		{
			block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset);
		}
	}

	if (diff)
	{
		for (uint32_t comp = 0; comp < 3; comp++)
		{
			int delta = block_colors[1][comp] - block_colors[0][comp];
			delta = basisu::clamp<int>(delta, cETC1ColorDeltaMin, cETC1ColorDeltaMax);

			// The delta is stored as a 3-bit two's complement value.
			if (delta < 0) delta += 8;

			dst_blk.m_bytes[comp] = (uint8_t)((block_colors[0][comp] << 3) | delta);
		}
	}
	else
	{
		for (uint32_t comp = 0; comp < 3; comp++)
			dst_blk.m_bytes[comp] = (uint8_t)(block_colors[1][comp] | (block_colors[0][comp] << 4));
	}

	// NOTE(review): presumably picks the per-pixel selectors for both subblocks from the
	// header written above (arguments 0 and 2) - confirm against etc1_determine_selectors().
	etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2);
}
15902
15903
bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst)
15904
{
15905
unpacked_uastc_block unpacked_src_blk;
15906
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15907
return false;
15908
15909
color32 block_pixels[4][4];
15910
if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
15911
{
15912
const bool unpack_srgb = false;
15913
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15914
return false;
15915
}
15916
15917
transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst);
15918
15919
return true;
15920
}
15921
15922
// Returns the squared difference between the 8-bit sample c and the reference level y.
static inline int gray_distance2(const uint8_t c, int y)
{
	const int gray_dist = (int)c - y;
	return gray_dist * gray_dist;
}
15927
15928
static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels,
15929
int& upper_avg, int& lower_avg, int& left_avg, int& right_avg)
15930
{
15931
int sums[2][2];
15932
15933
#define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)]
15934
15935
sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1);
15936
sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1);
15937
sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3);
15938
sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3);
15939
15940
upper_avg = (sums[0][0] + sums[1][0] + 4) / 8;
15941
lower_avg = (sums[0][1] + sums[1][1] + 4) / 8;
15942
left_avg = (sums[0][0] + sums[0][1] + 4) / 8;
15943
right_avg = (sums[1][0] + sums[1][1] + 4) / 8;
15944
15945
#undef GET_XY
15946
#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a)
15947
15948
int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0;
15949
for (uint32_t i = 0; i < 4; i++)
15950
{
15951
for (uint32_t j = 0; j < 2; j++)
15952
{
15953
upper_gray_dist += GET_XY(i, j, upper_avg);
15954
lower_gray_dist += GET_XY(i, 2 + j, lower_avg);
15955
left_gray_dist += GET_XY(j, i, left_avg);
15956
right_gray_dist += GET_XY(2 + j, i, right_avg);
15957
}
15958
}
15959
15960
#undef GET_XY
15961
15962
int upper_lower_sum = upper_gray_dist + lower_gray_dist;
15963
int left_right_sum = left_gray_dist + right_gray_dist;
15964
15965
return upper_lower_sum < left_right_sum;
15966
}
15967
15968
// ETC1 configuration for a solid single-channel block, indexed by the 8-bit value.
// Fields are listed from the least significant bit upward:
//   Base  Sel  Table
//   XXXXX XX   XXX
// i.e. bits 0-4 = 5-bit base color, bits 5-6 = selector (row of s_etc1_solid_selectors),
// bits 7 and up = intensity table index.
static const uint16_t g_etc1_y_solid_block_configs[256] =
{
	0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167,
	355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361,
	204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210,
	51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57,
	155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126
};
15978
15979
// Individual-mode ETC1 subblock configuration covering four consecutive values, indexed
// by the lowest value in the subblock. Used when the subblock's value range is 2 or 3.
// Fields are listed from the least significant bit upward:
//   table base sel0 sel1 sel2 sel3
// i.e. bits 0-2 = intensity table, bits 3-7 = base color (4-bit values only),
// bits 8-9, 10-11, 12-13, 14-15 = selector for (value - lowest) == 0, 1, 2, 3.
static const uint16_t g_etc1_y_solid_block_4i_configs[256] =
{
	0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511,
	0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15,
	0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928,
	0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33,
	0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40,
	0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B,
	0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560,
	0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D,
	0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278
};
15993
15994
// Individual-mode ETC1 subblock configuration covering two consecutive values, indexed
// by the lowest value in the subblock. Used when the subblock's value range is exactly 1.
// Bit layout (least significant first): bits 0-2 = intensity table, bits 3-7 = base color
// (4-bit values only), bits 8-9 and 10-11 = selector for (value - lowest) == 0 and 1.
static const uint16_t g_etc1_y_solid_block_2i_configs[256] =
{
	0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09,
	0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19,
	0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29,
	0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39,
	0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49,
	0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59,
	0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69,
	0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278
};
16005
16006
// Individual-mode ETC1 subblock configuration for a single value, indexed by that value.
// Used when every sample in the subblock is identical (value range of 0).
// Bit layout (least significant first): bits 0-2 = intensity table, bits 3-7 = base color
// (4-bit values only), bits 8-9 = the selector applied to every pixel.
static const uint16_t g_etc1_y_solid_block_1i_configs[256] =
{
	0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309,
	0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C,
	0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230,
	0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240,
	0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B,
	0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261,
	0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271,
	0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278
};
16017
16018
// We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch.
16019
bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel)
16020
{
16021
unpacked_uastc_block unpacked_src_blk;
16022
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16023
return false;
16024
16025
#if 0
16026
for (uint32_t individ = 0; individ < 2; individ++)
16027
{
16028
uint32_t overall_error = 0;
16029
16030
for (uint32_t c = 0; c < 256; c++)
16031
{
16032
uint32_t best_err = UINT32_MAX;
16033
uint32_t best_individ = 0;
16034
uint32_t best_base = 0;
16035
uint32_t best_sels[4] = { 0,0,0,0 };
16036
uint32_t best_table = 0;
16037
16038
const uint32_t limit = individ ? 16 : 32;
16039
16040
for (uint32_t table = 0; table < 8; table++)
16041
{
16042
for (uint32_t base = 0; base < limit; base++)
16043
{
16044
uint32_t total_e = 0;
16045
uint32_t sels[4] = { 0,0,0,0 };
16046
16047
const uint32_t N = 4;
16048
for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
16049
{
16050
uint32_t best_sel_e = UINT32_MAX;
16051
uint32_t best_sel = 0;
16052
16053
for (uint32_t sel = 0; sel < 4; sel++)
16054
{
16055
int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
16056
val = clamp255(val + g_etc1_inten_tables[table][sel]);
16057
16058
int e = iabs(val - clamp255(c + i));
16059
if (e < best_sel_e)
16060
{
16061
best_sel_e = e;
16062
best_sel = sel;
16063
}
16064
16065
} // sel
16066
16067
sels[i] = best_sel;
16068
total_e += best_sel_e * best_sel_e;
16069
16070
} // i
16071
16072
if (total_e < best_err)
16073
{
16074
best_err = total_e;
16075
best_individ = individ;
16076
best_base = base;
16077
memcpy(best_sels, sels, sizeof(best_sels));
16078
best_table = table;
16079
}
16080
16081
} // base
16082
} // table
16083
16084
//printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
16085
16086
uint32_t encoded = best_table | (best_base << 3) |
16087
(best_sels[0] << 8) |
16088
(best_sels[1] << 10) |
16089
(best_sels[2] << 12) |
16090
(best_sels[3] << 14);
16091
16092
printf("0x%X,", encoded);
16093
16094
overall_error += best_err;
16095
} // c
16096
16097
printf("\n");
16098
printf("Overall error: %u\n", overall_error);
16099
16100
} // individ
16101
16102
exit(0);
16103
#endif
16104
16105
#if 0
16106
for (uint32_t individ = 0; individ < 2; individ++)
16107
{
16108
uint32_t overall_error = 0;
16109
16110
for (uint32_t c = 0; c < 256; c++)
16111
{
16112
uint32_t best_err = UINT32_MAX;
16113
uint32_t best_individ = 0;
16114
uint32_t best_base = 0;
16115
uint32_t best_sels[4] = { 0,0,0,0 };
16116
uint32_t best_table = 0;
16117
16118
const uint32_t limit = individ ? 16 : 32;
16119
16120
for (uint32_t table = 0; table < 8; table++)
16121
{
16122
for (uint32_t base = 0; base < limit; base++)
16123
{
16124
uint32_t total_e = 0;
16125
uint32_t sels[4] = { 0,0,0,0 };
16126
16127
const uint32_t N = 1;
16128
for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
16129
{
16130
uint32_t best_sel_e = UINT32_MAX;
16131
uint32_t best_sel = 0;
16132
16133
for (uint32_t sel = 0; sel < 4; sel++)
16134
{
16135
int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
16136
val = clamp255(val + g_etc1_inten_tables[table][sel]);
16137
16138
int e = iabs(val - clamp255(c + i));
16139
if (e < best_sel_e)
16140
{
16141
best_sel_e = e;
16142
best_sel = sel;
16143
}
16144
16145
} // sel
16146
16147
sels[i] = best_sel;
16148
total_e += best_sel_e * best_sel_e;
16149
16150
} // i
16151
16152
if (total_e < best_err)
16153
{
16154
best_err = total_e;
16155
best_individ = individ;
16156
best_base = base;
16157
memcpy(best_sels, sels, sizeof(best_sels));
16158
best_table = table;
16159
}
16160
16161
} // base
16162
} // table
16163
16164
//printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
16165
16166
uint32_t encoded = best_table | (best_base << 3) |
16167
(best_sels[0] << 8) |
16168
(best_sels[1] << 10) |
16169
(best_sels[2] << 12) |
16170
(best_sels[3] << 14);
16171
16172
printf("0x%X,", encoded);
16173
16174
overall_error += best_err;
16175
} // c
16176
16177
printf("\n");
16178
printf("Overall error: %u\n", overall_error);
16179
16180
} // individ
16181
16182
exit(0);
16183
#endif
16184
16185
decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
16186
16187
if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
16188
{
16189
const uint32_t y = unpacked_src_blk.m_solid_color[channel];
16190
const uint32_t encoded_config = g_etc1_y_solid_block_configs[y];
16191
16192
const uint32_t base = encoded_config & 31;
16193
const uint32_t sel = (encoded_config >> 5) & 3;
16194
const uint32_t table = encoded_config >> 7;
16195
16196
dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2));
16197
16198
dst_blk.m_bytes[0] = (uint8_t)(base << 3);
16199
dst_blk.m_bytes[1] = (uint8_t)(base << 3);
16200
dst_blk.m_bytes[2] = (uint8_t)(base << 3);
16201
16202
memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4);
16203
return true;
16204
}
16205
16206
color32 block_pixels[4][4];
16207
const bool unpack_srgb = false;
16208
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16209
return false;
16210
16211
uint8_t block_y[4][4];
16212
for (uint32_t i = 0; i < 16; i++)
16213
((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel];
16214
16215
int upper_avg, lower_avg, left_avg, right_avg;
16216
bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg);
16217
16218
// non-flipped: | |
16219
// vs.
16220
// flipped: --
16221
// --
16222
16223
uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 };
16224
16225
if (flip)
16226
{
16227
for (uint32_t y = 0; y < 2; y++)
16228
{
16229
for (uint32_t x = 0; x < 4; x++)
16230
{
16231
const uint32_t v = block_y[y][x];
16232
low[0] = basisu::minimum(low[0], v);
16233
high[0] = basisu::maximum(high[0], v);
16234
}
16235
}
16236
for (uint32_t y = 2; y < 4; y++)
16237
{
16238
for (uint32_t x = 0; x < 4; x++)
16239
{
16240
const uint32_t v = block_y[y][x];
16241
low[1] = basisu::minimum(low[1], v);
16242
high[1] = basisu::maximum(high[1], v);
16243
}
16244
}
16245
}
16246
else
16247
{
16248
for (uint32_t y = 0; y < 4; y++)
16249
{
16250
for (uint32_t x = 0; x < 2; x++)
16251
{
16252
const uint32_t v = block_y[y][x];
16253
low[0] = basisu::minimum(low[0], v);
16254
high[0] = basisu::maximum(high[0], v);
16255
}
16256
}
16257
for (uint32_t y = 0; y < 4; y++)
16258
{
16259
for (uint32_t x = 2; x < 4; x++)
16260
{
16261
const uint32_t v = block_y[y][x];
16262
low[1] = basisu::minimum(low[1], v);
16263
high[1] = basisu::maximum(high[1], v);
16264
}
16265
}
16266
}
16267
16268
const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] };
16269
16270
dst_blk.m_bytes[3] = (uint8_t)((int)flip);
16271
16272
if ((range[0] <= 3) && (range[1] <= 3))
16273
{
16274
// This is primarily for better gradients.
16275
dst_blk.m_bytes[0] = 0;
16276
dst_blk.m_bytes[1] = 0;
16277
dst_blk.m_bytes[2] = 0;
16278
16279
uint16_t l_bitmask = 0, h_bitmask = 0;
16280
16281
for (uint32_t subblock = 0; subblock < 2; subblock++)
16282
{
16283
const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
16284
16285
const uint32_t table = encoded & 7;
16286
const uint32_t base = (encoded >> 3) & 31;
16287
assert(base <= 15);
16288
const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
16289
16290
dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
16291
16292
const uint32_t sv = base << (subblock ? 0 : 4);
16293
dst_blk.m_bytes[0] |= (uint8_t)(sv);
16294
dst_blk.m_bytes[1] |= (uint8_t)(sv);
16295
dst_blk.m_bytes[2] |= (uint8_t)(sv);
16296
16297
if (flip)
16298
{
16299
uint32_t ofs = subblock * 2;
16300
for (uint32_t y = 0; y < 2; y++)
16301
{
16302
for (uint32_t x = 0; x < 4; x++)
16303
{
16304
uint32_t t = block_y[y + subblock * 2][x];
16305
assert(t >= low[subblock] && t <= high[subblock]);
16306
t -= low[subblock];
16307
assert(t <= 3);
16308
16309
t = g_selector_index_to_etc1[sels[t]];
16310
16311
assert(ofs < 16);
16312
l_bitmask |= ((t & 1) << ofs);
16313
h_bitmask |= ((t >> 1) << ofs);
16314
ofs += 4;
16315
}
16316
16317
ofs = (int)ofs + 1 - 4 * 4;
16318
}
16319
}
16320
else
16321
{
16322
uint32_t ofs = (subblock * 2) * 4;
16323
for (uint32_t x = 0; x < 2; x++)
16324
{
16325
for (uint32_t y = 0; y < 4; y++)
16326
{
16327
uint32_t t = block_y[y][x + subblock * 2];
16328
assert(t >= low[subblock] && t <= high[subblock]);
16329
t -= low[subblock];
16330
assert(t <= 3);
16331
16332
t = g_selector_index_to_etc1[sels[t]];
16333
16334
assert(ofs < 16);
16335
l_bitmask |= ((t & 1) << ofs);
16336
h_bitmask |= ((t >> 1) << ofs);
16337
++ofs;
16338
}
16339
}
16340
}
16341
} // subblock
16342
16343
dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
16344
dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
16345
dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
16346
dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
16347
16348
return true;
16349
}
16350
16351
uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255;
16352
uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255;
16353
16354
bool diff = true;
16355
16356
int dy = y1 - y0;
16357
16358
if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax))
16359
{
16360
diff = false;
16361
16362
y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255;
16363
y1 = ((flip ? lower_avg : right_avg) * 15 + 127) / 255;
16364
16365
dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4));
16366
dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4));
16367
dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4));
16368
}
16369
else
16370
{
16371
dy = basisu::clamp<int>(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
16372
16373
y1 = y0 + dy;
16374
16375
if (dy < 0) dy += 8;
16376
16377
dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy);
16378
dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy);
16379
dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy);
16380
16381
dst_blk.m_bytes[3] |= 2;
16382
}
16383
16384
const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) };
16385
16386
uint32_t enc_range[2];
16387
for (uint32_t subset = 0; subset < 2; subset++)
16388
{
16389
const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]);
16390
const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]);
16391
16392
enc_range[subset] = basisu::maximum(pos, neg);
16393
}
16394
16395
uint16_t l_bitmask = 0, h_bitmask = 0;
16396
for (uint32_t subblock = 0; subblock < 2; subblock++)
16397
{
16398
if ((!diff) && (range[subblock] <= 3))
16399
{
16400
const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
16401
16402
const uint32_t table = encoded & 7;
16403
const uint32_t base = (encoded >> 3) & 31;
16404
assert(base <= 15);
16405
const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
16406
16407
dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
16408
16409
const uint32_t mask = ~(0xF << (subblock ? 0 : 4));
16410
16411
dst_blk.m_bytes[0] &= mask;
16412
dst_blk.m_bytes[1] &= mask;
16413
dst_blk.m_bytes[2] &= mask;
16414
16415
const uint32_t sv = base << (subblock ? 0 : 4);
16416
dst_blk.m_bytes[0] |= (uint8_t)(sv);
16417
dst_blk.m_bytes[1] |= (uint8_t)(sv);
16418
dst_blk.m_bytes[2] |= (uint8_t)(sv);
16419
16420
if (flip)
16421
{
16422
uint32_t ofs = subblock * 2;
16423
for (uint32_t y = 0; y < 2; y++)
16424
{
16425
for (uint32_t x = 0; x < 4; x++)
16426
{
16427
uint32_t t = block_y[y + subblock * 2][x];
16428
assert(t >= low[subblock] && t <= high[subblock]);
16429
t -= low[subblock];
16430
assert(t <= 3);
16431
16432
t = g_selector_index_to_etc1[sels[t]];
16433
16434
assert(ofs < 16);
16435
l_bitmask |= ((t & 1) << ofs);
16436
h_bitmask |= ((t >> 1) << ofs);
16437
ofs += 4;
16438
}
16439
16440
ofs = (int)ofs + 1 - 4 * 4;
16441
}
16442
}
16443
else
16444
{
16445
uint32_t ofs = (subblock * 2) * 4;
16446
for (uint32_t x = 0; x < 2; x++)
16447
{
16448
for (uint32_t y = 0; y < 4; y++)
16449
{
16450
uint32_t t = block_y[y][x + subblock * 2];
16451
assert(t >= low[subblock] && t <= high[subblock]);
16452
t -= low[subblock];
16453
assert(t <= 3);
16454
16455
t = g_selector_index_to_etc1[sels[t]];
16456
16457
assert(ofs < 16);
16458
l_bitmask |= ((t & 1) << ofs);
16459
h_bitmask |= ((t >> 1) << ofs);
16460
++ofs;
16461
}
16462
}
16463
}
16464
16465
continue;
16466
} // if
16467
16468
uint32_t best_err = UINT32_MAX;
16469
uint8_t best_sels[8];
16470
uint32_t best_inten = 0;
16471
16472
const int base = base_y[subblock];
16473
16474
const int low_limit = -base;
16475
const int high_limit = 255 - base;
16476
16477
assert(low_limit <= 0 && high_limit >= 0);
16478
16479
uint32_t inten_table_mask = 0xFF;
16480
const uint32_t er = enc_range[subblock];
16481
// Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful.
16482
if (er <= 51)
16483
{
16484
inten_table_mask = 0xF;
16485
16486
if (er > 22)
16487
inten_table_mask &= ~(1 << 0);
16488
16489
if ((er < 4) || (er > 39))
16490
inten_table_mask &= ~(1 << 1);
16491
16492
if (er < 9)
16493
inten_table_mask &= ~(1 << 2);
16494
16495
if (er < 12)
16496
inten_table_mask &= ~(1 << 3);
16497
}
16498
else
16499
{
16500
inten_table_mask &= ~((1 << 0) | (1 << 1));
16501
16502
if (er > 60)
16503
inten_table_mask &= ~(1 << 2);
16504
16505
if (er > 89)
16506
inten_table_mask &= ~(1 << 3);
16507
16508
if (er > 120)
16509
inten_table_mask &= ~(1 << 4);
16510
16511
if (er > 136)
16512
inten_table_mask &= ~(1 << 5);
16513
16514
if (er > 174)
16515
inten_table_mask &= ~(1 << 6);
16516
}
16517
16518
for (uint32_t inten = 0; inten < 8; inten++)
16519
{
16520
if ((inten_table_mask & (1 << inten)) == 0)
16521
continue;
16522
16523
const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]);
16524
const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]);
16525
const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]);
16526
const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]);
16527
assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3));
16528
16529
const int tv[4] = { t2, t3, t1, t0 };
16530
16531
const int thresh01 = t0 + t1;
16532
const int thresh12 = t1 + t2;
16533
const int thresh23 = t2 + t3;
16534
16535
assert(thresh01 <= thresh12 && thresh12 <= thresh23);
16536
16537
static const uint8_t s_table[4] = { 1, 0, 2, 3 };
16538
16539
uint32_t total_err = 0;
16540
uint8_t sels[8];
16541
16542
if (flip)
16543
{
16544
if (((int)high[subblock] - base) * 2 < thresh01)
16545
{
16546
memset(sels, 3, 8);
16547
16548
for (uint32_t y = 0; y < 2; y++)
16549
{
16550
for (uint32_t x = 0; x < 4; x++)
16551
{
16552
const int delta = (int)block_y[y + subblock * 2][x] - base;
16553
16554
const uint32_t c = 3;
16555
16556
uint32_t e = basisu::iabs(tv[c] - delta);
16557
total_err += e * e;
16558
}
16559
if (total_err >= best_err)
16560
break;
16561
}
16562
}
16563
else if (((int)low[subblock] - base) * 2 >= thresh23)
16564
{
16565
memset(sels, 1, 8);
16566
16567
for (uint32_t y = 0; y < 2; y++)
16568
{
16569
for (uint32_t x = 0; x < 4; x++)
16570
{
16571
const int delta = (int)block_y[y + subblock * 2][x] - base;
16572
16573
const uint32_t c = 1;
16574
16575
uint32_t e = basisu::iabs(tv[c] - delta);
16576
total_err += e * e;
16577
}
16578
if (total_err >= best_err)
16579
break;
16580
}
16581
}
16582
else
16583
{
16584
for (uint32_t y = 0; y < 2; y++)
16585
{
16586
for (uint32_t x = 0; x < 4; x++)
16587
{
16588
const int delta = (int)block_y[y + subblock * 2][x] - base;
16589
const int delta2 = delta * 2;
16590
16591
uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
16592
sels[y * 4 + x] = (uint8_t)c;
16593
16594
uint32_t e = basisu::iabs(tv[c] - delta);
16595
total_err += e * e;
16596
}
16597
if (total_err >= best_err)
16598
break;
16599
}
16600
}
16601
}
16602
else
16603
{
16604
if (((int)high[subblock] - base) * 2 < thresh01)
16605
{
16606
memset(sels, 3, 8);
16607
16608
for (uint32_t y = 0; y < 4; y++)
16609
{
16610
for (uint32_t x = 0; x < 2; x++)
16611
{
16612
const int delta = (int)block_y[y][x + subblock * 2] - base;
16613
16614
const uint32_t c = 3;
16615
16616
uint32_t e = basisu::iabs(tv[c] - delta);
16617
total_err += e * e;
16618
}
16619
if (total_err >= best_err)
16620
break;
16621
}
16622
}
16623
else if (((int)low[subblock] - base) * 2 >= thresh23)
16624
{
16625
memset(sels, 1, 8);
16626
16627
for (uint32_t y = 0; y < 4; y++)
16628
{
16629
for (uint32_t x = 0; x < 2; x++)
16630
{
16631
const int delta = (int)block_y[y][x + subblock * 2] - base;
16632
16633
const uint32_t c = 1;
16634
16635
uint32_t e = basisu::iabs(tv[c] - delta);
16636
total_err += e * e;
16637
}
16638
if (total_err >= best_err)
16639
break;
16640
}
16641
}
16642
else
16643
{
16644
for (uint32_t y = 0; y < 4; y++)
16645
{
16646
for (uint32_t x = 0; x < 2; x++)
16647
{
16648
const int delta = (int)block_y[y][x + subblock * 2] - base;
16649
const int delta2 = delta * 2;
16650
16651
uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
16652
sels[y * 2 + x] = (uint8_t)c;
16653
16654
uint32_t e = basisu::iabs(tv[c] - delta);
16655
total_err += e * e;
16656
}
16657
if (total_err >= best_err)
16658
break;
16659
}
16660
}
16661
}
16662
16663
if (total_err < best_err)
16664
{
16665
best_err = total_err;
16666
best_inten = inten;
16667
memcpy(best_sels, sels, 8);
16668
}
16669
16670
} // inten
16671
16672
//g_inten_hist[best_inten][enc_range[subblock]]++;
16673
16674
dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 2 : 5));
16675
16676
if (flip)
16677
{
16678
uint32_t ofs = subblock * 2;
16679
for (uint32_t y = 0; y < 2; y++)
16680
{
16681
for (uint32_t x = 0; x < 4; x++)
16682
{
16683
uint32_t t = best_sels[y * 4 + x];
16684
16685
assert(ofs < 16);
16686
l_bitmask |= ((t & 1) << ofs);
16687
h_bitmask |= ((t >> 1) << ofs);
16688
ofs += 4;
16689
}
16690
16691
ofs = (int)ofs + 1 - 4 * 4;
16692
}
16693
}
16694
else
16695
{
16696
uint32_t ofs = (subblock * 2) * 4;
16697
for (uint32_t x = 0; x < 2; x++)
16698
{
16699
for (uint32_t y = 0; y < 4; y++)
16700
{
16701
uint32_t t = best_sels[y * 2 + x];
16702
16703
assert(ofs < 16);
16704
l_bitmask |= ((t & 1) << ofs);
16705
h_bitmask |= ((t >> 1) << ofs);
16706
++ofs;
16707
}
16708
}
16709
}
16710
16711
} // subblock
16712
16713
dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
16714
dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
16715
dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
16716
dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
16717
16718
return true;
16719
}
16720
16721
// Indices into a row of g_eac_modifier_table used as the two ends of the row's modifier range.
const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3;
const uint32_t ETC2_EAC_MAX_VALUE_SELECTOR = 7;
16722
16723
// Transcodes the alpha channel of an already-unpacked UASTC block to an ETC2 EAC A8 block.
//
//   unpacked_src_blk - the unpacked UASTC block; m_etc2_hints supplies the EAC table index
//                      (low 4 bits) and multiplier (upper bits).
//   block_pixels     - the block's decoded pixels, indexed [y][x]. Not read when the block is
//                      solid color or its mode has no alpha.
//   pDst             - destination; written as an eac_block.
void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
{
	eac_block& dst = *static_cast<eac_block*>(pDst);
	const color32* pSrc_pixels = &block_pixels[0][0];

	if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR))
	{
		// Constant alpha: either the solid color's alpha, or fully opaque for modes without alpha.
		const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ? unpacked_src_blk.m_solid_color[3] : 255;

		dst.m_base = a;
		dst.m_table = 13;
		dst.m_multiplier = 1;

		memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));

		return;
	}

	uint32_t min_a = 255, max_a = 0;
	for (uint32_t i = 0; i < 16; i++)
	{
		min_a = basisu::minimum<uint32_t>(min_a, pSrc_pixels[i].a);
		max_a = basisu::maximum<uint32_t>(max_a, pSrc_pixels[i].a);
	}

	if (min_a == max_a)
	{
		// The decoded alpha turned out to be constant: same encoding as above.
		dst.m_base = min_a;
		dst.m_table = 13;
		dst.m_multiplier = 1;

		memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
		return;
	}

	const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF;
	const int multiplier = unpacked_src_blk.m_etc2_hints >> 4;

	assert(multiplier >= 1);

	dst.m_multiplier = multiplier;
	dst.m_table = table;

	// Place the base so that the table's modifier range lines up with [min_a, max_a]: the base
	// sits at the position a modifier of 0 occupies between the range's two end modifiers.
	const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]);
	const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));

	dst.m_base = center;

	const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0];

	// The 8 alpha values this base/table/multiplier combination can produce.
	uint32_t vals[8];
	for (uint32_t j = 0; j < 8; j++)
		vals[j] = clamp255(center + (pTable[j] * multiplier));

	uint64_t sels = 0;
	for (uint32_t i = 0; i < 16; i++)
	{
		// Selectors are stored column by column: pixel i is x = i / 4, y = i % 4.
		const uint32_t a = block_pixels[i & 3][i >> 2].a;

		// Pack (error << 3) | selector so that a plain minimum finds the smallest error and,
		// on ties, the lowest selector index.
		uint32_t min_err = (basisu::iabs(vals[0] - a) << 3) | 0;
		for (uint32_t j = 1; j < 8; j++)
			min_err = basisu::minimum<uint32_t>(min_err, (basisu::iabs(vals[j] - a) << 3) | j);

		const uint64_t best_index = min_err & 7;
		sels |= (best_index << (45 - i * 3));
	}

	dst.set_selector_bits(sels);
}
16799
16800
bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst)
16801
{
16802
eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst);
16803
decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1];
16804
16805
unpacked_uastc_block unpacked_src_blk;
16806
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16807
return false;
16808
16809
color32 block_pixels[4][4];
16810
if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
16811
{
16812
const bool unpack_srgb = false;
16813
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16814
return false;
16815
}
16816
16817
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk);
16818
16819
transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk);
16820
16821
return true;
16822
}
16823
16824
// Remap tables from an N-bit index to a 2-bit value in BC1 selector order
// (judging by the names: UASTC weight index -> BC1 selector). One table per index width, 1..5 bits.
static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 };

static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 };

static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 };

static const uint8_t s_uastc4_to_bc1[16] =
{
	0, 0, 0,
	2, 2, 2, 2, 2,
	3, 3, 3, 3, 3,
	1, 1, 1
};

static const uint8_t s_uastc5_to_bc1[32] =
{
	0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	1, 1, 1, 1, 1, 1
};

// Indexed by bit width (1..5); there is no table for width 0.
const uint8_t* s_uastc_to_bc1_weights[6] =
{
	nullptr,
	s_uastc1_to_bc1,
	s_uastc2_to_bc1,
	s_uastc3_to_bc1,
	s_uastc4_to_bc1,
	s_uastc5_to_bc1
};
16830
16831
// Encodes 16 8-bit samples into an 8-byte BC4 block.
//  pDst:    receives 8 bytes: [0] = block maximum, [1] = block minimum, [2..7] = sixteen 3-bit selectors
//           packed little-endian (sample i occupies bits 3*i .. 3*i+2).
//  pPixels: sample i is read from pPixels[i * stride].
//  stride:  distance in bytes between consecutive samples.
void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride)
{
	// Range of the block.
	uint32_t min_v = pPixels[0];
	uint32_t max_v = pPixels[0];
	for (uint32_t i = 1; i < 16; i++)
	{
		const uint32_t v = pPixels[i * stride];
		if (v < min_v)
			min_v = v;
		if (v > max_v)
			max_v = v;
	}

	uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst);
	pDst_bytes[0] = (uint8_t)max_v;
	pDst_bytes[1] = (uint8_t)min_v;

	if (max_v == min_v)
	{
		// Flat block: every selector is 0 (the first endpoint).
		memset(pDst_bytes + 2, 0, 6);
		return;
	}

	const uint32_t delta = max_v - min_v;

	// Work relative to min_v. Thresholds between neighbouring palette values are expressed in units
	// of 1/14 of the range (x14 because two x7 scale factors are being added).
	const int thresholds[7] =
	{
		static_cast<int>(delta * 13),
		static_cast<int>(delta * 11),
		static_cast<int>(delta * 9),
		static_cast<int>(delta * 7),
		static_cast<int>(delta * 5),
		static_cast<int>(delta * 3),
		static_cast<int>(delta * 1)
	};

	// BC4 floors in its divisions, which we compensate for with the 4 bias.
	// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
	const int bias = 4 - min_v * 14;

	// Maps "number of thresholds reached" (0 = at the minimum, 7 = at the maximum) to the BC4 selector.
	static const uint8_t s_rank_to_selector[8] = { 1, 7, 6, 5, 4, 3, 2, 0 };

	uint64_t packed = 0;
	for (uint32_t i = 0; i < 16; i++)
	{
		const int scaled = pPixels[i * stride] * 14 + bias;

		uint32_t rank = 0;
		for (uint32_t k = 0; k < 7; k++)
			rank += (scaled >= thresholds[k]) ? 1U : 0U;

		packed |= (static_cast<uint64_t>(s_rank_to_selector[rank]) << (i * 3U));
	}

	for (uint32_t b = 0; b < 6; b++)
		pDst_bytes[2 + b] = (uint8_t)(packed >> (8U * b));
}
16950
16951
static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
16952
{
16953
uint32_t block_r[4], block_g[4], block_b[4];
16954
16955
block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
16956
block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
16957
block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
16958
block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
16959
16960
int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
16961
16962
int dots[4];
16963
for (uint32_t i = 0; i < 4; i++)
16964
dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
16965
16966
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
16967
16968
ar *= 2; ag *= 2; ab *= 2;
16969
16970
for (uint32_t i = 0; i < 16; i++)
16971
{
16972
const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab;
16973
static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
16974
16975
// Rounding matters here!
16976
// d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality.
16977
sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)];
16978
}
16979
}
16980
16981
static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
16982
{
16983
uint32_t block_r[4], block_g[4], block_b[4];
16984
16985
block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
16986
block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
16987
block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
16988
block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
16989
16990
int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
16991
16992
int dots[4];
16993
for (uint32_t i = 0; i < 4; i++)
16994
dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
16995
16996
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
16997
16998
ar *= 2; ag *= 2; ab *= 2;
16999
17000
static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
17001
17002
for (uint32_t i = 0; i < 16; i += 4)
17003
{
17004
const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab;
17005
const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab;
17006
const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab;
17007
const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab;
17008
17009
sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
17010
sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
17011
sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)];
17012
sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)];
17013
}
17014
}
17015
17016
static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh)
17017
{
17018
// Derived from bc7enc16's LS function.
17019
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
17020
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
17021
uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0;
17022
17023
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
17024
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
17025
static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
17026
17027
uint32_t weight_accum = 0;
17028
for (uint32_t i = 0; i < 16; i++)
17029
{
17030
const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
17031
const uint32_t sel = pSelectors[i];
17032
ut_r += r;
17033
ut_g += g;
17034
ut_b += b;
17035
weight_accum += s_weight_vals[sel];
17036
uq00_r += sel * r;
17037
uq00_g += sel * g;
17038
uq00_b += sel * b;
17039
}
17040
17041
float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r;
17042
float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g;
17043
float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b;
17044
17045
q10_r = t_r * 3.0f - q00_r;
17046
q10_g = t_g * 3.0f - q00_g;
17047
q10_b = t_b * 3.0f - q00_b;
17048
17049
float z00 = (float)((weight_accum >> 16) & 0xFF);
17050
float z10 = (float)((weight_accum >> 8) & 0xFF);
17051
float z11 = (float)(weight_accum & 0xFF);
17052
float z01 = z10;
17053
17054
float det = z00 * z11 - z01 * z10;
17055
if (fabs(det) < 1e-8f)
17056
return false;
17057
17058
det = 3.0f / det;
17059
17060
float iz00, iz01, iz10, iz11;
17061
iz00 = z11 * det;
17062
iz01 = -z01 * det;
17063
iz10 = -z10 * det;
17064
iz11 = z00 * det;
17065
17066
pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r;
17067
pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g;
17068
pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b;
17069
17070
// Check and fix channel singularities - might not be needed, but is in UASTC's encoder.
17071
for (uint32_t c = 0; c < 3; c++)
17072
{
17073
if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f))
17074
{
17075
uint32_t lo_v = UINT32_MAX, hi_v = 0;
17076
for (uint32_t i = 0; i < 16; i++)
17077
{
17078
lo_v = basisu::minimumu(lo_v, pColors[i].c[c]);
17079
hi_v = basisu::maximumu(hi_v, pColors[i].c[c]);
17080
}
17081
17082
if (lo_v == hi_v)
17083
{
17084
pXl->c[c] = (float)lo_v;
17085
pXh->c[c] = (float)hi_v;
17086
}
17087
}
17088
}
17089
17090
return true;
17091
}
17092
17093
void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb)
17094
{
17095
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17096
17097
uint32_t mask = 0xAA;
17098
uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi;
17099
uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo;
17100
17101
if (min16 == max16)
17102
{
17103
// Always forbid 3 color blocks
17104
// This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
17105
mask = 0;
17106
17107
// Make l > h
17108
if (min16 > 0)
17109
min16--;
17110
else
17111
{
17112
// l = h = 0
17113
assert(min16 == max16 && max16 == 0);
17114
17115
max16 = 1;
17116
min16 = 0;
17117
mask = 0x55;
17118
}
17119
17120
assert(max16 > min16);
17121
}
17122
17123
if (max16 < min16)
17124
{
17125
std::swap(max16, min16);
17126
mask ^= 0x55;
17127
}
17128
17129
pDst_block->set_low_color(static_cast<uint16_t>(max16));
17130
pDst_block->set_high_color(static_cast<uint16_t>(min16));
17131
pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
17132
pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
17133
pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
17134
pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
17135
}
17136
17137
// Scales an 8-bit component (0..255) to 5 bits (0..31) with rounding, using the
// (x + (x >> 8)) >> 8 integer substitute for dividing by 255.
static inline uint8_t to_5(uint32_t v)
{
	const uint32_t scaled = v * 31 + 128;
	return (uint8_t)((scaled + (scaled >> 8)) >> 8);
}
17138
// Scales an 8-bit component (0..255) to 6 bits (0..63) with rounding, using the
// (x + (x >> 8)) >> 8 integer substitute for dividing by 255.
static inline uint8_t to_6(uint32_t v)
{
	const uint32_t scaled = v * 63 + 128;
	return (uint8_t)((scaled + (scaled >> 8)) >> 8);
}
17139
17140
// Good references: squish library, stb_dxt.
17141
void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags)
17142
{
17143
const color32* pSrc_pixels = (const color32*)pPixels;
17144
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17145
17146
int avg_r = -1, avg_g = 0, avg_b = 0;
17147
int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
17148
uint8_t sels[16];
17149
17150
const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
17151
if (use_sels)
17152
{
17153
// Caller is jamming in their own selectors for us to try.
17154
const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
17155
17156
static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
17157
17158
for (uint32_t i = 0; i < 16; i++)
17159
sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
17160
}
17161
else
17162
{
17163
const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
17164
17165
uint32_t j;
17166
for (j = 1; j < 16; j++)
17167
if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
17168
break;
17169
17170
if (j == 16)
17171
{
17172
encode_bc1_solid_block(pDst, fr, fg, fb);
17173
return;
17174
}
17175
17176
// Select 2 colors along the principle axis. (There must be a faster/simpler way.)
17177
int total_r = fr, total_g = fg, total_b = fb;
17178
int max_r = fr, max_g = fg, max_b = fb;
17179
int min_r = fr, min_g = fg, min_b = fb;
17180
for (uint32_t i = 1; i < 16; i++)
17181
{
17182
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
17183
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
17184
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
17185
total_r += r; total_g += g; total_b += b;
17186
}
17187
17188
avg_r = (total_r + 8) >> 4;
17189
avg_g = (total_g + 8) >> 4;
17190
avg_b = (total_b + 8) >> 4;
17191
17192
int icov[6] = { 0, 0, 0, 0, 0, 0 };
17193
for (uint32_t i = 0; i < 16; i++)
17194
{
17195
int r = (int)pSrc_pixels[i].r - avg_r;
17196
int g = (int)pSrc_pixels[i].g - avg_g;
17197
int b = (int)pSrc_pixels[i].b - avg_b;
17198
icov[0] += r * r;
17199
icov[1] += r * g;
17200
icov[2] += r * b;
17201
icov[3] += g * g;
17202
icov[4] += g * b;
17203
icov[5] += b * b;
17204
}
17205
17206
float cov[6];
17207
for (uint32_t i = 0; i < 6; i++)
17208
cov[i] = static_cast<float>(icov[i])* (1.0f / 255.0f);
17209
17210
#if 0
17211
// Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference).
17212
// TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta
17213
int saxis_r = max_r - min_r;
17214
int saxis_g = max_g - min_g;
17215
int saxis_b = max_b - min_b;
17216
#else
17217
float xr = (float)(max_r - min_r);
17218
float xg = (float)(max_g - min_g);
17219
float xb = (float)(max_b - min_b);
17220
//float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's
17221
//float xg = (float)(max_g - avg_g);
17222
//float xb = (float)(max_b - avg_b);
17223
for (uint32_t power_iter = 0; power_iter < 4; power_iter++)
17224
{
17225
float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
17226
float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
17227
float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
17228
xr = r; xg = g; xb = b;
17229
}
17230
17231
float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb));
17232
int saxis_r = 306, saxis_g = 601, saxis_b = 117;
17233
if (k >= 2)
17234
{
17235
float m = 1024.0f / k;
17236
saxis_r = (int)(xr * m);
17237
saxis_g = (int)(xg * m);
17238
saxis_b = (int)(xb * m);
17239
}
17240
#endif
17241
17242
int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0;
17243
for (uint32_t i = 0; i < 16; i++)
17244
{
17245
int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b;
17246
if (dot < low_dot)
17247
{
17248
low_dot = dot;
17249
low_c = i;
17250
}
17251
if (dot > high_dot)
17252
{
17253
high_dot = dot;
17254
high_c = i;
17255
}
17256
}
17257
17258
lr = to_5(pSrc_pixels[low_c].r);
17259
lg = to_6(pSrc_pixels[low_c].g);
17260
lb = to_5(pSrc_pixels[low_c].b);
17261
17262
hr = to_5(pSrc_pixels[high_c].r);
17263
hg = to_6(pSrc_pixels[high_c].g);
17264
hb = to_5(pSrc_pixels[high_c].b);
17265
17266
bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17267
} // if (use_sels)
17268
17269
const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
17270
for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
17271
{
17272
// This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
17273
vec3F xl, xh;
17274
if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
17275
{
17276
if (avg_r < 0)
17277
{
17278
int total_r = 0, total_g = 0, total_b = 0;
17279
for (uint32_t i = 0; i < 16; i++)
17280
{
17281
total_r += pSrc_pixels[i].r;
17282
total_g += pSrc_pixels[i].g;
17283
total_b += pSrc_pixels[i].b;
17284
}
17285
17286
avg_r = (total_r + 8) >> 4;
17287
avg_g = (total_g + 8) >> 4;
17288
avg_b = (total_b + 8) >> 4;
17289
}
17290
17291
// All selectors equal - treat it as a solid block which should always be equal or better.
17292
lr = g_bc1_match5_equals_1[avg_r].m_hi;
17293
lg = g_bc1_match6_equals_1[avg_g].m_hi;
17294
lb = g_bc1_match5_equals_1[avg_b].m_hi;
17295
17296
hr = g_bc1_match5_equals_1[avg_r].m_lo;
17297
hg = g_bc1_match6_equals_1[avg_g].m_lo;
17298
hb = g_bc1_match5_equals_1[avg_b].m_lo;
17299
17300
// In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
17301
}
17302
else
17303
{
17304
lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17305
lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17306
lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17307
17308
hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17309
hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17310
hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17311
}
17312
17313
bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17314
}
17315
17316
uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
17317
uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
17318
17319
// Always forbid 3 color blocks
17320
if (lc16 == hc16)
17321
{
17322
uint8_t mask = 0;
17323
17324
// Make l > h
17325
if (hc16 > 0)
17326
hc16--;
17327
else
17328
{
17329
// lc16 = hc16 = 0
17330
assert(lc16 == hc16 && hc16 == 0);
17331
17332
hc16 = 0;
17333
lc16 = 1;
17334
mask = 0x55; // select hc16
17335
}
17336
17337
assert(lc16 > hc16);
17338
pDst_block->set_low_color(static_cast<uint16_t>(lc16));
17339
pDst_block->set_high_color(static_cast<uint16_t>(hc16));
17340
17341
pDst_block->m_selectors[0] = mask;
17342
pDst_block->m_selectors[1] = mask;
17343
pDst_block->m_selectors[2] = mask;
17344
pDst_block->m_selectors[3] = mask;
17345
}
17346
else
17347
{
17348
uint8_t invert_mask = 0;
17349
if (lc16 < hc16)
17350
{
17351
std::swap(lc16, hc16);
17352
invert_mask = 0x55;
17353
}
17354
17355
assert(lc16 > hc16);
17356
pDst_block->set_low_color((uint16_t)lc16);
17357
pDst_block->set_high_color((uint16_t)hc16);
17358
17359
uint32_t packed_sels = 0;
17360
static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
17361
for (uint32_t i = 0; i < 16; i++)
17362
packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
17363
17364
pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
17365
pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
17366
pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
17367
pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
17368
}
17369
}
17370
17371
void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags)
17372
{
17373
const color32* pSrc_pixels = (const color32*)pPixels;
17374
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17375
17376
int avg_r = -1, avg_g = 0, avg_b = 0;
17377
int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
17378
uint8_t sels[16];
17379
17380
const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
17381
if (use_sels)
17382
{
17383
// Caller is jamming in their own selectors for us to try.
17384
const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
17385
17386
static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
17387
17388
for (uint32_t i = 0; i < 16; i++)
17389
sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
17390
}
17391
else
17392
{
17393
const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
17394
17395
uint32_t j;
17396
for (j = 1; j < 16; j++)
17397
if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
17398
break;
17399
17400
if (j == 16)
17401
{
17402
encode_bc1_solid_block(pDst, fr, fg, fb);
17403
return;
17404
}
17405
17406
// Select 2 colors along the principle axis. (There must be a faster/simpler way.)
17407
int total_r = fr, total_g = fg, total_b = fb;
17408
int max_r = fr, max_g = fg, max_b = fb;
17409
int min_r = fr, min_g = fg, min_b = fb;
17410
uint32_t grayscale_flag = (fr == fg) && (fr == fb);
17411
for (uint32_t i = 1; i < 16; i++)
17412
{
17413
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
17414
grayscale_flag &= ((r == g) && (r == b));
17415
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
17416
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
17417
total_r += r; total_g += g; total_b += b;
17418
}
17419
17420
if (grayscale_flag)
17421
{
17422
// Grayscale blocks are a common enough case to specialize.
17423
if ((max_r - min_r) < 2)
17424
{
17425
lr = lb = hr = hb = to_5(fr);
17426
lg = hg = to_6(fr);
17427
}
17428
else
17429
{
17430
lr = lb = to_5(min_r);
17431
lg = to_6(min_r);
17432
17433
hr = hb = to_5(max_r);
17434
hg = to_6(max_r);
17435
}
17436
}
17437
else
17438
{
17439
avg_r = (total_r + 8) >> 4;
17440
avg_g = (total_g + 8) >> 4;
17441
avg_b = (total_b + 8) >> 4;
17442
17443
// Find the shortest vector from a AABB corner to the block's average color.
17444
// This is to help avoid outliers.
17445
17446
uint32_t dist[3][2];
17447
dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3;
17448
dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3;
17449
dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3;
17450
17451
uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]);
17452
uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4;
17453
min_d0 = basisu::minimum(min_d0, d4);
17454
17455
uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1;
17456
uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5;
17457
min_d1 = basisu::minimum(min_d1, d5);
17458
17459
uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2;
17460
min_d0 = basisu::minimum(min_d0, d2);
17461
17462
uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3;
17463
min_d1 = basisu::minimum(min_d1, d3);
17464
17465
uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6;
17466
min_d0 = basisu::minimum(min_d0, d6);
17467
17468
uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7;
17469
min_d1 = basisu::minimum(min_d1, d7);
17470
17471
uint32_t min_d = basisu::minimum(min_d0, min_d1);
17472
uint32_t best_i = min_d & 7;
17473
17474
int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r);
17475
int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g);
17476
int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b);
17477
17478
// Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in.
17479
uint32_t low_c = 0, high_c = 0;
17480
if ((delta_r | delta_g | delta_b) != 0)
17481
{
17482
// Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB.
17483
// Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection.
17484
// One of these vectors will be a decent approximation of the block's PCA.
17485
const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b;
17486
17487
int low_dot0 = INT_MAX, high_dot0 = INT_MIN;
17488
int low_dot1 = INT_MAX, high_dot1 = INT_MIN;
17489
int low_dot2 = INT_MAX, high_dot2 = INT_MIN;
17490
int low_dot3 = INT_MAX, high_dot3 = INT_MIN;
17491
17492
//int low_c0, low_c1, low_c2, low_c3;
17493
//int high_c0, high_c1, high_c2, high_c3;
17494
17495
for (uint32_t i = 0; i < 16; i++)
17496
{
17497
const int dotx = pSrc_pixels[i].r * saxis0_r;
17498
const int doty = pSrc_pixels[i].g * saxis0_g;
17499
const int dotz = pSrc_pixels[i].b * saxis0_b;
17500
17501
const int dot0 = ((dotz + dotx + doty) << 4) + i;
17502
const int dot1 = ((dotz - dotx - doty) << 4) + i;
17503
const int dot2 = ((dotz - dotx + doty) << 4) + i;
17504
const int dot3 = ((dotz + dotx - doty) << 4) + i;
17505
17506
if (dot0 < low_dot0)
17507
{
17508
low_dot0 = dot0;
17509
//low_c0 = i;
17510
}
17511
if ((dot0 ^ 15) > high_dot0)
17512
{
17513
high_dot0 = dot0 ^ 15;
17514
//high_c0 = i;
17515
}
17516
17517
if (dot1 < low_dot1)
17518
{
17519
low_dot1 = dot1;
17520
//low_c1 = i;
17521
}
17522
if ((dot1 ^ 15) > high_dot1)
17523
{
17524
high_dot1 = dot1 ^ 15;
17525
//high_c1 = i;
17526
}
17527
17528
if (dot2 < low_dot2)
17529
{
17530
low_dot2 = dot2;
17531
//low_c2 = i;
17532
}
17533
if ((dot2 ^ 15) > high_dot2)
17534
{
17535
high_dot2 = dot2 ^ 15;
17536
//high_c2 = i;
17537
}
17538
17539
if (dot3 < low_dot3)
17540
{
17541
low_dot3 = dot3;
17542
//low_c3 = i;
17543
}
17544
if ((dot3 ^ 15) > high_dot3)
17545
{
17546
high_dot3 = dot3 ^ 15;
17547
//high_c3 = i;
17548
}
17549
}
17550
17551
low_c = low_dot0 & 15;
17552
high_c = ~high_dot0 & 15;
17553
uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15);
17554
17555
uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15);
17556
if (tr > r) {
17557
low_c = low_dot1 & 15;
17558
high_c = ~high_dot1 & 15;
17559
r = tr;
17560
}
17561
17562
tr = (high_dot2 & ~15) - (low_dot2 & ~15);
17563
if (tr > r) {
17564
low_c = low_dot2 & 15;
17565
high_c = ~high_dot2 & 15;
17566
r = tr;
17567
}
17568
17569
tr = (high_dot3 & ~15) - (low_dot3 & ~15);
17570
if (tr > r) {
17571
low_c = low_dot3 & 15;
17572
high_c = ~high_dot3 & 15;
17573
}
17574
}
17575
17576
lr = to_5(pSrc_pixels[low_c].r);
17577
lg = to_6(pSrc_pixels[low_c].g);
17578
lb = to_5(pSrc_pixels[low_c].b);
17579
17580
hr = to_5(pSrc_pixels[high_c].r);
17581
hg = to_6(pSrc_pixels[high_c].g);
17582
hb = to_5(pSrc_pixels[high_c].b);
17583
}
17584
17585
bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17586
} // if (use_sels)
17587
17588
const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
17589
for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
17590
{
17591
int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb;
17592
17593
// This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
17594
vec3F xl, xh;
17595
if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
17596
{
17597
if (avg_r < 0)
17598
{
17599
int total_r = 0, total_g = 0, total_b = 0;
17600
for (uint32_t i = 0; i < 16; i++)
17601
{
17602
total_r += pSrc_pixels[i].r;
17603
total_g += pSrc_pixels[i].g;
17604
total_b += pSrc_pixels[i].b;
17605
}
17606
17607
avg_r = (total_r + 8) >> 4;
17608
avg_g = (total_g + 8) >> 4;
17609
avg_b = (total_b + 8) >> 4;
17610
}
17611
17612
// All selectors equal - treat it as a solid block which should always be equal or better.
17613
lr = g_bc1_match5_equals_1[avg_r].m_hi;
17614
lg = g_bc1_match6_equals_1[avg_g].m_hi;
17615
lb = g_bc1_match5_equals_1[avg_b].m_hi;
17616
17617
hr = g_bc1_match5_equals_1[avg_r].m_lo;
17618
hg = g_bc1_match6_equals_1[avg_g].m_lo;
17619
hb = g_bc1_match5_equals_1[avg_b].m_lo;
17620
17621
// In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
17622
}
17623
else
17624
{
17625
lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17626
lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17627
lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17628
17629
hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17630
hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17631
hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17632
}
17633
17634
if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb))
17635
break;
17636
17637
bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17638
}
17639
17640
uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
17641
uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
17642
17643
// Always forbid 3 color blocks
17644
if (lc16 == hc16)
17645
{
17646
uint8_t mask = 0;
17647
17648
// Make l > h
17649
if (hc16 > 0)
17650
hc16--;
17651
else
17652
{
17653
// lc16 = hc16 = 0
17654
assert(lc16 == hc16 && hc16 == 0);
17655
17656
hc16 = 0;
17657
lc16 = 1;
17658
mask = 0x55; // select hc16
17659
}
17660
17661
assert(lc16 > hc16);
17662
pDst_block->set_low_color(static_cast<uint16_t>(lc16));
17663
pDst_block->set_high_color(static_cast<uint16_t>(hc16));
17664
17665
pDst_block->m_selectors[0] = mask;
17666
pDst_block->m_selectors[1] = mask;
17667
pDst_block->m_selectors[2] = mask;
17668
pDst_block->m_selectors[3] = mask;
17669
}
17670
else
17671
{
17672
uint8_t invert_mask = 0;
17673
if (lc16 < hc16)
17674
{
17675
std::swap(lc16, hc16);
17676
invert_mask = 0x55;
17677
}
17678
17679
assert(lc16 > hc16);
17680
pDst_block->set_low_color((uint16_t)lc16);
17681
pDst_block->set_high_color((uint16_t)hc16);
17682
17683
uint32_t packed_sels = 0;
17684
static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
17685
for (uint32_t i = 0; i < 16; i++)
17686
packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
17687
17688
pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
17689
pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
17690
pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
17691
pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
17692
}
17693
}
17694
17695
// Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest.
17696
void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst)
17697
{
17698
const uint32_t mode = unpacked_src_blk.m_mode;
17699
const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
17700
17701
dxt1_block& b = *static_cast<dxt1_block*>(pDst);
17702
17703
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
17704
17705
const uint32_t total_comps = g_uastc_mode_comps[mode];
17706
17707
if (total_comps == 2)
17708
{
17709
const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant;
17710
const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant;
17711
17712
b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127));
17713
b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127));
17714
}
17715
else
17716
{
17717
b.set_low_color(dxt1_block::pack_color(
17718
color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant,
17719
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant,
17720
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant,
17721
255), true, 127)
17722
);
17723
17724
b.set_high_color(dxt1_block::pack_color(
17725
color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant,
17726
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant,
17727
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant,
17728
255), true, 127)
17729
);
17730
}
17731
17732
if (b.get_low_color() == b.get_high_color())
17733
{
17734
// Always forbid 3 color blocks
17735
uint16_t lc16 = (uint16_t)b.get_low_color();
17736
uint16_t hc16 = (uint16_t)b.get_high_color();
17737
17738
uint8_t mask = 0;
17739
17740
// Make l > h
17741
if (hc16 > 0)
17742
hc16--;
17743
else
17744
{
17745
// lc16 = hc16 = 0
17746
assert(lc16 == hc16 && hc16 == 0);
17747
17748
hc16 = 0;
17749
lc16 = 1;
17750
mask = 0x55; // select hc16
17751
}
17752
17753
assert(lc16 > hc16);
17754
b.set_low_color(static_cast<uint16_t>(lc16));
17755
b.set_high_color(static_cast<uint16_t>(hc16));
17756
17757
b.m_selectors[0] = mask;
17758
b.m_selectors[1] = mask;
17759
b.m_selectors[2] = mask;
17760
b.m_selectors[3] = mask;
17761
}
17762
else
17763
{
17764
bool invert = false;
17765
if (b.get_low_color() < b.get_high_color())
17766
{
17767
std::swap(b.m_low_color[0], b.m_high_color[0]);
17768
std::swap(b.m_low_color[1], b.m_high_color[1]);
17769
invert = true;
17770
}
17771
17772
const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
17773
17774
const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
17775
17776
uint32_t sels = 0;
17777
for (int i = 15; i >= 0; --i)
17778
{
17779
uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]];
17780
17781
if (invert)
17782
s ^= 1;
17783
17784
sels = (sels << 2) | s;
17785
}
17786
b.m_selectors[0] = sels & 0xFF;
17787
b.m_selectors[1] = (sels >> 8) & 0xFF;
17788
b.m_selectors[2] = (sels >> 16) & 0xFF;
17789
b.m_selectors[3] = (sels >> 24) & 0xFF;
17790
}
17791
}
17792
17793
// Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed.
17794
void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality)
17795
{
17796
const uint32_t mode = unpacked_src_blk.m_mode;
17797
17798
const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
17799
17800
dxt1_block& b = *static_cast<dxt1_block*>(pDst);
17801
17802
b.set_low_color(1);
17803
b.set_high_color(0);
17804
17805
const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
17806
17807
const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
17808
17809
uint32_t sels = 0;
17810
for (int i = 15; i >= 0; --i)
17811
{
17812
sels <<= 2;
17813
sels |= pTran[astc_blk.m_weights[i << plane_shift]];
17814
}
17815
17816
b.m_selectors[0] = sels & 0xFF;
17817
b.m_selectors[1] = (sels >> 8) & 0xFF;
17818
b.m_selectors[2] = (sels >> 16) & 0xFF;
17819
b.m_selectors[3] = (sels >> 24) & 0xFF;
17820
17821
encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors);
17822
}
17823
17824
bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality)
17825
{
17826
unpacked_uastc_block unpacked_src_blk;
17827
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17828
return false;
17829
17830
const uint32_t mode = unpacked_src_blk.m_mode;
17831
17832
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17833
{
17834
encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
17835
return true;
17836
}
17837
17838
if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
17839
transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst);
17840
else
17841
{
17842
color32 block_pixels[4][4];
17843
const bool unpack_srgb = false;
17844
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17845
return false;
17846
17847
if (unpacked_src_blk.m_bc1_hint1)
17848
transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality);
17849
else
17850
encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
17851
}
17852
17853
return true;
17854
}
17855
17856
// Writes an 8 byte BC4 block to pDst: both endpoint bytes are set to the low 8 bits of a,
// and the remaining 6 (selector) bytes are cleared.
static void write_bc4_solid_block(uint8_t* pDst, uint32_t a)
{
	const uint8_t endpoint = static_cast<uint8_t>(a);

	pDst[0] = endpoint;
	pDst[1] = endpoint;

	for (uint32_t i = 2; i < 8; i++)
		pDst[i] = 0;
}
17862
17863
bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality)
17864
{
17865
unpacked_uastc_block unpacked_src_blk;
17866
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17867
return false;
17868
17869
const uint32_t mode = unpacked_src_blk.m_mode;
17870
17871
void* pBC4_block = pDst;
17872
dxt1_block* pBC1_block = &static_cast<dxt1_block*>(pDst)[1];
17873
17874
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17875
{
17876
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.a);
17877
encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
17878
return true;
17879
}
17880
17881
color32 block_pixels[4][4];
17882
const bool unpack_srgb = false;
17883
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17884
return false;
17885
17886
basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32));
17887
17888
if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
17889
transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block);
17890
else
17891
{
17892
if (unpacked_src_blk.m_bc1_hint1)
17893
transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality);
17894
else
17895
encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
17896
}
17897
17898
return true;
17899
}
17900
17901
bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
17902
{
17903
BASISU_NOTE_UNUSED(high_quality);
17904
17905
unpacked_uastc_block unpacked_src_blk;
17906
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17907
return false;
17908
17909
const uint32_t mode = unpacked_src_blk.m_mode;
17910
17911
void* pBC4_block = pDst;
17912
17913
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17914
{
17915
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]);
17916
return true;
17917
}
17918
17919
color32 block_pixels[4][4];
17920
const bool unpack_srgb = false;
17921
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17922
return false;
17923
17924
basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32));
17925
17926
return true;
17927
}
17928
17929
bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
17930
{
17931
BASISU_NOTE_UNUSED(high_quality);
17932
17933
unpacked_uastc_block unpacked_src_blk;
17934
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17935
return false;
17936
17937
const uint32_t mode = unpacked_src_blk.m_mode;
17938
17939
void* pBC4_block0 = pDst;
17940
void* pBC4_block1 = (uint8_t*)pDst + 8;
17941
17942
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17943
{
17944
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]);
17945
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]);
17946
return true;
17947
}
17948
17949
color32 block_pixels[4][4];
17950
const bool unpack_srgb = false;
17951
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17952
return false;
17953
17954
basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32));
17955
basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32));
17956
17957
return true;
17958
}
17959
17960
// Bit offset of pixel i's 3-bit EAC selector within the packed selector word handed to
// eac_block::set_selector_bits(). Entry i equals 45 - 3 * ((i & 3) * 4 + (i >> 2)), i.e. the 4x4 index is
// transposed before packing and index 0 occupies the highest bits (45..47).
// NOTE(review): the packers index pixels as a flattened [4][4] array, so (i & 3) is presumably x and (i >> 2) y - confirm.
static const uint8_t s_etc2_eac_bit_ofs[16] = { 45, 33, 21, 9, 42, 30, 18, 6, 39, 27, 15, 3, 36, 24, 12, 0 };
17961
17962
// Fills blk so it represents a constant value: base is the low 8 bits of a, table 13, multiplier 0,
// and the selector bytes are copied from the precomputed g_etc2_eac_a8_sel4 pattern.
static void pack_eac_solid_block(eac_block& blk, uint32_t a)
{
	blk.m_base = static_cast<uint8_t>(a);
	blk.m_table = 13;
	blk.m_multiplier = 0;

	memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
}
17972
17973
// Only checks 4 tables.
17974
static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
17975
{
17976
uint32_t min_alpha = 255, max_alpha = 0;
17977
for (uint32_t i = 0; i < 16; i++)
17978
{
17979
const uint32_t a = pPixels[i * stride];
17980
if (a < min_alpha) min_alpha = a;
17981
if (a > max_alpha) max_alpha = a;
17982
}
17983
17984
if (min_alpha == max_alpha)
17985
{
17986
pack_eac_solid_block(blk, min_alpha);
17987
return;
17988
}
17989
17990
const uint32_t alpha_range = max_alpha - min_alpha;
17991
17992
const uint32_t SINGLE_TABLE_THRESH = 5;
17993
if (alpha_range <= SINGLE_TABLE_THRESH)
17994
{
17995
// If alpha_range <= 5 table 13 is lossless
17996
int base = clamp255((int)max_alpha - 2);
17997
17998
blk.m_base = base;
17999
blk.m_multiplier = 1;
18000
blk.m_table = 13;
18001
18002
base -= 3;
18003
18004
uint64_t packed_sels = 0;
18005
for (uint32_t i = 0; i < 16; i++)
18006
{
18007
const int a = pPixels[i * stride];
18008
18009
static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
18010
18011
int sel = a - base;
18012
assert(sel >= 0 && sel <= 5);
18013
18014
packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
18015
}
18016
18017
blk.set_selector_bits(packed_sels);
18018
18019
return;
18020
}
18021
18022
const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13;
18023
static const uint8_t s_tables[4] = { T0, T1, T2, T3 };
18024
18025
int base[4], mul[4];
18026
uint32_t mul_or = 0;
18027
for (uint32_t i = 0; i < 4; i++)
18028
{
18029
const uint32_t table = s_tables[i];
18030
18031
const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
18032
18033
base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
18034
mul[i] = clampi((int)roundf(alpha_range / range), 1, 15);
18035
mul_or |= mul[i];
18036
}
18037
18038
uint32_t total_err[4] = { 0, 0, 0, 0 };
18039
uint8_t sels[4][16];
18040
18041
for (uint32_t i = 0; i < 16; i++)
18042
{
18043
const int a = pPixels[i * stride];
18044
18045
uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX;
18046
18047
if ((a < 7) || (a > (255 - 7)))
18048
{
18049
for (uint32_t s = 0; s < 8; s++)
18050
{
18051
const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]);
18052
const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]);
18053
const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]);
18054
const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]);
18055
18056
l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s);
18057
l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s);
18058
l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s);
18059
l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s);
18060
}
18061
}
18062
else if (mul_or == 1)
18063
{
18064
const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
18065
18066
for (uint32_t s = 0; s < 8; s++)
18067
{
18068
const int v0 = g_eac_modifier_table[T0][s] + a0;
18069
const int v1 = g_eac_modifier_table[T1][s] + a1;
18070
const int v2 = g_eac_modifier_table[T2][s] + a2;
18071
const int v3 = g_eac_modifier_table[T3][s] + a3;
18072
18073
l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
18074
l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
18075
l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
18076
l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
18077
}
18078
}
18079
else
18080
{
18081
const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
18082
18083
for (uint32_t s = 0; s < 8; s++)
18084
{
18085
const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0;
18086
const int v1 = mul[1] * g_eac_modifier_table[T1][s] + a1;
18087
const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2;
18088
const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3;
18089
18090
l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
18091
l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
18092
l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
18093
l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
18094
}
18095
}
18096
18097
sels[0][i] = l0 & 7;
18098
sels[1][i] = l1 & 7;
18099
sels[2][i] = l2 & 7;
18100
sels[3][i] = l3 & 7;
18101
18102
total_err[0] += basisu::square<uint32_t>(l0 >> 3);
18103
total_err[1] += basisu::square<uint32_t>(l1 >> 3);
18104
total_err[2] += basisu::square<uint32_t>(l2 >> 3);
18105
total_err[3] += basisu::square<uint32_t>(l3 >> 3);
18106
}
18107
18108
uint32_t min_err = total_err[0], min_index = 0;
18109
for (uint32_t i = 1; i < 4; i++)
18110
{
18111
if (total_err[i] < min_err)
18112
{
18113
min_err = total_err[i];
18114
min_index = i;
18115
}
18116
}
18117
18118
blk.m_base = base[min_index];
18119
blk.m_multiplier = mul[min_index];
18120
blk.m_table = s_tables[min_index];
18121
18122
uint64_t packed_sels = 0;
18123
const uint8_t* pSels = &sels[min_index][0];
18124
for (uint32_t i = 0; i < 16; i++)
18125
packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
18126
18127
blk.set_selector_bits(packed_sels);
18128
}
18129
18130
// Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal.
18131
static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
18132
{
18133
uint32_t min_alpha = 255, max_alpha = 0;
18134
for (uint32_t i = 0; i < 16; i++)
18135
{
18136
const uint32_t a = pPixels[i * stride];
18137
if (a < min_alpha) min_alpha = a;
18138
if (a > max_alpha) max_alpha = a;
18139
}
18140
18141
if (min_alpha == max_alpha)
18142
{
18143
pack_eac_solid_block(blk, min_alpha);
18144
return;
18145
}
18146
18147
const uint32_t alpha_range = max_alpha - min_alpha;
18148
18149
const uint32_t SINGLE_TABLE_THRESH = 5;
18150
if (alpha_range <= SINGLE_TABLE_THRESH)
18151
{
18152
// If alpha_range <= 5 table 13 is lossless
18153
int base = clamp255((int)max_alpha - 2);
18154
18155
blk.m_base = base;
18156
blk.m_multiplier = 1;
18157
blk.m_table = 13;
18158
18159
base -= 3;
18160
18161
uint64_t packed_sels = 0;
18162
for (uint32_t i = 0; i < 16; i++)
18163
{
18164
const int a = pPixels[i * stride];
18165
18166
static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
18167
18168
int sel = a - base;
18169
assert(sel >= 0 && sel <= 5);
18170
18171
packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
18172
}
18173
18174
blk.set_selector_bits(packed_sels);
18175
18176
return;
18177
}
18178
18179
int base[16], mul[16];
18180
for (uint32_t table = 0; table < 16; table++)
18181
{
18182
const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
18183
18184
base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
18185
mul[table] = clampi((int)roundf(alpha_range / range), 1, 15);
18186
}
18187
18188
uint32_t total_err[16];
18189
memset(total_err, 0, sizeof(total_err));
18190
18191
uint8_t sels[16][16];
18192
18193
for (uint32_t table = 0; table < 16; table++)
18194
{
18195
const int8_t* pTable = &g_eac_modifier_table[table][0];
18196
const int m = mul[table], b = base[table];
18197
18198
uint32_t prev_l = 0, prev_a = UINT32_MAX;
18199
18200
for (uint32_t i = 0; i < 16; i++)
18201
{
18202
const int a = pPixels[i * stride];
18203
18204
if ((uint32_t)a == prev_a)
18205
{
18206
sels[table][i] = prev_l & 7;
18207
total_err[table] += basisu::square<uint32_t>(prev_l >> 3);
18208
}
18209
else
18210
{
18211
uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3;
18212
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1);
18213
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2);
18214
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3);
18215
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4);
18216
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5);
18217
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6);
18218
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7);
18219
18220
sels[table][i] = l & 7;
18221
total_err[table] += basisu::square<uint32_t>(l >> 3);
18222
18223
prev_l = l;
18224
prev_a = a;
18225
}
18226
}
18227
}
18228
18229
uint32_t min_err = total_err[0], min_index = 0;
18230
for (uint32_t i = 1; i < 16; i++)
18231
{
18232
if (total_err[i] < min_err)
18233
{
18234
min_err = total_err[i];
18235
min_index = i;
18236
}
18237
}
18238
18239
blk.m_base = base[min_index];
18240
blk.m_multiplier = mul[min_index];
18241
blk.m_table = min_index;
18242
18243
uint64_t packed_sels = 0;
18244
const uint8_t* pSels = &sels[min_index][0];
18245
for (uint32_t i = 0; i < 16; i++)
18246
packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
18247
18248
blk.set_selector_bits(packed_sels);
18249
}
18250
18251
bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
18252
{
18253
unpacked_uastc_block unpacked_src_blk;
18254
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
18255
return false;
18256
18257
const uint32_t mode = unpacked_src_blk.m_mode;
18258
18259
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
18260
{
18261
pack_eac_solid_block(*static_cast<eac_block*>(pDst), unpacked_src_blk.m_solid_color.c[chan0]);
18262
return true;
18263
}
18264
18265
color32 block_pixels[4][4];
18266
const bool unpack_srgb = false;
18267
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
18268
return false;
18269
18270
if (chan0 == 3)
18271
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst);
18272
else
18273
(high_quality ? pack_eac_high_quality : pack_eac)(*static_cast<eac_block*>(pDst), &block_pixels[0][0].c[chan0], sizeof(color32));
18274
18275
return true;
18276
}
18277
18278
bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
18279
{
18280
unpacked_uastc_block unpacked_src_blk;
18281
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
18282
return false;
18283
18284
const uint32_t mode = unpacked_src_blk.m_mode;
18285
18286
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
18287
{
18288
pack_eac_solid_block(static_cast<eac_block*>(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]);
18289
pack_eac_solid_block(static_cast<eac_block*>(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]);
18290
return true;
18291
}
18292
18293
color32 block_pixels[4][4];
18294
const bool unpack_srgb = false;
18295
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
18296
return false;
18297
18298
if (chan0 == 3)
18299
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[0]);
18300
else
18301
(high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32));
18302
18303
if (chan1 == 3)
18304
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[1]);
18305
else
18306
(high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32));
18307
return true;
18308
}
18309
18310
// PVRTC1
18311
static void fixup_pvrtc1_4_modulation_rgb(
18312
const uastc_block* pSrc_blocks,
18313
const uint32_t* pPVRTC_endpoints,
18314
void* pDst_blocks,
18315
uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha)
18316
{
18317
const uint32_t x_mask = num_blocks_x - 1;
18318
const uint32_t y_mask = num_blocks_y - 1;
18319
const uint32_t x_bits = basisu::total_bits(x_mask);
18320
const uint32_t y_bits = basisu::total_bits(y_mask);
18321
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
18322
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
18323
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
18324
18325
uint32_t block_index = 0;
18326
18327
// really 3x3
18328
int e0[4][4], e1[4][4];
18329
18330
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
18331
{
18332
const uint32_t* pE_rows[3];
18333
18334
for (int ey = 0; ey < 3; ey++)
18335
{
18336
int by = y + ey - 1;
18337
18338
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
18339
18340
pE_rows[ey] = pE;
18341
18342
for (int ex = 0; ex < 3; ex++)
18343
{
18344
int bx = 0 + ex - 1;
18345
18346
const uint32_t e = pE[bx & x_mask];
18347
18348
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
18349
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
18350
}
18351
}
18352
18353
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
18354
18355
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
18356
{
18357
const uastc_block& src_block = pSrc_blocks[block_index];
18358
18359
color32 block_pixels[4][4];
18360
unpack_uastc(src_block, &block_pixels[0][0], false);
18361
if (from_alpha)
18362
{
18363
// Just set RGB to alpha to avoid adding complexity below.
18364
for (uint32_t i = 0; i < 16; i++)
18365
{
18366
const uint8_t a = ((color32*)block_pixels)[i].a;
18367
((color32*)block_pixels)[i].set(a, a, a, 255);
18368
}
18369
}
18370
18371
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
18372
18373
uint32_t swizzled = x_swizzle | y_swizzle;
18374
if (num_blocks_x != num_blocks_y)
18375
{
18376
swizzled &= swizzle_mask;
18377
18378
if (num_blocks_x > num_blocks_y)
18379
swizzled |= ((x >> min_bits) << (min_bits * 2));
18380
else
18381
swizzled |= ((y >> min_bits) << (min_bits * 2));
18382
}
18383
18384
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
18385
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
18386
18387
{
18388
const uint32_t ex = 2;
18389
int bx = x + ex - 1;
18390
bx &= x_mask;
18391
18392
#define DO_ROW(ey) \
18393
{ \
18394
const uint32_t e = pE_rows[ey][bx]; \
18395
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
18396
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
18397
}
18398
18399
DO_ROW(0);
18400
DO_ROW(1);
18401
DO_ROW(2);
18402
#undef DO_ROW
18403
}
18404
18405
uint32_t mod = 0;
18406
18407
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
18408
{ \
18409
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
18410
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
18411
int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \
18412
int dl = cb_l - ca_l; \
18413
int vl = cl - ca_l; \
18414
int p = vl * 16; \
18415
if (ca_l > cb_l) { p = -p; dl = -dl; } \
18416
uint32_t m = 0; \
18417
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
18418
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
18419
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
18420
mod |= m; \
18421
}
18422
18423
{
18424
const uint32_t ex = 0, ey = 0;
18425
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18426
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18427
DO_PIX(0, 0, 4, 4, 4, 4);
18428
DO_PIX(1, 0, 2, 6, 2, 6);
18429
DO_PIX(0, 1, 2, 2, 6, 6);
18430
DO_PIX(1, 1, 1, 3, 3, 9);
18431
}
18432
18433
{
18434
const uint32_t ex = 1, ey = 0;
18435
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18436
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18437
DO_PIX(2, 0, 8, 0, 8, 0);
18438
DO_PIX(3, 0, 6, 2, 6, 2);
18439
DO_PIX(2, 1, 4, 0, 12, 0);
18440
DO_PIX(3, 1, 3, 1, 9, 3);
18441
}
18442
18443
{
18444
const uint32_t ex = 0, ey = 1;
18445
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18446
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18447
DO_PIX(0, 2, 8, 8, 0, 0);
18448
DO_PIX(1, 2, 4, 12, 0, 0);
18449
DO_PIX(0, 3, 6, 6, 2, 2);
18450
DO_PIX(1, 3, 3, 9, 1, 3);
18451
}
18452
18453
{
18454
const uint32_t ex = 1, ey = 1;
18455
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18456
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18457
DO_PIX(2, 2, 16, 0, 0, 0);
18458
DO_PIX(3, 2, 12, 4, 0, 0);
18459
DO_PIX(2, 3, 12, 0, 4, 0);
18460
DO_PIX(3, 3, 9, 3, 3, 1);
18461
}
18462
#undef DO_PIX
18463
18464
pDst_block->m_modulation = mod;
18465
18466
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
18467
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
18468
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
18469
18470
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
18471
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
18472
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
18473
18474
} // x
18475
} // y
18476
}
18477
18478
static void fixup_pvrtc1_4_modulation_rgba(
18479
const uastc_block* pSrc_blocks,
18480
const uint32_t* pPVRTC_endpoints,
18481
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
18482
{
18483
const uint32_t x_mask = num_blocks_x - 1;
18484
const uint32_t y_mask = num_blocks_y - 1;
18485
const uint32_t x_bits = basisu::total_bits(x_mask);
18486
const uint32_t y_bits = basisu::total_bits(y_mask);
18487
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
18488
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
18489
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
18490
18491
uint32_t block_index = 0;
18492
18493
// really 3x3
18494
int e0[4][4], e1[4][4];
18495
18496
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
18497
{
18498
const uint32_t* pE_rows[3];
18499
18500
for (int ey = 0; ey < 3; ey++)
18501
{
18502
int by = y + ey - 1;
18503
18504
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
18505
18506
pE_rows[ey] = pE;
18507
18508
for (int ex = 0; ex < 3; ex++)
18509
{
18510
int bx = 0 + ex - 1;
18511
18512
const uint32_t e = pE[bx & x_mask];
18513
18514
e0[ex][ey] = get_endpoint_l8(e, 0);
18515
e1[ex][ey] = get_endpoint_l8(e, 1);
18516
}
18517
}
18518
18519
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
18520
18521
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
18522
{
18523
const uastc_block& src_block = pSrc_blocks[block_index];
18524
18525
color32 block_pixels[4][4];
18526
unpack_uastc(src_block, &block_pixels[0][0], false);
18527
18528
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
18529
18530
uint32_t swizzled = x_swizzle | y_swizzle;
18531
if (num_blocks_x != num_blocks_y)
18532
{
18533
swizzled &= swizzle_mask;
18534
18535
if (num_blocks_x > num_blocks_y)
18536
swizzled |= ((x >> min_bits) << (min_bits * 2));
18537
else
18538
swizzled |= ((y >> min_bits) << (min_bits * 2));
18539
}
18540
18541
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
18542
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
18543
18544
{
18545
const uint32_t ex = 2;
18546
int bx = x + ex - 1;
18547
bx &= x_mask;
18548
18549
#define DO_ROW(ey) \
18550
{ \
18551
const uint32_t e = pE_rows[ey][bx]; \
18552
e0[ex][ey] = get_endpoint_l8(e, 0); \
18553
e1[ex][ey] = get_endpoint_l8(e, 1); \
18554
}
18555
18556
DO_ROW(0);
18557
DO_ROW(1);
18558
DO_ROW(2);
18559
#undef DO_ROW
18560
}
18561
18562
uint32_t mod = 0;
18563
18564
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
18565
{ \
18566
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
18567
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
18568
int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \
18569
int dl = cb_l - ca_l; \
18570
int vl = cl - ca_l; \
18571
int p = vl * 16; \
18572
if (ca_l > cb_l) { p = -p; dl = -dl; } \
18573
uint32_t m = 0; \
18574
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
18575
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
18576
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
18577
mod |= m; \
18578
}
18579
18580
{
18581
const uint32_t ex = 0, ey = 0;
18582
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18583
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18584
DO_PIX(0, 0, 4, 4, 4, 4);
18585
DO_PIX(1, 0, 2, 6, 2, 6);
18586
DO_PIX(0, 1, 2, 2, 6, 6);
18587
DO_PIX(1, 1, 1, 3, 3, 9);
18588
}
18589
18590
{
18591
const uint32_t ex = 1, ey = 0;
18592
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18593
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18594
DO_PIX(2, 0, 8, 0, 8, 0);
18595
DO_PIX(3, 0, 6, 2, 6, 2);
18596
DO_PIX(2, 1, 4, 0, 12, 0);
18597
DO_PIX(3, 1, 3, 1, 9, 3);
18598
}
18599
18600
{
18601
const uint32_t ex = 0, ey = 1;
18602
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18603
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18604
DO_PIX(0, 2, 8, 8, 0, 0);
18605
DO_PIX(1, 2, 4, 12, 0, 0);
18606
DO_PIX(0, 3, 6, 6, 2, 2);
18607
DO_PIX(1, 3, 3, 9, 1, 3);
18608
}
18609
18610
{
18611
const uint32_t ex = 1, ey = 1;
18612
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18613
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18614
DO_PIX(2, 2, 16, 0, 0, 0);
18615
DO_PIX(3, 2, 12, 4, 0, 0);
18616
DO_PIX(2, 3, 12, 0, 4, 0);
18617
DO_PIX(3, 3, 9, 3, 3, 1);
18618
}
18619
#undef DO_PIX
18620
18621
pDst_block->m_modulation = mod;
18622
18623
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
18624
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
18625
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
18626
18627
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
18628
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
18629
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
18630
18631
} // x
18632
} // y
18633
}
18634
18635
bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha)
18636
{
18637
BASISU_NOTE_UNUSED(high_quality);
18638
18639
if ((!num_blocks_x) || (!num_blocks_y))
18640
return false;
18641
18642
const uint32_t width = num_blocks_x * 4;
18643
const uint32_t height = num_blocks_y * 4;
18644
if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
18645
return false;
18646
18647
basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
18648
18649
for (uint32_t y = 0; y < num_blocks_y; y++)
18650
{
18651
for (uint32_t x = 0; x < num_blocks_x; x++)
18652
{
18653
color32 block_pixels[16];
18654
if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
18655
return false;
18656
18657
// Get block's RGB bounding box
18658
color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
18659
18660
if (from_alpha)
18661
{
18662
uint32_t low_a = 255, high_a = 0;
18663
for (uint32_t i = 0; i < 16; i++)
18664
{
18665
low_a = basisu::minimum<uint32_t>(low_a, block_pixels[i].a);
18666
high_a = basisu::maximum<uint32_t>(high_a, block_pixels[i].a);
18667
}
18668
low_color.set(low_a, low_a, low_a, 255);
18669
high_color.set(high_a, high_a, high_a, 255);
18670
}
18671
else
18672
{
18673
for (uint32_t i = 0; i < 16; i++)
18674
{
18675
low_color = color32::comp_min(low_color, block_pixels[i]);
18676
high_color = color32::comp_max(high_color, block_pixels[i]);
18677
}
18678
}
18679
18680
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
18681
pvrtc4_block temp;
18682
temp.set_opaque_endpoint_floor(0, low_color);
18683
temp.set_opaque_endpoint_ceil(1, high_color);
18684
18685
temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
18686
}
18687
}
18688
18689
fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha);
18690
18691
return true;
18692
}
18693
18694
bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality)
18695
{
18696
BASISU_NOTE_UNUSED(high_quality);
18697
18698
if ((!num_blocks_x) || (!num_blocks_y))
18699
return false;
18700
18701
const uint32_t width = num_blocks_x * 4;
18702
const uint32_t height = num_blocks_y * 4;
18703
if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
18704
return false;
18705
18706
basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
18707
18708
for (uint32_t y = 0; y < num_blocks_y; y++)
18709
{
18710
for (uint32_t x = 0; x < num_blocks_x; x++)
18711
{
18712
color32 block_pixels[16];
18713
if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
18714
return false;
18715
18716
// Get block's RGBA bounding box
18717
color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
18718
18719
for (uint32_t i = 0; i < 16; i++)
18720
{
18721
low_color = color32::comp_min(low_color, block_pixels[i]);
18722
high_color = color32::comp_max(high_color, block_pixels[i]);
18723
}
18724
18725
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
18726
pvrtc4_block temp;
18727
temp.set_endpoint_floor(0, low_color);
18728
temp.set_endpoint_ceil(1, high_color);
18729
18730
temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
18731
}
18732
}
18733
18734
fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y);
18735
18736
return true;
18737
}
18738
18739
void uastc_init()
18740
{
18741
for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++)
18742
{
18743
if (!astc_is_valid_endpoint_range(range))
18744
continue;
18745
18746
const uint32_t levels = astc_get_levels(range);
18747
18748
uint32_t vals[256];
18749
for (uint32_t i = 0; i < levels; i++)
18750
vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i;
18751
18752
std::sort(vals, vals + levels);
18753
18754
for (uint32_t i = 0; i < levels; i++)
18755
{
18756
const uint32_t order = vals[i] & 0xFF;
18757
const uint32_t unq = vals[i] >> 8;
18758
18759
g_astc_unquant[range][order].m_unquant = (uint8_t)unq;
18760
g_astc_unquant[range][order].m_index = (uint8_t)i;
18761
18762
} // i
18763
}
18764
18765
// TODO: Precompute?
18766
// BC7 777.1
18767
for (int c = 0; c < 256; c++)
18768
{
18769
for (uint32_t lp = 0; lp < 2; lp++)
18770
{
18771
endpoint_err best;
18772
best.m_error = (uint16_t)UINT16_MAX;
18773
18774
for (uint32_t l = 0; l < 128; l++)
18775
{
18776
const uint32_t low = (l << 1) | lp;
18777
18778
for (uint32_t h = 0; h < 128; h++)
18779
{
18780
const uint32_t high = (h << 1) | lp;
18781
18782
const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6;
18783
18784
const int err = (k - c) * (k - c);
18785
if (err < best.m_error)
18786
{
18787
best.m_error = (uint16_t)err;
18788
best.m_lo = (uint8_t)l;
18789
best.m_hi = (uint8_t)h;
18790
}
18791
} // h
18792
} // l
18793
18794
g_bc7_mode_6_optimal_endpoints[c][lp] = best;
18795
} // lp
18796
18797
} // c
18798
18799
// BC7 777
18800
for (int c = 0; c < 256; c++)
18801
{
18802
endpoint_err best;
18803
best.m_error = (uint16_t)UINT16_MAX;
18804
18805
for (uint32_t l = 0; l < 128; l++)
18806
{
18807
const uint32_t low = (l << 1) | (l >> 6);
18808
18809
for (uint32_t h = 0; h < 128; h++)
18810
{
18811
const uint32_t high = (h << 1) | (h >> 6);
18812
18813
const int k = (low * (64 - g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6;
18814
18815
const int err = (k - c) * (k - c);
18816
if (err < best.m_error)
18817
{
18818
best.m_error = (uint16_t)err;
18819
best.m_lo = (uint8_t)l;
18820
best.m_hi = (uint8_t)h;
18821
}
18822
} // h
18823
} // l
18824
18825
g_bc7_mode_5_optimal_endpoints[c] = best;
18826
18827
} // c
18828
}
18829
18830
#endif // #if BASISD_SUPPORT_UASTC
18831
18832
// ------------------------------------------------------------------------------------------------------
18833
// KTX2
18834
// ------------------------------------------------------------------------------------------------------
18835
18836
#if BASISD_SUPPORT_KTX2
18837
// 12-byte signature compared against the start of the file by ktx2_transcoder::init().
// Bytes: 0xAB 'K' 'T' 'X' ' ' '2' '0' 0xBB '\r' '\n' 0x1A '\n'.
const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
18838
18839
// Constructs an empty transcoder; all state is put into the same condition clear() produces.
ktx2_transcoder::ktx2_transcoder()
	: m_etc1s_transcoder()
{
	clear();
}
18844
18845
void ktx2_transcoder::clear()
18846
{
18847
m_pData = nullptr;
18848
m_data_size = 0;
18849
18850
memset(&m_header, 0, sizeof(m_header));
18851
m_levels.clear();
18852
m_dfd.clear();
18853
m_key_values.clear();
18854
memset(&m_etc1s_header, 0, sizeof(m_etc1s_header));
18855
m_etc1s_image_descs.clear();
18856
m_astc_6x6_intermediate_image_descs.clear();
18857
18858
m_format = basist::basis_tex_format::cETC1S;
18859
18860
m_dfd_color_model = 0;
18861
m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED;
18862
m_dfd_transfer_func = 0;
18863
m_dfd_flags = 0;
18864
m_dfd_samples = 0;
18865
m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB;
18866
m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB;
18867
18868
m_etc1s_transcoder.clear();
18869
18870
m_def_transcoder_state.clear();
18871
18872
m_has_alpha = false;
18873
m_is_video = false;
18874
m_ldr_hdr_upconversion_nit_multiplier = 0.0f;
18875
}
18876
18877
bool ktx2_transcoder::init(const void* pData, uint32_t data_size)
18878
{
18879
clear();
18880
18881
if (!pData)
18882
{
18883
BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n");
18884
assert(0);
18885
return false;
18886
}
18887
18888
if (data_size <= sizeof(ktx2_header))
18889
{
18890
BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n");
18891
return false;
18892
}
18893
18894
if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0)
18895
{
18896
BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n");
18897
return false;
18898
}
18899
18900
m_pData = static_cast<const uint8_t *>(pData);
18901
m_data_size = data_size;
18902
18903
memcpy(&m_header, pData, sizeof(m_header));
18904
18905
// Check for supported VK formats. We may also need to parse the DFD.
18906
if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
18907
(m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) &&
18908
(m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK))
18909
{
18910
BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
18911
return false;
18912
}
18913
18914
// 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1."
18915
if (m_header.m_type_size != 1)
18916
{
18917
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n");
18918
return false;
18919
}
18920
18921
// We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case.
18922
// The BasisU library does not support 1D or 3D textures at all.
18923
if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0))
18924
{
18925
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n");
18926
return false;
18927
}
18928
18929
// Face count must be 1 or 6
18930
if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6))
18931
{
18932
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n");
18933
return false;
18934
}
18935
18936
if (m_header.m_face_count > 1)
18937
{
18938
// 3.4: Make sure cubemaps are square.
18939
if (m_header.m_pixel_width != m_header.m_pixel_height)
18940
{
18941
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n");
18942
return false;
18943
}
18944
}
18945
18946
// 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats"
18947
if (m_header.m_level_count < 1)
18948
{
18949
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n");
18950
return false;
18951
}
18952
18953
// Sanity check the level count.
18954
if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT)
18955
{
18956
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n");
18957
return false;
18958
}
18959
18960
if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD)
18961
{
18962
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n");
18963
return false;
18964
}
18965
18966
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
18967
{
18968
#if 0
18969
if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header))
18970
{
18971
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n");
18972
return false;
18973
}
18974
#endif
18975
18976
if (m_header.m_sgd_byte_offset < sizeof(ktx2_header))
18977
{
18978
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n");
18979
return false;
18980
}
18981
18982
if (m_header.m_sgd_byte_offset + m_header.m_sgd_byte_length > m_data_size)
18983
{
18984
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n");
18985
return false;
18986
}
18987
}
18988
18989
if (!m_levels.try_resize(m_header.m_level_count))
18990
{
18991
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
18992
return false;
18993
}
18994
18995
const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index);
18996
18997
if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size)
18998
{
18999
BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n");
19000
return false;
19001
}
19002
19003
memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes);
19004
19005
// Sanity check the level offsets and byte sizes
19006
for (uint32_t i = 0; i < m_levels.size(); i++)
19007
{
19008
if (m_levels[i].m_byte_offset < sizeof(ktx2_header))
19009
{
19010
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n");
19011
return false;
19012
}
19013
19014
if (!m_levels[i].m_byte_length)
19015
{
19016
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n");
19017
}
19018
19019
if ((m_levels[i].m_byte_offset + m_levels[i].m_byte_length) > m_data_size)
19020
{
19021
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n");
19022
return false;
19023
}
19024
19025
const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL;
19026
19027
if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE)
19028
{
19029
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n");
19030
return false;
19031
}
19032
19033
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
19034
{
19035
if (m_levels[i].m_uncompressed_byte_length)
19036
{
19037
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n");
19038
return false;
19039
}
19040
}
19041
else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD)
19042
{
19043
if (!m_levels[i].m_uncompressed_byte_length)
19044
{
19045
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n");
19046
return false;
19047
}
19048
}
19049
}
19050
19051
const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60;
19052
if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE))
19053
{
19054
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n");
19055
return false;
19056
}
19057
19058
if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header)))
19059
{
19060
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n");
19061
return false;
19062
}
19063
19064
const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset;
19065
19066
if (!m_dfd.try_resize(m_header.m_dfd_byte_length))
19067
{
19068
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
19069
return false;
19070
}
19071
19072
memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length);
19073
19074
// This is all hard coded for only ETC1S and UASTC.
19075
uint32_t dfd_total_size = basisu::read_le_dword(pDFD);
19076
19077
// 3.10.3: Sanity check
19078
if (dfd_total_size != m_header.m_dfd_byte_length)
19079
{
19080
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n");
19081
return false;
19082
}
19083
19084
// 3.10.3: More sanity checking
19085
if (m_header.m_kvd_byte_length)
19086
{
19087
if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset)
19088
{
19089
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n");
19090
return false;
19091
}
19092
}
19093
19094
const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t));
19095
const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t));
19096
19097
m_dfd_color_model = dfd_bits & 255;
19098
m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255);
19099
m_dfd_transfer_func = (dfd_bits >> 16) & 255;
19100
m_dfd_flags = (dfd_bits >> 24) & 255;
19101
19102
// See 3.10.1.Restrictions
19103
if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB))
19104
{
19105
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n");
19106
return false;
19107
}
19108
19109
if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S)
19110
{
19111
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19112
{
19113
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19114
return false;
19115
}
19116
19117
m_format = basist::basis_tex_format::cETC1S;
19118
19119
// 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count."
19120
// If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that.
19121
m_has_alpha = (m_header.m_dfd_byte_length == 60);
19122
19123
m_dfd_samples = m_has_alpha ? 2 : 1;
19124
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19125
19126
if (m_has_alpha)
19127
{
19128
const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t));
19129
m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15);
19130
}
19131
}
19132
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_LDR_4X4)
19133
{
19134
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19135
{
19136
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19137
return false;
19138
}
19139
19140
m_format = basist::basis_tex_format::cUASTC4x4;
19141
19142
m_dfd_samples = 1;
19143
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19144
19145
// We're assuming "DATA" means RGBA so it has alpha.
19146
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
19147
}
19148
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR_4X4)
19149
{
19150
// UASTC HDR 4x4 is standard ASTC HDR 4x4 texture data. Check the header's vkFormat.
19151
if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK)
19152
{
19153
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19154
return false;
19155
}
19156
19157
m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
19158
19159
m_dfd_samples = 1;
19160
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19161
19162
// We're assuming "DATA" means RGBA so it has alpha.
19163
// [11/26/2024] - changed to always false for now
19164
m_has_alpha = false;// (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
19165
}
19166
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC)
19167
{
19168
// The DFD indicates plain ASTC texture data. We only support ASTC HDR 6x6 - check the header's vkFormat.
19169
if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK)
19170
{
19171
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DVD color model is ASTC, but the header's vkFormat isn't KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK\n");
19172
return false;
19173
}
19174
19175
m_format = basist::basis_tex_format::cASTC_HDR_6x6;
19176
19177
m_dfd_samples = 1;
19178
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19179
19180
m_has_alpha = false;
19181
}
19182
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE)
19183
{
19184
// Custom variable block size ASTC HDR 6x6 texture data.
19185
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19186
{
19187
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19188
return false;
19189
}
19190
19191
m_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE;
19192
19193
m_dfd_samples = 1;
19194
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19195
19196
m_has_alpha = false;
19197
}
19198
else
19199
{
19200
// Unsupported DFD color model.
19201
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n");
19202
return false;
19203
}
19204
19205
if (!read_key_values())
19206
{
19207
BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n");
19208
return false;
19209
}
19210
19211
// Check for a KTXanimData key
19212
for (uint32_t i = 0; i < m_key_values.size(); i++)
19213
{
19214
if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0)
19215
{
19216
m_is_video = true;
19217
break;
19218
}
19219
}
19220
19221
m_ldr_hdr_upconversion_nit_multiplier = 0.0f;
19222
19223
for (uint32_t i = 0; i < m_key_values.size(); i++)
19224
{
19225
if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "LDRUpconversionMultiplier") == 0)
19226
{
19227
m_ldr_hdr_upconversion_nit_multiplier = (float)atof(reinterpret_cast<const char*>(m_key_values[i].m_value.data()));
19228
19229
if (std::isnan(m_ldr_hdr_upconversion_nit_multiplier) || std::isinf(m_ldr_hdr_upconversion_nit_multiplier) || (m_ldr_hdr_upconversion_nit_multiplier < 0.0f))
19230
m_ldr_hdr_upconversion_nit_multiplier = 0;
19231
19232
break;
19233
}
19234
}
19235
19236
return true;
19237
}
19238
19239
uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
19240
{
19241
const uint32_t etc1s_image_index =
19242
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19243
layer_index * m_header.m_face_count +
19244
face_index;
19245
19246
if (etc1s_image_index >= get_etc1s_image_descs().size())
19247
{
19248
assert(0);
19249
return 0;
19250
}
19251
19252
return get_etc1s_image_descs()[etc1s_image_index].m_image_flags;
19253
}
19254
19255
// Looks up a key/value entry by exact key name (strcmp comparison).
// Returns a pointer to the first matching entry's value bytes, or nullptr if no key matches.
const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const
{
	const char* const pWanted = key_name.c_str();

	for (uint32_t i = 0; i < m_key_values.size(); i++)
	{
		const char* const pKey = (const char *)m_key_values[i].m_key.data();

		if (strcmp(pKey, pWanted) == 0)
			return &m_key_values[i].m_value;
	}

	return nullptr;
}
19263
19264
bool ktx2_transcoder::start_transcoding()
19265
{
19266
if (!m_pData)
19267
{
19268
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n");
19269
return false;
19270
}
19271
19272
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
19273
{
19274
if (m_format == basis_tex_format::cETC1S)
19275
{
19276
// Check if we've already decompressed the ETC1S global data. If so don't unpack it again.
19277
if (!m_etc1s_transcoder.get_endpoints().empty())
19278
return true;
19279
19280
if (!decompress_etc1s_global_data())
19281
{
19282
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n");
19283
return false;
19284
}
19285
19286
if (!m_is_video)
19287
{
19288
// See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key.
19289
// Video cannot be a cubemap, and it must be a texture array.
19290
if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1))
19291
{
19292
for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++)
19293
{
19294
if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME)
19295
{
19296
m_is_video = true;
19297
break;
19298
}
19299
}
19300
}
19301
}
19302
}
19303
else if (m_format == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
19304
{
19305
if (m_astc_6x6_intermediate_image_descs.size())
19306
return true;
19307
19308
if (!read_astc_6x6_hdr_intermediate_global_data())
19309
{
19310
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_astc_6x6_hdr_intermediate_global_data() failed\n");
19311
return false;
19312
}
19313
}
19314
else
19315
{
19316
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Invalid supercompression scheme and/or format\n");
19317
return false;
19318
}
19319
}
19320
else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19321
{
19322
#if !BASISD_SUPPORT_KTX2_ZSTD
19323
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n");
19324
return false;
19325
#endif
19326
}
19327
19328
return true;
19329
}
19330
19331
// Fills level_info with the dimensions, block counts and flags of one image of the texture.
// level_index must be < the number of levels; face_index must be < 6 for cubemaps and 0 otherwise;
// layer_index must be < max(layer count, 1). Returns false (leaving level_info untouched) if any
// index is out of range, true otherwise.
// m_iframe_flag is only computed when ETC1S image descriptors are available; otherwise it is false.
bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
{
	if (level_index >= m_levels.size())
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n");
		return false;
	}

	const bool is_cubemap = (m_header.m_face_count > 1);

	if (is_cubemap && (face_index >= 6))
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n");
		return false;
	}

	if ((!is_cubemap) && (face_index != 0))
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n");
		return false;
	}

	const uint32_t total_layers = basisu::maximum<uint32_t>(m_header.m_layer_count, 1);
	if (layer_index >= total_layers)
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
		return false;
	}

	// Mip dimensions halve per level and never drop below one pixel.
	const uint32_t mip_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
	const uint32_t mip_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);

	const uint32_t blk_width = get_block_width();
	const uint32_t blk_height = get_block_height();

	// Round up to whole blocks.
	const uint32_t blocks_x = (mip_width + blk_width - 1) / blk_width;
	const uint32_t blocks_y = (mip_height + blk_height - 1) / blk_height;

	// Which image this describes.
	level_info.m_level_index = level_index;
	level_info.m_layer_index = layer_index;
	level_info.m_face_index = face_index;

	// Pixel dimensions: original, and padded up to a whole number of blocks.
	level_info.m_orig_width = mip_width;
	level_info.m_orig_height = mip_height;
	level_info.m_width = blocks_x * blk_width;
	level_info.m_height = blocks_y * blk_height;

	// Block geometry.
	level_info.m_block_width = blk_width;
	level_info.m_block_height = blk_height;
	level_info.m_num_blocks_x = blocks_x;
	level_info.m_num_blocks_y = blocks_y;
	level_info.m_total_blocks = blocks_x * blocks_y;

	level_info.m_alpha_flag = m_has_alpha;
	level_info.m_iframe_flag = false;

	if (m_etc1s_image_descs.size())
	{
		// Descriptors are stored level-major, then layer, then face.
		const uint32_t image_index =
			(level_index * total_layers * m_header.m_face_count) +
			layer_index * m_header.m_face_count +
			face_index;

		const uint32_t image_flags = m_etc1s_image_descs[image_index].m_image_flags;
		level_info.m_iframe_flag = (image_flags & KTX2_IMAGE_IS_P_FRAME) == 0;
	}

	return true;
}
19395
19396
bool ktx2_transcoder::transcode_image_level(
19397
uint32_t level_index, uint32_t layer_index, uint32_t face_index,
19398
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
19399
basist::transcoder_texture_format fmt,
19400
uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1,
19401
ktx2_transcoder_state* pState)
19402
{
19403
if (!m_pData)
19404
{
19405
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n");
19406
return false;
19407
}
19408
19409
if (!pState)
19410
pState = &m_def_transcoder_state;
19411
19412
if (level_index >= m_levels.size())
19413
{
19414
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n");
19415
return false;
19416
}
19417
19418
if (m_header.m_face_count > 1)
19419
{
19420
if (face_index >= 6)
19421
{
19422
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n");
19423
return false;
19424
}
19425
}
19426
else if (face_index != 0)
19427
{
19428
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n");
19429
return false;
19430
}
19431
19432
if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
19433
{
19434
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
19435
return false;
19436
}
19437
19438
const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset;
19439
uint64_t comp_level_data_size = m_levels[level_index].m_byte_length;
19440
19441
const uint8_t* pUncomp_level_data = pComp_level_data;
19442
uint64_t uncomp_level_data_size = comp_level_data_size;
19443
19444
if (uncomp_level_data_size > UINT32_MAX)
19445
{
19446
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n");
19447
return false;
19448
}
19449
19450
if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19451
{
19452
// Check if we've already decompressed this level's supercompressed data.
19453
if ((int)level_index != pState->m_uncomp_data_level_index)
19454
{
19455
// Uncompress the entire level's supercompressed data.
19456
if (!decompress_level_data(level_index, pState->m_level_uncomp_data))
19457
{
19458
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n");
19459
return false;
19460
}
19461
pState->m_uncomp_data_level_index = level_index;
19462
}
19463
19464
pUncomp_level_data = pState->m_level_uncomp_data.data();
19465
uncomp_level_data_size = pState->m_level_uncomp_data.size();
19466
}
19467
19468
const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
19469
const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
19470
const uint32_t num_blocks4_x = (level_width + 3) >> 2;
19471
const uint32_t num_blocks4_y = (level_height + 3) >> 2;
19472
19473
if (m_format == basist::basis_tex_format::cETC1S)
19474
{
19475
// Ensure start_transcoding() was called.
19476
if (m_etc1s_transcoder.get_endpoints().empty())
19477
{
19478
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
19479
return false;
19480
}
19481
19482
const uint32_t etc1s_image_index =
19483
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19484
layer_index * m_header.m_face_count +
19485
face_index;
19486
19487
// Sanity check
19488
if (etc1s_image_index >= m_etc1s_image_descs.size())
19489
{
19490
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n");
19491
assert(0);
19492
return false;
19493
}
19494
19495
const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index];
19496
19497
if (!m_etc1s_transcoder.transcode_image(fmt,
19498
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, m_data_size,
19499
num_blocks4_x, num_blocks4_y, level_width, level_height,
19500
level_index,
19501
m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
19502
image_desc.m_alpha_slice_byte_length ? (m_levels[level_index].m_byte_offset + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length,
19503
decode_flags, m_has_alpha,
19504
m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels))
19505
{
19506
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19507
return false;
19508
}
19509
}
19510
else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
19511
{
19512
if (!m_astc_6x6_intermediate_image_descs.size())
19513
{
19514
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
19515
return false;
19516
}
19517
19518
const uint32_t num_blocks6_x = (level_width + 5) / 6;
19519
const uint32_t num_blocks6_y = (level_height + 5) / 6;
19520
19521
const uint32_t image_index =
19522
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19523
layer_index * m_header.m_face_count +
19524
face_index;
19525
19526
// Sanity check
19527
if (image_index >= m_astc_6x6_intermediate_image_descs.size())
19528
{
19529
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Invalid image_index\n");
19530
assert(0);
19531
return false;
19532
}
19533
19534
const ktx2_astc_hdr_6x6_intermediate_image_desc& image_desc = m_astc_6x6_intermediate_image_descs[image_index];
19535
19536
if (!m_astc_hdr_6x6_intermediate_transcoder.transcode_image(fmt,
19537
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19538
m_pData, m_data_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index,
19539
m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
19540
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19541
{
19542
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19543
return false;
19544
}
19545
}
19546
else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6)
19547
{
19548
const uint32_t num_blocks6_x = (level_width + 5) / 6;
19549
const uint32_t num_blocks6_y = (level_height + 5) / 6;
19550
19551
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
19552
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
19553
const uint32_t total_2D_image_size = num_blocks6_x * num_blocks6_y * sizeof(astc_helpers::astc_block);
19554
19555
const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
19556
19557
// Sanity checks
19558
if (uncomp_ofs >= uncomp_level_data_size)
19559
{
19560
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
19561
return false;
19562
}
19563
19564
if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
19565
{
19566
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
19567
return false;
19568
}
19569
19570
if (!m_astc_hdr_6x6_transcoder.transcode_image(fmt,
19571
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19572
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index,
19573
0, (uint32_t)total_2D_image_size,
19574
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19575
{
19576
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19577
return false;
19578
}
19579
}
19580
else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
19581
(m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
19582
{
19583
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
19584
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
19585
const uint32_t total_2D_image_size = num_blocks4_x * num_blocks4_y * KTX2_UASTC_BLOCK_SIZE;
19586
19587
const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
19588
19589
// Sanity checks
19590
if (uncomp_ofs >= uncomp_level_data_size)
19591
{
19592
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
19593
return false;
19594
}
19595
19596
if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
19597
{
19598
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
19599
return false;
19600
}
19601
19602
if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
19603
{
19604
if (!m_uastc_hdr_transcoder.transcode_image(fmt,
19605
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19606
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index,
19607
0, (uint32_t)total_2D_image_size,
19608
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19609
{
19610
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19611
return false;
19612
}
19613
}
19614
else
19615
{
19616
if (!m_uastc_transcoder.transcode_image(fmt,
19617
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19618
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index,
19619
0, (uint32_t)total_2D_image_size,
19620
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19621
{
19622
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19623
return false;
19624
}
19625
}
19626
}
19627
else
19628
{
19629
// Shouldn't get here.
19630
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n");
19631
assert(0);
19632
return false;
19633
}
19634
19635
return true;
19636
}
19637
19638
bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
19639
{
19640
const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
19641
const uint64_t comp_size = m_levels[level_index].m_byte_length;
19642
19643
const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length;
19644
19645
if (((size_t)comp_size) != comp_size)
19646
{
19647
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n");
19648
return false;
19649
}
19650
if (((size_t)uncomp_size) != uncomp_size)
19651
{
19652
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n");
19653
return false;
19654
}
19655
19656
if (!uncomp_data.try_resize((size_t)uncomp_size))
19657
{
19658
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n");
19659
return false;
19660
}
19661
19662
if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19663
{
19664
#if BASISD_SUPPORT_KTX2_ZSTD
19665
size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
19666
if (ZSTD_isError(actualUncompSize))
19667
{
19668
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n");
19669
return false;
19670
}
19671
if (actualUncompSize != uncomp_size)
19672
{
19673
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n");
19674
return false;
19675
}
19676
#else
19677
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n");
19678
return false;
19679
#endif
19680
}
19681
19682
return true;
19683
}
19684
19685
bool ktx2_transcoder::read_astc_6x6_hdr_intermediate_global_data()
19686
{
19687
const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
19688
assert(image_count);
19689
19690
const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset;
19691
19692
if (m_header.m_sgd_byte_length != image_count * sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc))
19693
{
19694
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: Invalid global data length\n");
19695
return false;
19696
}
19697
19698
m_astc_6x6_intermediate_image_descs.resize(image_count);
19699
19700
memcpy(m_astc_6x6_intermediate_image_descs.data(), pSrc, sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc) * image_count);
19701
19702
// Sanity check the image descs
19703
for (uint32_t i = 0; i < image_count; i++)
19704
{
19705
// transcode_image() will validate the slice offsets/lengths before transcoding.
19706
19707
if (!m_astc_6x6_intermediate_image_descs[i].m_rgb_slice_byte_length)
19708
{
19709
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: image descs sanity check failed (1)\n");
19710
return false;
19711
}
19712
}
19713
19714
return true;
19715
}
19716
19717
bool ktx2_transcoder::decompress_etc1s_global_data()
19718
{
19719
// Note: we don't actually support 3D textures in here yet
19720
//uint32_t layer_pixel_depth = basisu::maximum<uint32_t>(m_header.m_pixel_depth, 1);
19721
//for (uint32_t i = 1; i < m_header.m_level_count; i++)
19722
// layer_pixel_depth += basisu::maximum<uint32_t>(m_header.m_pixel_depth >> i, 1);
19723
19724
const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
19725
assert(image_count);
19726
19727
const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset;
19728
19729
memcpy(&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header));
19730
pSrc += sizeof(ktx2_etc1s_global_data_header);
19731
19732
if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length))
19733
{
19734
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n");
19735
return false;
19736
}
19737
19738
if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count))
19739
{
19740
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n");
19741
return false;
19742
}
19743
19744
// Sanity check the ETC1S header.
19745
if ((sizeof(ktx2_etc1s_global_data_header) +
19746
sizeof(ktx2_etc1s_image_desc) * image_count +
19747
m_etc1s_header.m_endpoints_byte_length +
19748
m_etc1s_header.m_selectors_byte_length +
19749
m_etc1s_header.m_tables_byte_length +
19750
m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length)
19751
{
19752
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n");
19753
return false;
19754
}
19755
19756
if (!m_etc1s_image_descs.try_resize(image_count))
19757
{
19758
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n");
19759
return false;
19760
}
19761
19762
memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count);
19763
pSrc += sizeof(ktx2_etc1s_image_desc) * image_count;
19764
19765
// Sanity check the ETC1S image descs
19766
for (uint32_t i = 0; i < image_count; i++)
19767
{
19768
// m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding.
19769
19770
if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length)
19771
{
19772
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n");
19773
return false;
19774
}
19775
19776
if (m_has_alpha)
19777
{
19778
if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length)
19779
{
19780
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n");
19781
return false;
19782
}
19783
}
19784
}
19785
19786
const uint8_t* pEndpoint_data = pSrc;
19787
const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length;
19788
const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length;
19789
19790
if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length))
19791
{
19792
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n");
19793
return false;
19794
}
19795
19796
if (!m_etc1s_transcoder.decode_palettes(
19797
m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length,
19798
m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length))
19799
{
19800
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n");
19801
return false;
19802
}
19803
19804
return true;
19805
}
19806
19807
bool ktx2_transcoder::read_key_values()
19808
{
19809
if (!m_header.m_kvd_byte_length)
19810
{
19811
if (m_header.m_kvd_byte_offset)
19812
{
19813
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n");
19814
return false;
19815
}
19816
19817
return true;
19818
}
19819
19820
if (m_header.m_kvd_byte_offset < sizeof(ktx2_header))
19821
{
19822
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n");
19823
return false;
19824
}
19825
19826
if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size)
19827
{
19828
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n");
19829
return false;
19830
}
19831
19832
const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset;
19833
uint32_t src_left = m_header.m_kvd_byte_length;
19834
19835
if (!m_key_values.try_reserve(8))
19836
{
19837
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19838
return false;
19839
}
19840
19841
while (src_left > sizeof(uint32_t))
19842
{
19843
uint32_t l = basisu::read_le_dword(pSrc);
19844
19845
pSrc += sizeof(uint32_t);
19846
src_left -= sizeof(uint32_t);
19847
19848
if (l < 2)
19849
{
19850
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n");
19851
return false;
19852
}
19853
19854
if (src_left < l)
19855
{
19856
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n");
19857
return false;
19858
}
19859
19860
if (!m_key_values.try_resize(m_key_values.size() + 1))
19861
{
19862
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19863
return false;
19864
}
19865
19866
basisu::uint8_vec& key_data = m_key_values.back().m_key;
19867
basisu::uint8_vec& value_data = m_key_values.back().m_value;
19868
19869
do
19870
{
19871
if (!l)
19872
{
19873
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n");
19874
return false;
19875
}
19876
19877
if (!key_data.try_push_back(*pSrc++))
19878
{
19879
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19880
return false;
19881
}
19882
19883
src_left--;
19884
l--;
19885
19886
} while (key_data.back());
19887
19888
// Ensure key and value are definitely 0 terminated
19889
if (!key_data.try_push_back('\0'))
19890
{
19891
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19892
return false;
19893
}
19894
19895
if (!value_data.try_resize(l))
19896
{
19897
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19898
return false;
19899
}
19900
19901
if (l)
19902
{
19903
memcpy(value_data.data(), pSrc, l);
19904
pSrc += l;
19905
src_left -= l;
19906
}
19907
19908
// Ensure key and value are definitely 0 terminated
19909
if (!value_data.try_push_back('\0'))
19910
{
19911
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19912
return false;
19913
}
19914
19915
uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3;
19916
uint32_t alignment_bytes = (4 - ofs) & 3;
19917
19918
if (src_left < alignment_bytes)
19919
{
19920
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n");
19921
return false;
19922
}
19923
19924
pSrc += alignment_bytes;
19925
src_left -= alignment_bytes;
19926
}
19927
19928
return true;
19929
}
19930
19931
#endif // BASISD_SUPPORT_KTX2
19932
19933
// Reports whether KTX2 support was compiled into the transcoder.
// Returns true when BASISD_SUPPORT_KTX2 is non-zero at compile time, otherwise false.
bool basisu_transcoder_supports_ktx2()
{
#if !BASISD_SUPPORT_KTX2
	return false;
#else
	return true;
#endif
}
19941
19942
// Reports whether Zstandard supercompression support for KTX2 files was compiled into the transcoder.
// Returns true when BASISD_SUPPORT_KTX2_ZSTD is non-zero at compile time, otherwise false.
bool basisu_transcoder_supports_ktx2_zstd()
{
#if !BASISD_SUPPORT_KTX2_ZSTD
	return false;
#else
	return true;
#endif
}
19950
19951
//-------------------------------
19952
19953
#if BASISD_SUPPORT_UASTC_HDR
19954
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
19955
basist::half_float float_to_half(float val)
19956
{
19957
union { float f; int32_t i; uint32_t u; } fi = { val };
19958
const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
19959
int s = flt_s, e = 0, m = 0;
19960
19961
// inf/NaN
19962
if (flt_e == 0xff)
19963
{
19964
e = 31;
19965
if (flt_m != 0) // NaN
19966
m = 1;
19967
}
19968
// not zero or denormal
19969
else if (flt_e != 0)
19970
{
19971
int new_exp = flt_e - 127;
19972
if (new_exp > 15)
19973
e = 31;
19974
else if (new_exp < -14)
19975
m = lrintf((1 << 24) * fabsf(fi.f));
19976
else
19977
{
19978
e = new_exp + 15;
19979
m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
19980
}
19981
}
19982
19983
assert((0 <= m) && (m <= 1024));
19984
if (m == 1024)
19985
{
19986
e++;
19987
m = 0;
19988
}
19989
19990
assert((s >= 0) && (s <= 1));
19991
assert((e >= 0) && (e <= 31));
19992
assert((m >= 0) && (m <= 1023));
19993
19994
basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m);
19995
return result;
19996
}
19997
19998
//------------------------------------------------------------------------------------------------
19999
// HDR support
20000
//
20001
// Originally from bc6h_enc.cpp
20002
// BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
20003
20004
// Per-mode endpoint bit counts, indexed by [mode index][0..3].
// Entry 0 is the number of base bits; entries 1-3 are the r, g and b bit counts used for the remaining
// endpoints (pack_bc6h_block() checks endpoint 0 against entry 0 and endpoints 1-3 of component c against
// entry c + 1). The mode index is this file's 0-based numbering; the row comments note where it differs from
// the numbering in the MS/D3D docs.
const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b
20005
{
20006
// 2 subsets
20007
{ 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
20008
{ 7, 6, 6, 6, }, // 1
20009
{ 11, 5, 4, 4, }, // 2
20010
{ 11, 4, 5, 4, }, // 3
20011
{ 11, 4, 4, 5, }, // 4
20012
{ 9, 5, 5, 5, }, // 5
20013
{ 8, 6, 5, 5, }, // 6
20014
{ 8, 5, 6, 5, }, // 7
20015
{ 8, 5, 5, 6, }, // 8
20016
{ 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
20017
// 1 subset
20018
{ 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
20019
{ 11, 9, 9, 9, }, // 11
20020
{ 12, 8, 8, 8, }, // 12
20021
{ 16, 4, 4, 4, } // 13, also useful for solid blocks
20022
};
20023
20024
// Maps a 5-bit value to a mode index (0-13), or -1 for values that don't correspond to any mode.
// This is the inverse of the s_mode_bits table in pack_bc6h_block(): modes 0 and 1 only use the low 2 bits
// (00 and 01), so every value with those low bits maps to them.
// NOTE(review): presumably indexed by the low 5 bits of a packed block when decoding - confirm against the caller.
const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 };
20025
20026
// Per-mode bit layouts, indexed by [mode index][layout entry].
// Each mode's list describes, in order, the fields that follow the mode bits, and is terminated by an entry
// whose component index is -1. As used by pack_bc6h_block(): a component index of 3 selects the partition
// pattern instead of an endpoint, and a first_bit of -1 means the entry is the single bit at position last_bit.
const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
20027
{
20028
// comp_index, subset*2+lh_index, last_bit, first_bit
20029
//------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
20030
{ { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
20031
{ 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
20032
{ 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20033
//------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
20034
{ { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
20035
{ 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
20036
{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
20037
{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20038
//------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
20039
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
20040
{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
20041
{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20042
//------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
20043
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
20044
{ 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
20045
{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20046
//------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
20047
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
20048
{ 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
20049
{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20050
//------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
20051
{ { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
20052
{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
20053
{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20054
//------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
20055
{ { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
20056
{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
20057
{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20058
//------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
20059
{ { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
20060
{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
20061
{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20062
//------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
20063
{ { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
20064
{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
20065
{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20066
//------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
20067
{ { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
20068
{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
20069
{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20070
//------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
20071
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
20072
//------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
20073
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
20074
//------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
20075
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
20076
//------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
20077
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
20078
};
20079
20080
// 2-subset partition patterns, indexed by [pattern][y][x] over the 4x4 block.
// The low bit of each entry is the subset (0 or 1) the texel belongs to.
// The same as the first 32 2-subset patterns in BC7.
20081
// Bit 7 is a flag indicating that the weight uses 1 less bit than usual.
20082
const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
20083
{
20084
{ {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
20085
{ {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
20086
{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
20087
{ {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
20088
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
20089
{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
20090
{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
20091
{ {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
20092
{ {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
20093
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
20094
{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
20095
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
20096
{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
20097
{ {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
20098
{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
20099
{ {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
20100
};
20101
20102
// Weight tables for 3-bit (8 entry) and 4-bit (16 entry) weight indices. Both run from 0 to 64.
// NOTE(review): presumably these are the BC6H interpolation weights, with 64 meaning 100% of the second
// endpoint - confirm against the decoder that uses them.
const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
20103
const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
20104
20105
// Appends the low num_bits bits of val to a 128-bit value held in two 64-bit halves (l = bits 0-63,
// h = bits 64-127), starting at bit position bit_pos with val's least significant bit first, then advances
// bit_pos by num_bits.
// The bits are OR'd in, so the destination bits are expected to be zero beforehand.
// Requires 1 <= num_bits <= 63, bit_pos < 128, val < 2^num_bits, and bit_pos + num_bits <= 128.
static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
{
	assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
	assert(val < (1ULL << num_bits));

	const uint32_t first_bit = bit_pos;
	const uint32_t end_bit = first_bit + num_bits;

	if (first_bit >= 64)
	{
		// Entirely inside the high half.
		h |= (val << (first_bit - 64));
	}
	else
	{
		l |= (val << first_bit);

		// The field straddles the two halves: the bits that didn't fit in l spill into the bottom of h.
		if (end_bit > 64)
			h |= (val >> (64 - first_bit));
	}

	bit_pos = end_bit;
	assert(bit_pos <= 128);
}
20125
20126
static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
20127
{
20128
assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
20129
assert(val < (1ULL << num_bits));
20130
20131
for (uint32_t i = 0; i < num_bits; i++)
20132
write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h);
20133
}
20134
20135
// Serializes a logical BC6H block into its 128-bit physical form.
//   dst_blk - receives the 16 packed bytes (written as four little-endian dwords).
//   log_blk - mode, partition pattern, quantized endpoints and weights to pack; only read here.
// Layout written: mode prefix (2 bits for modes 0/1, 5 bits otherwise), then the endpoint/partition
// fields listed in g_bc6h_bit_layouts[mode], then the 16 weight indices.
void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
{
	const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };

	const uint32_t mode = log_blk.m_mode;
	assert(mode < NUM_BC6H_MODES);

	const bool two_subsets = (mode < BC6H_FIRST_1SUBSET_MODE_INDEX);

	// Two-subset modes need a valid pattern index; one-subset modes must leave it at zero.
	assert(two_subsets ? (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS) : (!log_blk.m_partition_pattern));

	// Sanity checks
	for (uint32_t c = 0; c < 3; c++)
	{
		assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits
		assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
		assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
		assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
	}

	// 128-bit accumulator, seeded with the mode prefix.
	uint64_t lo_bits = s_mode_bits[mode];
	uint64_t hi_bits = 0;
	uint32_t cur_bit = (mode < 2) ? 2 : 5;

	// Emit every field of this mode's layout table; the table is terminated by m_comp == -1.
	for (const bc6h_bit_layout* pField = &g_bc6h_bit_layouts[mode][0]; pField->m_comp != -1; pField++)
	{
		// Component index 3 selects the partition pattern rather than an endpoint component.
		const uint32_t src = (pField->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pField->m_comp][pField->m_index];

		if (pField->m_first_bit == -1)
		{
			// Single-bit field: only m_last_bit is meaningful.
			write_bits((src >> pField->m_last_bit) & 1, 1, cur_bit, lo_bits, hi_bits);
			continue;
		}

		const uint32_t total_bits = basisu::iabs(pField->m_last_bit - pField->m_first_bit) + 1;
		const uint32_t field = (src >> basisu::minimum(pField->m_first_bit, pField->m_last_bit)) & ((1 << total_bits) - 1);

		// A descending bit range (first > last) is stored bit-reversed.
		if (pField->m_first_bit > pField->m_last_bit)
			write_rev_bits(field, total_bits, cur_bit, lo_bits, hi_bits);
		else
			write_bits(field, total_bits, cur_bit, lo_bits, hi_bits);
	}

	const uint32_t bits_per_weight = two_subsets ? 3 : 4;
	const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];

	for (uint32_t i = 0; i < 16; i++)
	{
		// Anchor pixels drop their (implicitly zero) top weight bit: pattern entries with bit 7 set
		// in two-subset modes, pixel 0 in one-subset modes.
		const bool is_anchor = two_subsets ? ((pPat[i] >> 7) != 0) : (i == 0);
		const uint32_t num_bits = bits_per_weight - (is_anchor ? 1u : 0u);

		const uint32_t sel = log_blk.m_weights[i];
		assert(sel < (1u << num_bits));

		write_bits(sel, num_bits, cur_bit, lo_bits, hi_bits);
	}

	// Every mode must fill the block exactly.
	assert(cur_bit == 128);

	basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)lo_bits);
	basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(lo_bits >> 32u));
	basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)hi_bits);
	basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(hi_bits >> 32u));
}
20215
20216
#if 0
// Disabled reference code: expands a bits_per_comp-bit quantized component to a 16-bit value.
// 15+ bit inputs pass through; 0 and the all-ones code map to 0 and 0xFFFF; everything else is
// rescaled with rounding.
static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
{
	if (bits_per_comp >= 15)
		return comp;

	if (comp == 0)
		return 0;

	if (comp == ((1u << bits_per_comp) - 1u))
		return 0xFFFFu;

	return ((comp << 16u) + 0x8000u) >> bits_per_comp;
}
#endif
20233
20234
// Inclusive range of sub-16-bit endpoint precisions (6,7,8,9,10,11,12) that half_to_blog_tab() accepts.
const uint32_t BC6H_BLOG_TAB_MIN = 6, BC6H_BLOG_TAB_MAX = 12;
20237
//const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
20238
20239
// Handles 16, or 6-12 bits. Others assert.
20240
static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
20241
{
20242
assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
20243
assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)));
20244
20245
return bc6h_half_to_blog(h, num_bits);
20246
#if 0
20247
BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
20248
BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
20249
20250
if (num_bits == 16)
20251
{
20252
return bc6h_half_to_blog(h, 16);
20253
}
20254
else
20255
{
20256
assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
20257
20258
// Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
20259
return bc6h_half_to_blog(h, num_bits);
20260
}
20261
#endif
20262
}
20263
20264
// Set once bc6h_enc_init() has been called; the BC6H encode entry points assert on it.
bool g_bc6h_enc_initialized;

// One-time initialization of the BC6H encoder. Currently this only raises the initialized flag;
// repeated calls are harmless.
void bc6h_enc_init()
{
	if (!g_bc6h_enc_initialized)
		g_bc6h_enc_initialized = true;
}
20273
20274
// mode 10, 4-bit weights
20275
void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20276
{
20277
assert(g_bc6h_enc_initialized);
20278
20279
for (uint32_t i = 0; i < 16; i++)
20280
{
20281
assert(pWeights[i] <= 15);
20282
}
20283
20284
bc6h_logical_block log_blk;
20285
log_blk.clear();
20286
20287
// Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
20288
for (uint32_t c = 0; c < 3; c++)
20289
{
20290
log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
20291
log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
20292
}
20293
20294
memcpy(log_blk.m_weights, pWeights, 16);
20295
20296
if (log_blk.m_weights[0] & 8)
20297
{
20298
for (uint32_t i = 0; i < 16; i++)
20299
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
20300
20301
for (uint32_t c = 0; c < 3; c++)
20302
{
20303
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20304
}
20305
}
20306
20307
log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
20308
pack_bc6h_block(*pPacked_block, log_blk);
20309
}
20310
20311
// Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights
20312
void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20313
{
20314
assert(g_bc6h_enc_initialized);
20315
20316
for (uint32_t i = 0; i < 16; i++)
20317
{
20318
assert(pWeights[i] <= 15);
20319
}
20320
20321
bc6h_logical_block log_blk;
20322
log_blk.clear();
20323
20324
for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
20325
{
20326
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
20327
const int base_bitmask = (1 << num_base_bits) - 1;
20328
const int delta_bitmask = (1 << num_delta_bits) - 1;
20329
BASISU_NOTE_UNUSED(base_bitmask);
20330
20331
assert(num_delta_bits < num_base_bits);
20332
assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
20333
20334
uint32_t blog_endpoints[3][2];
20335
20336
// Convert half endpoints to blog 16, 12, or 11
20337
for (uint32_t c = 0; c < 3; c++)
20338
{
20339
blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
20340
assert((int)blog_endpoints[c][0] <= base_bitmask);
20341
20342
blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
20343
assert((int)blog_endpoints[c][1] <= base_bitmask);
20344
}
20345
20346
// Copy weights
20347
memcpy(log_blk.m_weights, pWeights, 16);
20348
20349
// Ensure first weight MSB is 0
20350
if (log_blk.m_weights[0] & 8)
20351
{
20352
// Invert weights
20353
for (uint32_t i = 0; i < 16; i++)
20354
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
20355
20356
// Swap blog quantized endpoints
20357
for (uint32_t c = 0; c < 3; c++)
20358
{
20359
std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
20360
}
20361
}
20362
20363
const int max_delta = (1 << (num_delta_bits - 1)) - 1;
20364
const int min_delta = -(max_delta + 1);
20365
assert((max_delta - min_delta) == delta_bitmask);
20366
20367
bool failed_flag = false;
20368
for (uint32_t c = 0; c < 3; c++)
20369
{
20370
log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
20371
20372
int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
20373
if ((delta < min_delta) || (delta > max_delta))
20374
{
20375
failed_flag = true;
20376
break;
20377
}
20378
20379
log_blk.m_endpoints[c][1] = delta & delta_bitmask;
20380
}
20381
20382
if (failed_flag)
20383
continue;
20384
20385
log_blk.m_mode = mode;
20386
pack_bc6h_block(*pPacked_block, log_blk);
20387
20388
return;
20389
}
20390
20391
// Worst case fall back to mode 10, which can handle any endpoints
20392
bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
20393
}
20394
20395
// Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset
20396
void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20397
{
20398
assert(g_bc6h_enc_initialized);
20399
20400
for (uint32_t i = 0; i < 16; i++)
20401
{
20402
assert(pWeights[i] <= 7);
20403
}
20404
20405
bc6h_logical_block log_blk;
20406
log_blk.clear();
20407
20408
// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
20409
for (uint32_t c = 0; c < 3; c++)
20410
{
20411
log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
20412
log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0];
20413
20414
log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
20415
log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1];
20416
}
20417
20418
memcpy(log_blk.m_weights, pWeights, 16);
20419
20420
const uint32_t pat_index = 0;
20421
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20422
20423
if (log_blk.m_weights[0] & 4)
20424
{
20425
for (uint32_t c = 0; c < 3; c++)
20426
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20427
20428
for (uint32_t i = 0; i < 16; i++)
20429
if ((pPat[i] & 0x7F) == 0)
20430
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20431
}
20432
20433
if (log_blk.m_weights[15] & 4)
20434
{
20435
for (uint32_t c = 0; c < 3; c++)
20436
std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
20437
20438
for (uint32_t i = 0; i < 16; i++)
20439
if ((pPat[i] & 0x7F) == 1)
20440
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20441
}
20442
20443
log_blk.m_mode = 9;
20444
log_blk.m_partition_pattern = pat_index;
20445
pack_bc6h_block(*pPacked_block, log_blk);
20446
}
20447
20448
// Tries modes 0-8, falls back to mode 9
20449
void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20450
{
20451
assert(g_bc6h_enc_initialized);
20452
20453
for (uint32_t i = 0; i < 16; i++)
20454
{
20455
assert(pWeights[i] <= 7);
20456
}
20457
20458
bc6h_logical_block log_blk;
20459
log_blk.clear();
20460
20461
for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
20462
{
20463
static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
20464
const uint32_t mode = s_mode_order[mode_iter];
20465
20466
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
20467
const int base_bitmask = (1 << num_base_bits) - 1;
20468
BASISU_NOTE_UNUSED(base_bitmask);
20469
20470
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
20471
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
20472
20473
uint32_t blog_endpoints[3][4];
20474
20475
// Convert half endpoints to blog 7-11
20476
for (uint32_t c = 0; c < 3; c++)
20477
{
20478
blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
20479
blog_endpoints[c][2] = blog_endpoints[c][0];
20480
assert((int)blog_endpoints[c][0] <= base_bitmask);
20481
20482
blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
20483
blog_endpoints[c][3] = blog_endpoints[c][1];
20484
assert((int)blog_endpoints[c][1] <= base_bitmask);
20485
}
20486
20487
const uint32_t pat_index = 0;
20488
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20489
20490
memcpy(log_blk.m_weights, pWeights, 16);
20491
20492
if (log_blk.m_weights[0] & 4)
20493
{
20494
// Swap part 0's endpoints/weights
20495
for (uint32_t c = 0; c < 3; c++)
20496
std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
20497
20498
for (uint32_t i = 0; i < 16; i++)
20499
if ((pPat[i] & 0x7F) == 0)
20500
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20501
}
20502
20503
if (log_blk.m_weights[15] & 4)
20504
{
20505
// Swap part 1's endpoints/weights
20506
for (uint32_t c = 0; c < 3; c++)
20507
std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
20508
20509
for (uint32_t i = 0; i < 16; i++)
20510
if ((pPat[i] & 0x7F) == 1)
20511
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20512
}
20513
20514
bool failed_flag = false;
20515
20516
for (uint32_t c = 0; c < 3; c++)
20517
{
20518
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
20519
20520
const int min_delta = -(max_delta + 1);
20521
assert((max_delta - min_delta) == delta_bitmasks[c]);
20522
20523
log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
20524
20525
int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
20526
int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
20527
int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
20528
20529
if ((delta0 < min_delta) || (delta0 > max_delta) ||
20530
(delta1 < min_delta) || (delta1 > max_delta) ||
20531
(delta2 < min_delta) || (delta2 > max_delta))
20532
{
20533
failed_flag = true;
20534
break;
20535
}
20536
20537
log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
20538
log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
20539
log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
20540
}
20541
20542
if (failed_flag)
20543
continue;
20544
20545
log_blk.m_mode = mode;
20546
log_blk.m_partition_pattern = pat_index;
20547
pack_bc6h_block(*pPacked_block, log_blk);
20548
20549
return;
20550
20551
} // mode_iter
20552
20553
bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
20554
}
20555
20556
// pEndpoints[subset][comp][lh_index]
20557
void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
20558
{
20559
assert(g_bc6h_enc_initialized);
20560
assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
20561
20562
for (uint32_t i = 0; i < 16; i++)
20563
{
20564
assert(pWeights[i] <= 7);
20565
}
20566
20567
bc6h_logical_block log_blk;
20568
log_blk.clear();
20569
20570
// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
20571
for (uint32_t s = 0; s < 2; s++)
20572
{
20573
for (uint32_t c = 0; c < 3; c++)
20574
{
20575
log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6);
20576
log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6);
20577
}
20578
}
20579
20580
memcpy(log_blk.m_weights, pWeights, 16);
20581
20582
//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
20583
const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
20584
20585
const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
20586
if (invert_flag)
20587
{
20588
for (uint32_t c = 0; c < 3; c++)
20589
{
20590
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]);
20591
std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]);
20592
}
20593
}
20594
20595
const uint32_t pat_index = bc7_pattern;
20596
assert(pat_index < 32);
20597
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20598
20599
bool swap_flags[2] = { false, false };
20600
for (uint32_t i = 0; i < 16; i++)
20601
{
20602
if ((pPat[i] & 0x80) == 0)
20603
continue;
20604
20605
if (log_blk.m_weights[i] & 4)
20606
{
20607
const uint32_t p = pPat[i] & 1;
20608
swap_flags[p] = true;
20609
}
20610
}
20611
20612
if (swap_flags[0])
20613
{
20614
for (uint32_t c = 0; c < 3; c++)
20615
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20616
20617
for (uint32_t i = 0; i < 16; i++)
20618
if ((pPat[i] & 0x7F) == 0)
20619
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20620
}
20621
20622
if (swap_flags[1])
20623
{
20624
for (uint32_t c = 0; c < 3; c++)
20625
std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
20626
20627
for (uint32_t i = 0; i < 16; i++)
20628
if ((pPat[i] & 0x7F) == 1)
20629
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20630
}
20631
20632
log_blk.m_mode = 9;
20633
log_blk.m_partition_pattern = pat_index;
20634
pack_bc6h_block(*pPacked_block, log_blk);
20635
}
20636
20637
// Encodes a two-subset block with 3-bit weights.
// Tries modes 0-8 (delta-coded endpoints) in order of decreasing base precision and uses the first one
// where all three deltas of every component fit; falls back to mode 9 (direct endpoints) otherwise.
//   pPacked_block     - receives the packed block.
//   common_part_index - index into basist::g_astc_bc7_common_partitions2, which supplies the BC pattern
//                       index (m_bc7) and whether the two subsets are exchanged (m_invert).
//   pEndpoints        - pEndpoints[subset][comp][lh_index] half-float endpoints.
//   pWeights          - 16 weight indices, each 0..7.
void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
{
	assert(g_bc6h_enc_initialized);
	// Same bound the mode 9 variant checks before indexing the partition table.
	assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);

	for (uint32_t i = 0; i < 16; i++)
	{
		assert(pWeights[i] <= 7);
	}

	static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least

	// Everything below up to the mode loop depends only on the partition and the weights, not on the
	// mode being tried, so it is computed once.
	const uint32_t pat_index = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
	const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;

	assert(pat_index < 32);
	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];

	bc6h_logical_block log_blk;
	log_blk.clear();

	memcpy(log_blk.m_weights, pWeights, 16);

	// A subset must be mirrored when the weight at one of its anchor pixels (pattern entries with
	// bit 7 set) has its MSB set, because that bit is not stored.
	bool swap_flags[2] = { false, false };
	for (uint32_t i = 0; i < 16; i++)
	{
		if ((pPat[i] & 0x80) && (log_blk.m_weights[i] & 4))
			swap_flags[pPat[i] & 1] = true;
	}

	// Invert the weights of every pixel that belongs to a mirrored subset.
	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t subset = pPat[i] & 0x7F;
		if ((subset < 2) && swap_flags[subset])
			log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
	}

	const uint32_t subset_xor = invert_flag ? 1u : 0u;

	for (uint32_t mode_iter = 0; mode_iter < 9; mode_iter++)
	{
		const uint32_t mode = s_mode_order[mode_iter];
		const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];

		// Convert half endpoints to blog 7-11, placing them directly in their final slots:
		// output subset s reads input subset (s ^ invert), and a mirrored subset takes its
		// low/high endpoints in exchanged order.
		uint32_t blog_endpoints[3][4];
		for (uint32_t s = 0; s < 2; s++)
		{
			const uint32_t src_subset = s ^ subset_xor;
			const uint32_t lh_xor = swap_flags[s] ? 1u : 0u;

			for (uint32_t c = 0; c < 3; c++)
			{
				blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[src_subset][c][0 ^ lh_xor], num_base_bits);
				blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[src_subset][c][1 ^ lh_xor], num_base_bits);
			}
		}

		// Try packing the endpoints: endpoints 1..3 become signed deltas from endpoint 0.
		bool fits = true;
		for (uint32_t c = 0; (c < 3) && fits; c++)
		{
			const uint32_t num_delta_bits = g_bc6h_mode_sig_bits[mode][c + 1];
			const int delta_bitmask = (1 << num_delta_bits) - 1;

			const int max_delta = (1 << (num_delta_bits - 1)) - 1;
			const int min_delta = -(max_delta + 1);
			assert((max_delta - min_delta) == delta_bitmask);

			log_blk.m_endpoints[c][0] = blog_endpoints[c][0];

			int deltas[3];
			for (uint32_t k = 0; k < 3; k++)
			{
				deltas[k] = (int)blog_endpoints[c][k + 1] - (int)blog_endpoints[c][0];
				if ((deltas[k] < min_delta) || (deltas[k] > max_delta))
					fits = false;
			}

			if (!fits)
				break;

			for (uint32_t k = 0; k < 3; k++)
				log_blk.m_endpoints[c][k + 1] = deltas[k] & delta_bitmask;
		}

		if (!fits)
			continue;

		log_blk.m_mode = mode;
		log_blk.m_partition_pattern = pat_index;
		pack_bc6h_block(*pPacked_block, log_blk);

		return;
	}

	// No delta mode could represent the endpoints; mode 9 stores them directly.
	bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
}
20769
20770
// Encodes a block where every pixel has the same color.
//   pPacked_block - receives the packed block.
//   pColor        - the three half-float components.
// Returns false without writing anything if any component has its sign bit (0x8000) set; otherwise
// encodes via bc6h_enc_block_1subset_4bit_weights() with identical endpoints and all-zero weights,
// and returns true.
bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
{
	assert(g_bc6h_enc_initialized);

	const uint32_t any_bits = pColor[0] | pColor[1] | pColor[2];
	if (any_bits & 0x8000)
		return false;

	// ASTC block unpacker won't allow Inf/NaN's to come through.
	//if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
	//	return false;

	uint8_t weights[16] = { 0 };

	// Low and high endpoints are the same color.
	half_float endpoints[3][2];
	for (uint32_t c = 0; c < 3; c++)
	{
		endpoints[c][0] = pColor[c];
		endpoints[c][1] = pColor[c];
	}

	bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);

	return true;
}
20798
20799
//--------------------------------------------------------------------------------------------------------------------------
20800
// basisu_astc_hdr_core.cpp
20801
20802
// Set to true at the end of astc_hdr_core_init(); the decode/transcode helpers below assert on it.
static bool g_astc_hdr_core_initialized;
20803
// Indexed by ASTC partition pattern id (0..1023). Filled by astc_hdr_core_init() with the index of the matching entry in basist::g_astc_bc7_common_partitions2, or -1 (0xFF fill) where no entry was recorded.
static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
20804
20805
//--------------------------------------------------------------------------------------------------------------------------
20806
20807
void astc_hdr_core_init()
20808
{
20809
if (g_astc_hdr_core_initialized)
20810
return;
20811
20812
memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
20813
20814
for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
20815
{
20816
const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
20817
//const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
20818
20819
assert(astc_pattern < 1024);
20820
g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
20821
}
20822
20823
g_astc_hdr_core_initialized = true;
20824
}
20825
20826
//--------------------------------------------------------------------------------------------------------------------------
20827
20828
static inline int astc_hdr_sign_extend(int src, int num_src_bits)
20829
{
20830
assert(basisu::in_range(num_src_bits, 2, 31));
20831
20832
const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
20833
if (negative)
20834
return src | ~((1 << num_src_bits) - 1);
20835
else
20836
return src & ((1 << num_src_bits) - 1);
20837
}
20838
20839
static inline void astc_hdr_pack_bit(
20840
int& dst, int dst_bit,
20841
int src_val, int src_bit = 0)
20842
{
20843
assert(dst_bit >= 0 && dst_bit <= 31);
20844
int bit = basisu::get_bit(src_val, src_bit);
20845
dst |= (bit << dst_bit);
20846
}
20847
20848
//--------------------------------------------------------------------------------------------------------------------------
20849
20850
void decode_mode7_to_qlog12_ise20(
20851
const uint8_t* pEndpoints,
20852
int e[2][3],
20853
int* pScale)
20854
{
20855
assert(g_astc_hdr_core_initialized);
20856
20857
for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
20858
{
20859
assert(pEndpoints[i] <= 255);
20860
}
20861
20862
const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
20863
20864
// Extract mode bits and unpack to major component and mode.
20865
const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
20866
20867
int majcomp, mode;
20868
if ((modeval & 0xC) != 0xC)
20869
{
20870
majcomp = modeval >> 2;
20871
mode = modeval & 3;
20872
}
20873
else if (modeval != 0xF)
20874
{
20875
majcomp = modeval & 3;
20876
mode = 4;
20877
}
20878
else
20879
{
20880
majcomp = 0;
20881
mode = 5;
20882
}
20883
20884
// Extract low-order bits of r, g, b, and s.
20885
int red = v0 & 0x3f;
20886
int green = v1 & 0x1f;
20887
int blue = v2 & 0x1f;
20888
int scale = v3 & 0x1f;
20889
20890
// Extract high-order bits, which may be assigned depending on mode
20891
int x0 = (v1 >> 6) & 1;
20892
int x1 = (v1 >> 5) & 1;
20893
int x2 = (v2 >> 6) & 1;
20894
int x3 = (v2 >> 5) & 1;
20895
int x4 = (v3 >> 7) & 1;
20896
int x5 = (v3 >> 6) & 1;
20897
int x6 = (v3 >> 5) & 1;
20898
20899
// Now move the high-order xs into the right place.
20900
const int ohm = 1 << mode;
20901
if (ohm & 0x30) green |= x0 << 6;
20902
if (ohm & 0x3A) green |= x1 << 5;
20903
if (ohm & 0x30) blue |= x2 << 6;
20904
if (ohm & 0x3A) blue |= x3 << 5;
20905
if (ohm & 0x3D) scale |= x6 << 5;
20906
if (ohm & 0x2D) scale |= x5 << 6;
20907
if (ohm & 0x04) scale |= x4 << 7;
20908
if (ohm & 0x3B) red |= x4 << 6;
20909
if (ohm & 0x04) red |= x3 << 6;
20910
if (ohm & 0x10) red |= x5 << 7;
20911
if (ohm & 0x0F) red |= x2 << 7;
20912
if (ohm & 0x05) red |= x1 << 8;
20913
if (ohm & 0x0A) red |= x0 << 8;
20914
if (ohm & 0x05) red |= x0 << 9;
20915
if (ohm & 0x02) red |= x6 << 9;
20916
if (ohm & 0x01) red |= x3 << 10;
20917
if (ohm & 0x02) red |= x5 << 10;
20918
20919
// Shift the bits to the top of the 12-bit result.
20920
static const int s_shamts[6] = { 1,1,2,3,4,5 };
20921
20922
const int shamt = s_shamts[mode];
20923
red <<= shamt;
20924
green <<= shamt;
20925
blue <<= shamt;
20926
scale <<= shamt;
20927
20928
// Minor components are stored as differences
20929
if (mode != 5)
20930
{
20931
green = red - green;
20932
blue = red - blue;
20933
}
20934
20935
// Swizzle major component into place
20936
if (majcomp == 1)
20937
std::swap(red, green);
20938
20939
if (majcomp == 2)
20940
std::swap(red, blue);
20941
20942
// Clamp output values, set alpha to 1.0
20943
e[1][0] = basisu::clamp(red, 0, 0xFFF);
20944
e[1][1] = basisu::clamp(green, 0, 0xFFF);
20945
e[1][2] = basisu::clamp(blue, 0, 0xFFF);
20946
20947
e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
20948
e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
20949
e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
20950
20951
if (pScale)
20952
*pScale = scale;
20953
}
20954
20955
//--------------------------------------------------------------------------------------------------------------------------
20956
20957
bool decode_mode7_to_qlog12(
20958
const uint8_t* pEndpoints,
20959
int e[2][3],
20960
int* pScale,
20961
uint32_t ise_endpoint_range)
20962
{
20963
assert(g_astc_hdr_core_initialized);
20964
20965
if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
20966
{
20967
decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
20968
}
20969
else
20970
{
20971
uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
20972
20973
for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
20974
dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
20975
20976
decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
20977
}
20978
20979
for (uint32_t i = 0; i < 2; i++)
20980
{
20981
if (e[i][0] > (int)MAX_QLOG12)
20982
return false;
20983
20984
if (e[i][1] > (int)MAX_QLOG12)
20985
return false;
20986
20987
if (e[i][2] > (int)MAX_QLOG12)
20988
return false;
20989
}
20990
20991
return true;
20992
}
20993
20994
//--------------------------------------------------------------------------------------------------------------------------
20995
20996
void decode_mode11_to_qlog12_ise20(
20997
const uint8_t* pEndpoints,
20998
int e[2][3])
20999
{
21000
#ifdef _DEBUG
21001
for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
21002
{
21003
assert(pEndpoints[i] <= 255);
21004
}
21005
#endif
21006
21007
const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
21008
21009
if (maj_comp == 3)
21010
{
21011
// Direct, qlog8 and qlog7
21012
e[0][0] = pEndpoints[0] << 4;
21013
e[1][0] = pEndpoints[1] << 4;
21014
21015
e[0][1] = pEndpoints[2] << 4;
21016
e[1][1] = pEndpoints[3] << 4;
21017
21018
e[0][2] = (pEndpoints[4] & 127) << 5;
21019
e[1][2] = (pEndpoints[5] & 127) << 5;
21020
}
21021
else
21022
{
21023
int v0 = pEndpoints[0];
21024
int v1 = pEndpoints[1];
21025
int v2 = pEndpoints[2];
21026
int v3 = pEndpoints[3];
21027
int v4 = pEndpoints[4];
21028
int v5 = pEndpoints[5];
21029
21030
int mode = 0;
21031
astc_hdr_pack_bit(mode, 0, v1, 7);
21032
astc_hdr_pack_bit(mode, 1, v2, 7);
21033
astc_hdr_pack_bit(mode, 2, v3, 7);
21034
21035
int va = v0;
21036
astc_hdr_pack_bit(va, 8, v1, 6);
21037
21038
int vb0 = v2 & 63;
21039
int vb1 = v3 & 63;
21040
int vc = v1 & 63;
21041
21042
int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
21043
int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
21044
static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
21045
vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
21046
vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
21047
21048
int x0 = basisu::get_bit(v2, 6);
21049
int x1 = basisu::get_bit(v3, 6);
21050
int x2 = basisu::get_bit(v4, 6);
21051
int x3 = basisu::get_bit(v5, 6);
21052
int x4 = basisu::get_bit(v4, 5);
21053
int x5 = basisu::get_bit(v5, 5);
21054
21055
const uint32_t ohm = 1U << mode;
21056
if (ohm & 0xA4) va |= (x0 << 9);
21057
if (ohm & 0x08) va |= (x2 << 9);
21058
if (ohm & 0x50) va |= (x4 << 9);
21059
if (ohm & 0x50) va |= (x5 << 10);
21060
if (ohm & 0xA0) va |= (x1 << 10);
21061
if (ohm & 0xC0) va |= (x2 << 11);
21062
if (ohm & 0x04) vc |= (x1 << 6);
21063
if (ohm & 0xE8) vc |= (x3 << 6);
21064
if (ohm & 0x20) vc |= (x2 << 7);
21065
if (ohm & 0x5B) vb0 |= (x0 << 6);
21066
if (ohm & 0x5B) vb1 |= (x1 << 6);
21067
if (ohm & 0x12) vb0 |= (x2 << 7);
21068
if (ohm & 0x12) vb1 |= (x3 << 7);
21069
21070
const int shamt = (mode >> 1) ^ 3;
21071
21072
va = (uint32_t)va << shamt;
21073
vb0 = (uint32_t)vb0 << shamt;
21074
vb1 = (uint32_t)vb1 << shamt;
21075
vc = (uint32_t)vc << shamt;
21076
vd0 = (uint32_t)vd0 << shamt;
21077
vd1 = (uint32_t)vd1 << shamt;
21078
21079
// qlog12
21080
e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
21081
e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
21082
e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
21083
21084
e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
21085
e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
21086
e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
21087
21088
if (maj_comp)
21089
{
21090
std::swap(e[0][0], e[0][maj_comp]);
21091
std::swap(e[1][0], e[1][maj_comp]);
21092
}
21093
}
21094
}
21095
21096
//--------------------------------------------------------------------------------------------------------------------------
21097
21098
bool decode_mode11_to_qlog12(
21099
const uint8_t* pEndpoints,
21100
int e[2][3],
21101
uint32_t ise_endpoint_range)
21102
{
21103
assert(g_astc_hdr_core_initialized);
21104
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21105
21106
if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
21107
{
21108
decode_mode11_to_qlog12_ise20(pEndpoints, e);
21109
}
21110
else
21111
{
21112
uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
21113
21114
for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
21115
dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
21116
21117
decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
21118
}
21119
21120
for (uint32_t i = 0; i < 2; i++)
21121
{
21122
if (e[i][0] > (int)MAX_QLOG12)
21123
return false;
21124
21125
if (e[i][1] > (int)MAX_QLOG12)
21126
return false;
21127
21128
if (e[i][2] > (int)MAX_QLOG12)
21129
return false;
21130
}
21131
21132
return true;
21133
}
21134
21135
//--------------------------------------------------------------------------------------------------------------------------
21136
21137
// Encodes a single-subset block to BC6H from already-converted half float endpoints
// (h_e[comp][low/high]) and the ASTC weights held in best_blk.
//
// Weight ISE range 5 is sent to the 3-bit weight encoder unchanged. Every other
// supported range (1-4, 6-8) is translated to 4-bit BC6H weights through a
// per-range lookup table (range 8 is copied as-is) and sent to the 4-bit encoder.
//
// Returns false only for an unsupported weight ISE range.
bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
{
	assert(g_astc_hdr_core_initialized);
	assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));

	// ASTC ISE weight value -> 4-bit BC6H weight index, one table per weight ISE range.
	static const uint8_t s_range1_to_bc6h_4bit[3] = { 0, 8, 15 };                                   // 3 levels
	static const uint8_t s_range2_to_bc6h_4bit[4] = { 0, 5, 10, 15 };                               // 4 levels
	static const uint8_t s_range3_to_bc6h_4bit[5] = { 0, 4, 7, 11, 15 };                            // 5 levels
	static const uint8_t s_range4_to_bc6h_4bit[6] = { 0, 15, 3, 12, 6, 9 };                         // 6 levels
	static const uint8_t s_range6_to_bc6h_4bit[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };          // 10 levels
	static const uint8_t s_range7_to_bc6h_4bit[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };   // 12 levels

	const uint8_t* pRemap = nullptr;

	switch (best_blk.m_weight_ise_range)
	{
	case 5:
		// Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights,
		// but encode 1-subset as 2 equal subsets.
		bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
		return true;
	case 1: pRemap = s_range1_to_bc6h_4bit; break;
	case 2: pRemap = s_range2_to_bc6h_4bit; break;
	case 3: pRemap = s_range3_to_bc6h_4bit; break;
	case 4: pRemap = s_range4_to_bc6h_4bit; break;
	case 6: pRemap = s_range6_to_bc6h_4bit; break;
	case 7: pRemap = s_range7_to_bc6h_4bit; break;
	case 8:
		// 16 levels: the weights are copied without remapping (pRemap stays null).
		break;
	default:
		assert(0);
		return false;
	}

	uint8_t bc6h_weights[16];

	if (pRemap)
	{
		for (uint32_t texel = 0; texel < 16; texel++)
			bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
	}
	else
	{
		memcpy(bc6h_weights, best_blk.m_weights, 16);
	}

	bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);

	return true;
}
21215
21216
//--------------------------------------------------------------------------------------------------------------------------
21217
21218
// Encodes a 2-partition block to a 2-subset BC6H block with 3-bit weights.
//
// common_part_index: index of the partition pattern shared by ASTC and BC6H
//                    (must be < TOTAL_ASTC_BC6H_COMMON_PARTITIONS2).
// best_blk:          logical ASTC block; both partitions must use the same CEM (7 or 11),
//                    and the weight/endpoint ISE ranges must form one of the accepted pairs.
//
// Returns false if the block's configuration is not one of the accepted combinations,
// if endpoint decoding fails, or if any endpoint converts to a half float Inf/NaN.
bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
{
	assert(g_astc_hdr_core_initialized);
	assert(best_blk.m_num_partitions == 2);
	assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);

	// UASTC HDR checks: both CEM's must be equal, and must be mode 7 or mode 11.
	const uint32_t cem = best_blk.m_color_endpoint_modes[0];
	if (best_blk.m_color_endpoint_modes[1] != cem)
		return false;

	const bool is_mode7 = (cem == 7);
	if (!is_mode7 && (cem != 11))
		return false;

	// Required endpoint ISE range for each accepted weight ISE range (index = weight ISE range,
	// entry 0 is unused padding).
	static const uint8_t s_mode7_endpoint_range[6] = { 0, 20, 20, 19, 17, 15 };
	static const uint8_t s_mode11_endpoint_range[3] = { 0, 14, 12 };

	const uint32_t weight_range = best_blk.m_weight_ise_range;
	const uint32_t max_weight_range = is_mode7 ? 5u : 2u;
	if ((weight_range < 1) || (weight_range > max_weight_range))
		return false;

	const uint32_t required_endpoint_range = is_mode7 ? s_mode7_endpoint_range[weight_range] : s_mode11_endpoint_range[weight_range];
	if (best_blk.m_endpoint_ise_range != required_endpoint_range)
		return false;

	half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]

	for (uint32_t subset = 0; subset < 2; subset++)
	{
		int qlog[2][3];

		const bool decoded = is_mode7 ?
			decode_mode7_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE7_ENDPOINTS, qlog, nullptr, best_blk.m_endpoint_ise_range) :
			decode_mode11_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE11_ENDPOINTS, qlog, best_blk.m_endpoint_ise_range);
		if (!decoded)
			return false;

		// Convert low (0) then high (1) endpoint of each component, bailing on Inf/NaN.
		for (uint32_t comp = 0; comp < 3; comp++)
		{
			for (uint32_t lh = 0; lh < 2; lh++)
			{
				const half_float h = qlog_to_half(qlog[lh][comp], 12);
				bc6h_endpoints[subset][comp][lh] = h;

				if (is_half_inf_or_nan(h))
					return false;
			}
		}
	}

	// ASTC ISE weight value -> 3-bit BC6H weight index, one table per weight ISE range.
	static const uint8_t s_range1_to_bc6h_3bit[3] = { 0, 4, 7 };
	static const uint8_t s_range2_to_bc6h_3bit[4] = { 0, 2, 5, 7 };
	static const uint8_t s_range3_to_bc6h_3bit[5] = { 0, 2, 4, 5, 7 };
	static const uint8_t s_range4_to_bc6h_3bit[6] = { 0, 7, 1, 6, 3, 4 };

	const uint8_t* pRemap = nullptr;

	switch (weight_range)
	{
	case 1: pRemap = s_range1_to_bc6h_3bit; break;
	case 2: pRemap = s_range2_to_bc6h_3bit; break;
	case 3: pRemap = s_range3_to_bc6h_3bit; break;
	case 4: pRemap = s_range4_to_bc6h_3bit; break;
	case 5:
		// Weights are copied without remapping (pRemap stays null).
		break;
	default:
		assert(0);
		return false;
	}

	uint8_t bc6h_weights[16];

	if (pRemap)
	{
		for (uint32_t texel = 0; texel < 16; texel++)
			bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
	}
	else
	{
		memcpy(bc6h_weights, best_blk.m_weights, 16);
	}

	bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);

	return true;
}
21324
21325
//--------------------------------------------------------------------------------------------------------------------------
21326
// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
21327
bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
21328
{
21329
assert(g_astc_hdr_core_initialized);
21330
if (!g_astc_hdr_core_initialized)
21331
{
21332
assert(0);
21333
return false;
21334
}
21335
21336
astc_helpers::log_astc_block log_blk;
21337
21338
if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
21339
{
21340
// Failed unpacking ASTC data
21341
return false;
21342
}
21343
21344
return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
21345
}
21346
21347
//--------------------------------------------------------------------------------------------------------------------------
21348
// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
21349
bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
21350
{
21351
assert(g_astc_hdr_core_initialized);
21352
if (!g_astc_hdr_core_initialized)
21353
{
21354
assert(0);
21355
return false;
21356
}
21357
21358
if (log_blk.m_solid_color_flag_ldr)
21359
{
21360
// Don't support LDR solid colors.
21361
return false;
21362
}
21363
21364
if (log_blk.m_solid_color_flag_hdr)
21365
{
21366
// Solid color HDR block
21367
return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
21368
}
21369
21370
// Only support 4x4 grid sizes
21371
if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
21372
return false;
21373
21374
// Don't support dual plane encoding
21375
if (log_blk.m_dual_plane)
21376
return false;
21377
21378
if (log_blk.m_num_partitions == 1)
21379
{
21380
// Handle 1 partition (or subset)
21381
21382
// UASTC HDR checks
21383
if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
21384
return false;
21385
21386
int e[2][3];
21387
bool success;
21388
21389
if (log_blk.m_color_endpoint_modes[0] == 7)
21390
{
21391
if (log_blk.m_endpoint_ise_range != 20)
21392
return false;
21393
21394
success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
21395
}
21396
else if (log_blk.m_color_endpoint_modes[0] == 11)
21397
{
21398
// UASTC HDR checks
21399
if (log_blk.m_weight_ise_range <= 7)
21400
{
21401
if (log_blk.m_endpoint_ise_range != 20)
21402
return false;
21403
}
21404
else if (log_blk.m_endpoint_ise_range != 19)
21405
{
21406
return false;
21407
}
21408
21409
success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
21410
}
21411
else
21412
{
21413
return false;
21414
}
21415
21416
if (!success)
21417
return false;
21418
21419
// Transform endpoints to half float
21420
half_float h_e[3][2] =
21421
{
21422
{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
21423
{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
21424
{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
21425
};
21426
21427
// Sanity check for NaN/Inf
21428
for (uint32_t i = 0; i < 2; i++)
21429
if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
21430
return false;
21431
21432
// Transcode to bc6h
21433
if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
21434
return false;
21435
}
21436
else if (log_blk.m_num_partitions == 2)
21437
{
21438
// Handle 2 partition (or subset)
21439
int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
21440
if (common_bc7_pat_index < 0)
21441
return false;
21442
21443
assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
21444
21445
if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
21446
return false;
21447
}
21448
else
21449
{
21450
// Only supports 1 or 2 partitions (or subsets)
21451
return false;
21452
}
21453
21454
return true;
21455
}
21456
21457
// ASTC 6x6 support
21458
namespace astc_6x6_hdr
21459
{
21460
// Table of the block mode configurations used by the ASTC 6x6 HDR codec.
//
// Each row lists, in order: a dual plane flag, the CEM (7 or 11), the number of
// partitions/subsets, two grid dimensions (they match the WxH labels in the group
// comments below), two endpoint/weight ISE range pairs, a BASIST_HDR_6X6_LEVEL* bitmask
// (0 on some rows), and a final small integer that only varies (0/1/2) on dual plane rows.
// NOTE(review): the exact field names live in block_mode_desc, which is declared elsewhere;
// the second ISE range pair differs from the first only on the 2x2 rows — confirm against the struct.
const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS] =
{
	// ------ mode 11, 1 subset

	// 6x6
	{ false, 11, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 6, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x5/5x6
	{ false, 11, 1, 6, 5, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 5, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x4/4x6
	{ false, 11, 1, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 4, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3/3x6
	{ false, 11, 1, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 3, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 5x5, 4x4
	{ false, 11, 1, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 4, 4, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 3x3
	{ false, 11, 1, 3, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ mode 7, 1 subset

	// 6x6
	{ false, 7, 1, 6, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x6
	{ false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 5x6/6x5
	{ false, 7, 1, 5, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 3x6/6x3
	{ false, 7, 1, 3, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ mode 11, 2 subset

	// 6x6
	{ false, 11, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3/3x6
	{ false, 11, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 3x6/6x3
	{ false, 11, 2, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 2, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 4x6/6x4
	{ false, 11, 2, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 11, 2, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// ------ mode 7, 2 subset

	// 6x5/5x6
	{ false, 7, 2, 5, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 2, 6, 5, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x4/4x6 mode 7
	{ false, 7, 2, 4, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 2, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x6
	{ false, 7, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x6
	{ false, 7, 2, 6, 6, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, 0, 0 },

	// 5x5
	{ false, 7, 2, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, 0, 0 },

	// 6x3/3x6 mode 7
	{ false, 7, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },
	{ false, 7, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },

	// 6x3/3x6 mode 7
	{ false, 7, 2, 3, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 },
	{ false, 7, 2, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 },

	// ------ dual plane (mode 11, 1 subset; last field runs 0, 1, 2)

	// 3x6
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 6x3
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 3x3
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 4x4
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 },

	// 5x5
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 },

	// ------ 2x2 modes for RDO
	// note 2x2 modes will be upsampled to 4x4 during transcoding (the min # of weight bits is 7 in ASTC)
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },
	{ false, 11, 1, 2, 2, astc_helpers::BISE_128_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ 3 subsets (mode 7)

	// 6x6
	{ false, 7, 3, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 5x5
	{ false, 7, 3, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 4x4
	{ false, 7, 3, 4, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },

	// 3x3
	{ false, 7, 3, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },

	// 6x4/4x6
	{ false, 7, 3, 6, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x4/4x6
	{ false, 7, 3, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// 6x5/5x6
	{ false, 7, 3, 6, 5, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 5, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3/3x6
	{ false, 7, 3, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// 6x3/3x6
	{ false, 7, 3, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3/3x6
	{ false, 7, 3, 6, 3, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },

	// 5x4/4x5
	{ false, 7, 3, 5, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 4, 5, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
};
21612
21613
// Table of (x, y) offset pairs, laid out one y offset per row below.
// Row y = 0 covers x = -1..-4; rows y = -1..-3 cover x = 3..-4; row y = -4 covers x = 3..0.
// NOTE(review): the name suggests these address an earlier block whose data is reused — confirm at the use site.
const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS] =
{
	// y = 0
	{ -1, 0 }, { -2, 0 }, { -3, 0 }, { -4, 0 },

	// y = -1
	{ 3, -1 }, { 2, -1 }, { 1, -1 }, { 0, -1 }, { -1, -1 }, { -2, -1 }, { -3, -1 }, { -4, -1 },

	// y = -2
	{ 3, -2 }, { 2, -2 }, { 1, -2 }, { 0, -2 }, { -1, -2 }, { -2, -2 }, { -3, -2 }, { -4, -2 },

	// y = -3
	{ 3, -3 }, { 2, -3 }, { 1, -3 }, { 0, -3 }, { -1, -3 }, { -2, -3 }, { -3, -3 }, { -4, -3 },

	// y = -4
	{ 3, -4 }, { 2, -4 }, { 1, -4 }, { 0, -4 }
};
21621
21622
//--------------------------------------------------------------------------------------------------------------------------
21623
21624
void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range)
21625
{
21626
if (from_ise_range == to_ise_range)
21627
{
21628
if (pDst_ise_vals != pSrc_ise_vals)
21629
memcpy(pDst_ise_vals, pSrc_ise_vals, n);
21630
return;
21631
}
21632
21633
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;
21634
const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(to_ise_range).m_val_to_ise;
21635
21636
for (uint32_t i = 0; i < n; i++)
21637
pDst_ise_vals[i] = quant_tab[dequant_tab[pSrc_ise_vals[i]]];
21638
}
21639
21640
//--------------------------------------------------------------------------------------------------------------------------
21641
21642
// Returns bit src_bit (0 = least significant, 31 = most significant) of src_val as 0 or 1.
//
// The shift is done on the unsigned representation: right-shifting a negative
// signed value is implementation-defined before C++20, whereas the unsigned
// shift is fully defined and gives the same answer on two's complement targets.
inline int get_bit(
	int src_val, int src_bit)
{
	assert(src_bit >= 0 && src_bit <= 31);
	const unsigned int bits = static_cast<unsigned int>(src_val);
	return static_cast<int>((bits >> src_bit) & 1u);
}
21649
21650
inline void pack_bit(
21651
int& dst, int dst_bit,
21652
int src_val, int src_bit = 0)
21653
{
21654
assert(dst_bit >= 0 && dst_bit <= 31);
21655
int bit = get_bit(src_val, src_bit);
21656
dst |= (bit << dst_bit);
21657
}
21658
21659
// Valid for weight ISE ranges 12-192 levels. Preserves upper 2 or 3 bits post-quantization.
21660
static uint8_t g_quantize_tables_preserve2[astc_helpers::TOTAL_ISE_RANGES - 1][256];
21661
static uint8_t g_quantize_tables_preserve3[astc_helpers::TOTAL_ISE_RANGES - 1][256];
21662
21663
// Lookup table with NUM_UNIQUE_PARTITIONS2 entries; every listed value is in [1, 1023].
// NOTE(review): by its name this maps a unique 2-subset partition index to an ASTC partition seed — confirm at the use site.
const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2] =
{
	86, 959, 936, 476, 1007, 672, 447, 423, 488, 422, 273, 65, 267, 786, 585, 195, 108, 731, 878, 812, 264, 125, 868, 581, 258, 390, 549, 872, 661, 352, 645, 543, 988,
	906, 903, 616, 482, 529, 3, 286, 272, 303, 151, 504, 498, 260, 79, 66, 608, 769, 305, 610, 1014, 967, 835, 789, 7, 951, 691, 15, 763, 976, 438, 314, 601, 673, 177,
	252, 615, 436, 220, 899, 623, 433, 674, 278, 797, 107, 847, 114, 470, 760, 821, 490, 329, 945, 387, 471, 225, 172, 83, 418, 966, 439, 316, 247, 43, 343, 625, 798,
	1, 61, 73, 307, 136, 474, 42, 664, 1013, 249, 389, 227, 374, 121, 48, 538, 226, 309, 554, 802, 834, 335, 495, 10, 955, 461, 293, 508, 153, 101, 63, 139, 31, 687,
	132, 174, 324, 545, 289, 39, 178, 594, 963, 854, 222, 323, 998, 964, 598, 475, 720, 1019, 983, 91, 703, 614, 394, 612, 281, 207, 930, 758, 586, 128, 517, 426, 306,
	168, 713, 36, 458, 876, 368, 780, 5, 9, 214, 109, 553, 726, 175, 103, 753, 684, 44, 665, 53, 500, 367, 611, 119, 732, 639, 326, 203, 156, 686, 910, 255, 62, 392, 591,
	112, 88, 213, 19, 1022, 478, 90, 486, 799, 702, 730, 414, 99, 1008, 142, 886, 373, 216, 69, 393, 299, 648, 415, 822, 912, 110, 567, 550, 693, 2, 138, 59, 271, 562, 295,
	714, 719, 199, 893, 831, 1006, 662, 235, 262, 78, 51, 902, 298, 190, 169, 583, 347, 890, 958, 909, 49, 987, 696, 633, 480, 50, 764, 826, 1023, 1016, 437, 891, 774, 257,
	724, 791, 526, 593, 690, 638, 858, 895, 794, 995, 130, 87, 877, 819, 318, 649, 376, 211, 284, 937, 370, 688, 229, 994, 115, 842, 60, 521, 95, 694, 804, 146, 754, 487, 55,
	17, 770, 450, 223, 4, 137, 911, 236, 683, 523, 47, 181, 24, 270, 602, 736, 11, 355, 148, 351, 762, 1009, 16, 210, 619, 805, 874, 807, 887, 403, 999, 810, 27, 402, 551, 135,
	778, 33, 409, 993, 71, 363, 159, 183, 77, 596, 670, 380, 968, 811, 404, 348, 539, 158, 578, 196, 621, 68, 530, 193, 100, 167, 919, 353, 366, 327, 643, 948, 518, 756, 801, 558,
	28, 705, 116, 94, 898, 453, 622, 647, 231, 445, 652, 230, 191, 277, 292, 254, 198, 766, 386, 232, 29, 70, 942, 740, 291, 607, 411, 496, 839, 8, 675, 319, 742, 21, 547, 627, 716,
	663, 23, 914, 631, 595, 499, 685, 950, 510, 54, 587, 432, 45, 646, 25, 122, 947, 171, 862, 441, 808, 722, 14, 74, 658, 129, 266, 1001, 534, 395, 527, 250, 206, 237, 67, 897, 634,
	572, 569, 533, 37, 341, 89, 463, 419, 75, 134, 283, 943, 519, 362, 144, 681, 407, 954, 131, 455, 934, 46, 513, 339, 194, 361, 606, 852, 546, 655, 1015, 147, 506, 240, 56, 836, 76,
	98, 600, 430, 388, 980, 695, 817, 279, 58, 215, 149, 170, 531, 870, 18, 727, 154, 26, 938, 929, 302, 697, 452, 218, 700, 524, 828, 751, 869, 217, 440, 354
};
21681
21682
// Lookup table with NUM_UNIQUE_PARTITIONS3 entries. The entries are listed in strictly ascending order and
// all lie in [0, 1023].
// NOTE(review): judging by the name, entry i is presumably the partition seed of the i-th unique 3-subset
// partition pattern - confirm against the code that reads this table.
const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3] =
{
	0, 8, 11, 14, 15, 17, 18, 19, 26, 31, 34, 35, 36, 38, 44, 47, 48, 49, 51, 56,
	59, 61, 70, 74, 76, 82, 88, 90, 96, 100, 103, 104, 108, 110, 111, 117, 122, 123,
	126, 127, 132, 133, 135, 139, 147, 150, 151, 152, 156, 157, 163, 166, 168, 171,
	175, 176, 179, 181, 182, 183, 186, 189, 192, 199, 203, 205, 207, 210, 214, 216,
	222, 247, 249, 250, 252, 254, 260, 261, 262, 263, 266, 272, 273, 275, 276, 288,
	291, 292, 293, 294, 297, 302, 309, 310, 313, 314, 318, 327, 328, 331, 335, 337,
	346, 356, 357, 358, 363, 365, 368, 378, 381, 384, 386, 390, 391, 392, 396, 397,
	398, 399, 401, 410, 411, 419, 427, 430, 431, 437, 439, 440, 451, 455, 457, 458,
	459, 460, 462, 468, 470, 471, 472, 474, 475, 477, 479, 482, 483, 488, 493, 495,
	496, 502, 503, 504, 507, 510, 511, 512, 515, 516, 518, 519, 522, 523, 525, 526,
	527, 538, 543, 544, 546, 547, 549, 550, 552, 553, 554, 562, 570, 578, 579, 581,
	582, 588, 589, 590, 593, 595, 600, 606, 611, 613, 618, 623, 625, 632, 637, 638,
	645, 646, 650, 651, 658, 659, 662, 666, 667, 669, 670, 678, 679, 685, 686, 687,
	688, 691, 694, 696, 698, 699, 700, 701, 703, 704, 707, 713, 714, 715, 717, 719,
	722, 724, 727, 730, 731, 734, 738, 739, 743, 747, 748, 750, 751, 753, 758, 760,
	764, 766, 769, 775, 776, 783, 784, 785, 787, 791, 793, 798, 799, 802, 804, 805,
	806, 807, 808, 809, 810, 813, 822, 823, 825, 831, 835, 837, 838, 839, 840, 842,
	845, 846, 848, 853, 854, 858, 859, 860, 866, 874, 882, 884, 887, 888, 892, 894,
	898, 902, 907, 914, 915, 918, 919, 922, 923, 925, 927, 931, 932, 937, 938, 940,
	943, 944, 945, 953, 955, 958, 959, 963, 966, 971, 974, 979, 990, 991, 998, 999,
	1007, 1010, 1011, 1012, 1015, 1020, 1023
};
21706
21707
static void init_quantize_tables()
21708
{
21709
for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_12_LEVELS; ise_range--)
21710
{
21711
const uint32_t num_levels = astc_helpers::get_ise_levels(ise_range);
21712
const auto& ise_to_val_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_ISE_to_val;
21713
21714
for (uint32_t desired_val = 0; desired_val < 256; desired_val++)
21715
{
21716
{
21717
uint32_t best_err = UINT32_MAX;
21718
int best_ise_val = -1;
21719
21720
for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++)
21721
{
21722
const uint32_t quant_val = ise_to_val_tab[ise_val];
21723
21724
if ((quant_val & 0b11000000) != (desired_val & 0b11000000))
21725
continue;
21726
21727
uint32_t err = basisu::squarei((int)quant_val - (int)desired_val);
21728
if (err < best_err)
21729
{
21730
best_err = err;
21731
best_ise_val = ise_val;
21732
}
21733
21734
} // ise_val
21735
21736
assert(best_ise_val != -1);
21737
21738
g_quantize_tables_preserve2[ise_range][desired_val] = (uint8_t)best_ise_val;
21739
}
21740
21741
{
21742
uint32_t best_err = UINT32_MAX;
21743
int best_ise_val = -1;
21744
21745
for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++)
21746
{
21747
const uint32_t quant_val = ise_to_val_tab[ise_val];
21748
21749
if ((quant_val & 0b11100000) != (desired_val & 0b11100000))
21750
continue;
21751
21752
uint32_t err = basisu::squarei((int)quant_val - (int)desired_val);
21753
if (err < best_err)
21754
{
21755
best_err = err;
21756
best_ise_val = ise_val;
21757
}
21758
21759
} // ise_val
21760
21761
assert(best_ise_val != -1);
21762
21763
g_quantize_tables_preserve3[ise_range][desired_val] = (uint8_t)best_ise_val;
21764
}
21765
21766
} // desired_val
21767
21768
#if 0
21769
for (uint32_t i = 0; i < 256; i++)
21770
{
21771
if (g_quantize_tables_preserve2[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i])
21772
{
21773
fmt_printf("P2, Range: {}, {} vs. {}\n", ise_range, g_quantize_tables_preserve2[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]);
21774
}
21775
21776
if (g_quantize_tables_preserve3[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i])
21777
{
21778
fmt_printf("P3, Range: {}, {} vs. {}\n", ise_range, g_quantize_tables_preserve3[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]);
21779
}
21780
}
21781
#endif
21782
21783
} // ise_range
21784
}
21785
21786
void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints)
21787
{
21788
assert(pSrc_endpoints != pDst_endpoints);
21789
assert((src_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (src_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21790
assert((dst_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (dst_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21791
21792
// must be >=12 ISE levels for g_quantize_tables_preserve2 etc.
21793
assert(dst_ise_endpoint_range >= astc_helpers::BISE_12_LEVELS);
21794
21795
const uint32_t n = (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;
21796
21797
if (src_ise_endpoint_range == dst_ise_endpoint_range)
21798
{
21799
memcpy(pDst_endpoints, pSrc_endpoints, n);
21800
return;
21801
}
21802
21803
uint8_t temp_endpoints[basist::NUM_MODE11_ENDPOINTS];
21804
if (src_ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
21805
{
21806
assert(n <= basist::NUM_MODE11_ENDPOINTS);
21807
21808
const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val;
21809
21810
for (uint32_t i = 0; i < n; i++)
21811
temp_endpoints[i] = endpoint_dequant_tab[pSrc_endpoints[i]];
21812
21813
pSrc_endpoints = temp_endpoints;
21814
}
21815
21816
if (dst_ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
21817
{
21818
memcpy(pDst_endpoints, pSrc_endpoints, n);
21819
return;
21820
}
21821
21822
const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_val_to_ise;
21823
21824
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val;
21825
BASISU_NOTE_UNUSED(dequant_tab);
21826
21827
#if 1
21828
// A smarter value quantization that preserves the key upper bits. (If these bits get corrupted, the entire meaning of the encoding can get lost.)
21829
if (cem == 11)
21830
{
21831
assert(n == 6);
21832
21833
int maj_comp = 0;
21834
pack_bit(maj_comp, 0, pSrc_endpoints[4], 7);
21835
pack_bit(maj_comp, 1, pSrc_endpoints[5], 7);
21836
21837
if (maj_comp == 3)
21838
{
21839
// Direct
21840
pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]];
21841
pDst_endpoints[1] = quant_tab[pSrc_endpoints[1]];
21842
pDst_endpoints[2] = quant_tab[pSrc_endpoints[2]];
21843
pDst_endpoints[3] = quant_tab[pSrc_endpoints[3]];
21844
// No need for preserve1 tables, we can use the regular quantization tables because they preserve the MSB.
21845
pDst_endpoints[4] = quant_tab[pSrc_endpoints[4]];
21846
pDst_endpoints[5] = quant_tab[pSrc_endpoints[5]];
21847
21848
assert((dequant_tab[pDst_endpoints[4]] & 128) == (pSrc_endpoints[4] & 128));
21849
assert((dequant_tab[pDst_endpoints[5]] & 128) == (pSrc_endpoints[5] & 128));
21850
}
21851
else
21852
{
21853
pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]];
21854
pDst_endpoints[1] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[1]];
21855
pDst_endpoints[2] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[2]];
21856
pDst_endpoints[3] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[3]];
21857
pDst_endpoints[4] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[4]];
21858
pDst_endpoints[5] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[5]];
21859
21860
assert((dequant_tab[pDst_endpoints[1]] & 0b11000000) == (pSrc_endpoints[1] & 0b11000000));
21861
assert((dequant_tab[pDst_endpoints[2]] & 0b11000000) == (pSrc_endpoints[2] & 0b11000000));
21862
assert((dequant_tab[pDst_endpoints[3]] & 0b11000000) == (pSrc_endpoints[3] & 0b11000000));
21863
assert((dequant_tab[pDst_endpoints[4]] & 0b11100000) == (pSrc_endpoints[4] & 0b11100000));
21864
assert((dequant_tab[pDst_endpoints[5]] & 0b11100000) == (pSrc_endpoints[5] & 0b11100000));
21865
}
21866
}
21867
else if (cem == 7)
21868
{
21869
assert(n == 4);
21870
21871
pDst_endpoints[0] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[0]];
21872
pDst_endpoints[1] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[1]];
21873
pDst_endpoints[2] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[2]];
21874
pDst_endpoints[3] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[3]];
21875
21876
assert((dequant_tab[pDst_endpoints[0]] & 0b11000000) == (pSrc_endpoints[0] & 0b11000000));
21877
assert((dequant_tab[pDst_endpoints[1]] & 0b11100000) == (pSrc_endpoints[1] & 0b11100000));
21878
assert((dequant_tab[pDst_endpoints[2]] & 0b11100000) == (pSrc_endpoints[2] & 0b11100000));
21879
assert((dequant_tab[pDst_endpoints[3]] & 0b11100000) == (pSrc_endpoints[3] & 0b11100000));
21880
}
21881
else
21882
{
21883
assert(0);
21884
}
21885
#else
21886
for (uint32_t i = 0; i < n; i++)
21887
{
21888
uint32_t v = pSrc_endpoints[i];
21889
assert(v <= 255);
21890
21891
pDst_endpoints[i] = quant_tab[v];
21892
}
21893
#endif
21894
}
21895
21896
void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk)
21897
{
21898
assert(decomp_blk.m_weight_ise_range >= astc_helpers::BISE_2_LEVELS);
21899
assert(decomp_blk.m_weight_ise_range <= astc_helpers::BISE_32_LEVELS);
21900
21901
// Special case for 2x2 which isn't typically valid ASTC (too few weight bits without dual plane). Upsample to 4x4.
21902
if ((!dual_plane) && (grid_x == 2) && (grid_y == 2))
21903
{
21904
decomp_blk.m_grid_width = 4;
21905
decomp_blk.m_grid_height = 4;
21906
21907
//const uint32_t total_weight_levels = astc_helpers::bise_levels(decomp_blk.m_weight_ise_range);
21908
const auto& dequant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_ISE_to_val;
21909
const auto& quant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_val_to_ise;
21910
21911
astc_helpers::weighted_sample weights[16];
21912
21913
compute_upsample_weights(4, 4, 2, 2, weights);
21914
21915
for (uint32_t y = 0; y < 4; y++)
21916
{
21917
for (uint32_t x = 0; x < 4; x++)
21918
{
21919
const astc_helpers::weighted_sample& sample = weights[x + y * 4];
21920
21921
uint32_t total_weight = 8;
21922
21923
for (uint32_t yo = 0; yo < 2; yo++)
21924
{
21925
for (uint32_t xo = 0; xo < 2; xo++)
21926
{
21927
if (!sample.m_weights[yo][xo])
21928
continue;
21929
21930
total_weight += dequant_weight[transcode_weights[basisu::in_bounds((x + xo) + (y + yo) * grid_x, 0, grid_x * grid_y)]] * sample.m_weights[yo][xo];
21931
} // x
21932
} // y
21933
21934
total_weight >>= 4;
21935
21936
assert(total_weight <= 64);
21937
21938
decomp_blk.m_weights[x + y * 4] = quant_weight[total_weight];
21939
}
21940
}
21941
}
21942
else
21943
{
21944
const uint32_t num_planes = dual_plane ? 2 : 1;
21945
21946
decomp_blk.m_grid_width = (uint8_t)grid_x;
21947
decomp_blk.m_grid_height = (uint8_t)grid_y;
21948
memcpy(decomp_blk.m_weights, transcode_weights, grid_x * grid_y * num_planes);
21949
}
21950
}
21951
21952
// cur_y is the current destination row
21953
// prev_y is the row we want to access
21954
static inline int calc_row_index(int cur_y, int prev_y, int cur_row_index)
21955
{
21956
assert((cur_y >= 0) && (prev_y >= 0));
21957
assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS));
21958
21959
int delta_y = prev_y - cur_y;
21960
assert((delta_y > -REUSE_MAX_BUFFER_ROWS) && (delta_y <= 0));
21961
21962
cur_row_index += delta_y;
21963
if (cur_row_index < 0)
21964
cur_row_index += REUSE_MAX_BUFFER_ROWS;
21965
21966
assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS));
21967
21968
return cur_row_index;
21969
}
21970
21971
// Reads total_values ISE-range values from the bit decoder into pValues.
//
// decoder      - bit source.
// total_values - number of values to decode.
// ise_range    - ISE range of the values; must be <= BISE_256_LEVELS.
// pValues      - receives total_values bytes; each is (trit_or_quint << num_bits) | bits.
//
// Stream layout, as consumed here:
//   1. For ranges with a trit or quint component, all packed trit/quint groups come first. A full group holds
//      5 trits in 8 bits or 3 quints in 7 bits; a final partial group uses fewer bits (trits: 2/4/5/7 bits for
//      1/2/3/4 values, quints: 3/5 bits for 1/2 values).
//   2. Then, for each value, its plain bits.
//
// Returns false (without reading anything) if ise_range is invalid or if total_values needs more trit/quint
// groups than the fixed-size scratch array can hold; otherwise returns true.
bool decode_values(basist::bitwise_decoder& decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues)
{
	assert(ise_range <= astc_helpers::BISE_256_LEVELS);

	// The assert above disappears in release builds; never index the range table out of bounds.
	if (ise_range > astc_helpers::BISE_256_LEVELS)
		return false;

	const uint32_t ep_bits = astc_helpers::g_ise_range_table[ise_range][0];
	const uint32_t ep_trits = astc_helpers::g_ise_range_table[ise_range][1];
	const uint32_t ep_quints = astc_helpers::g_ise_range_table[ise_range][2];

	const uint32_t MAX_TQ_VALUES = 32;

	// bundle_size: values per packed group, mul: radix of each packed digit. Both stay 0 for bit-only ranges.
	uint32_t bundle_size = 0, mul = 0;
	if (ep_trits)
	{
		bundle_size = 5;
		mul = 3;
	}
	else if (ep_quints)
	{
		bundle_size = 3;
		mul = 5;
	}

	// Reject counts that would overflow tq_values[] below. Comparing total_values directly (instead of the
	// rounded-up group count) also avoids any wraparound in the round-up addition.
	if (bundle_size && (total_values > MAX_TQ_VALUES * bundle_size))
	{
		assert(0);
		return false;
	}

	// Number of packed groups, rounded up.
	const uint32_t total_tqs = bundle_size ? ((total_values + (bundle_size - 1)) / bundle_size) : 0;
	assert(total_tqs <= MAX_TQ_VALUES);

	uint32_t tq_values[MAX_TQ_VALUES];

	for (uint32_t i = 0; i < total_tqs; i++)
	{
		uint32_t num_bits = ep_trits ? 8 : 7;

		// The last group may be partial and is then stored in fewer bits.
		if (i == (total_tqs - 1))
		{
			uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size;
			if (ep_trits)
			{
				switch (num_remaining)
				{
				case 1: num_bits = 2; break;
				case 2: num_bits = 4; break;
				case 3: num_bits = 5; break;
				case 4: num_bits = 7; break;
				default: break;
				}
			}
			else if (ep_quints)
			{
				switch (num_remaining)
				{
				case 1: num_bits = 3; break;
				case 2: num_bits = 5; break;
				default: break;
				}
			}
		}

		tq_values[i] = (uint32_t)decoder.get_bits(num_bits);
	} // i

	uint32_t accum = 0;            // packed group currently being unpacked
	uint32_t accum_remaining = 0;  // digits left in accum
	uint32_t next_tq_index = 0;

	for (uint32_t i = 0; i < total_values; i++)
	{
		uint32_t value = (uint32_t)decoder.get_bits(ep_bits);

		if (total_tqs)
		{
			// Fetch the next packed group once the current one is used up.
			if (!accum_remaining)
			{
				assert(next_tq_index < total_tqs);
				accum = tq_values[next_tq_index++];
				accum_remaining = bundle_size;
			}

			// Peel off the next base-3/base-5 digit and place it above the plain bits.
			uint32_t v = accum % mul;
			accum /= mul;
			accum_remaining--;

			value |= (v << ep_bits);
		}

		pValues[i] = (uint8_t)value;
	}

	return true;
}
22059
22060
static inline uint32_t get_num_endpoint_vals(uint32_t cem)
22061
{
22062
assert((cem == 7) || (cem == 11));
22063
return (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;
22064
}
22065
22066
// The 16 interpolation weights selected by a 4-bit BC6H index, on a 0..64 scale
// (0 selects the first endpoint, 64 the second).
const uint32_t g_bc6h_weights4[16] =
{
	0, 4, 9, 13,
	17, 21, 26, 30,
	34, 38, 43, 47,
	51, 55, 60, 64
};
22067
22068
#if 0
// Disabled reference implementation, kept for comparison with fast_float_to_half_no_clamp_neg_nan_or_inf().

// Rounds a non-negative float to the nearest int (halves round up).
static BASISU_FORCE_INLINE int pos_lrintf(float x)
{
	assert(x >= 0.0f);
	return (int)(x + .5f);
}

// Converts a non-negative, finite float to half float bits.
static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_non_neg_no_nan_inf(float val)
{
	union { float f; int32_t i; uint32_t u; } fi = { val };

	const int flt_m = fi.i & 0x7FFFFF;
	const int flt_e = (fi.i >> 23) & 0xFF;

	assert(((fi.i >> 31) == 0) && (flt_e != 0xFF));

	int e = 0, m = 0;

	// Float zeros and denormals fall through and become half zero.
	if (flt_e != 0)
	{
		const int new_exp = flt_e - 127;

		if (new_exp > 15)
		{
			// Too large for a half.
			e = 31;
		}
		else if (new_exp < -14)
		{
			// Half denormal.
			m = pos_lrintf((1 << 24) * fabsf(fi.f));
		}
		else
		{
			// Normal half: rebias the exponent and round the mantissa from 23 to 10 bits.
			e = new_exp + 15;
			m = pos_lrintf(flt_m * (1.0f / ((float)(1 << 13))));
		}
	}

	assert((0 <= m) && (m <= 1024));

	// Mantissa rounding overflowed: carry into the exponent.
	if (m == 1024)
	{
		e++;
		m = 0;
	}

	assert((e >= 0) && (e <= 31));
	assert((m >= 0) && (m <= 1023));

	return (basist::half_float)((e << 10) | m);
}
#endif
22112
22113
// Overlays a 32-bit float with its raw bit pattern.
// 'u' must stay the first member: code in this file brace-initializes fu32 objects from a bit pattern.
union fu32
{
	uint32_t u;	// raw bits
	float f;	// the same 32 bits viewed as a float
};
22118
22119
// Converts a float to half float bits.
// The input must be finite and within [0, MAX_HALF_FLOAT]; negative values, NaN and Inf are not handled.
// The result is clamped to MAX_HALF_FLOAT_AS_INT_BITS.
static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f)
{
	assert(!isnan(f) && !isinf(f));
	assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));

	// 0x7800000 is the float 2^-112. Multiplying by it subtracts 112 from the exponent,
	// which changes the bias from 127 (float) to 15 (half).
	static const fu32 g_f_to_h{ 0x7800000 };

	fu32 rebiased;
	rebiased.f = f * g_f_to_h.f;

	// Keep the 5 exponent bits and the top 10 mantissa bits.
	uint32_t half_bits = (basist::half_float)((rebiased.u >> (23 - 10)) & 0x7FFF);

	// Round using the 13 mantissa bits that were dropped: round up only when they are strictly above the
	// halfway point (exact ties are left rounded down).
	const uint32_t dropped_bits = rebiased.u & 8191;
	if (dropped_bits > 4096)
		half_bits++;

	if (half_bits > basist::MAX_HALF_FLOAT_AS_INT_BITS)
		half_bits = basist::MAX_HALF_FLOAT_AS_INT_BITS;

	return (basist::half_float)half_bits;
}
22142
22143
// Returns the half float bit pattern of |f|, as a float, carrying the sign of f.
// Note the result is the integer bit pattern converted to float, not the half's numeric value.
static BASISU_FORCE_INLINE float ftoh(float f)
{
	const float magnitude_bits = (float)fast_float_to_half_no_clamp_neg_nan_or_inf(fabsf(f));

	if (f < 0.0f)
		return -magnitude_bits;

	return magnitude_bits;
}
22149
22150
// Supports positive and denormals only. No NaN or Inf.
22151
static BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
22152
{
22153
assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
22154
22155
// add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127)
22156
static const fu32 K = { 0x77800000 };
22157
22158
fu32 o;
22159
o.u = h << 13;
22160
o.f *= K.f;
22161
22162
return o.f;
22163
}
22164
22165
// Fast approximation of 1/sqrt(v): a bit-level initial estimate followed by one refinement step.
// Not exact, and it does not special-case v == 0.
static BASISU_FORCE_INLINE float inv_sqrt(float v)
{
	union
	{
		float as_float;
		uint32_t as_bits;
	} estimate;

	// Initial estimate from the float's bit pattern.
	estimate.as_float = v;
	estimate.as_bits = 0x5F1FFFF9UL - (estimate.as_bits >> 1);

	const float y = estimate.as_float;

	// One refinement step with tuned constants.
	return 0.703952253f * y * (2.38924456f - v * (y * y));
}
22178
22179
// Standard deviation thresholds used by the fast BC6H encoder to classify block complexity.
// Callers compare variances against the square of these values.
static const int FAST_BC6H_STD_DEV_THRESH = 256;
static const int FAST_BC6H_COMPLEX_STD_DEV_THRESH = 512;
static const int FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH = 2048;
22182
22183
static void assign_weights_simple_4(
22184
const basist::half_float* pPixels,
22185
uint8_t* pWeights,
22186
int min_r, int min_g, int min_b,
22187
int max_r, int max_g, int max_b, int64_t block_max_var)
22188
{
22189
BASISU_NOTE_UNUSED(block_max_var);
22190
22191
float fmin_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_r);
22192
float fmin_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_g);
22193
float fmin_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_b);
22194
22195
float fmax_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_r);
22196
float fmax_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_g);
22197
float fmax_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_b);
22198
22199
float fdir_r = fmax_r - fmin_r;
22200
float fdir_g = fmax_g - fmin_g;
22201
float fdir_b = fmax_b - fmin_b;
22202
22203
float l = inv_sqrt(fdir_r * fdir_r + fdir_g * fdir_g + fdir_b * fdir_b);
22204
if (l != 0.0f)
22205
{
22206
fdir_r *= l;
22207
fdir_g *= l;
22208
fdir_b *= l;
22209
}
22210
22211
float lr = ftoh(fmin_r * fdir_r + fmin_g * fdir_g + fmin_b * fdir_b);
22212
float hr = ftoh(fmax_r * fdir_r + fmax_g * fdir_g + fmax_b * fdir_b);
22213
22214
float frr = (hr == lr) ? 0.0f : (14.93333f / (float)(hr - lr));
22215
22216
lr = (-lr * frr) + 0.53333f;
22217
for (uint32_t i = 0; i < 16; i++)
22218
{
22219
const float r = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 0]);
22220
const float g = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 1]);
22221
const float b = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 2]);
22222
const float w = ftoh(r * fdir_r + g * fdir_g + b * fdir_b);
22223
22224
pWeights[i] = (uint8_t)basisu::clamp((int)(w * frr + lr), 0, 15);
22225
}
22226
}
22227
22228
static double assign_weights_4(
22229
const vec3F* pFloat_pixels, const float* pPixel_scales,
22230
uint8_t* pWeights,
22231
int min_r, int min_g, int min_b,
22232
int max_r, int max_g, int max_b, int64_t block_max_var, bool try_2subsets_flag,
22233
const fast_bc6h_params& params)
22234
{
22235
float cr[16], cg[16], cb[16];
22236
22237
for (uint32_t i = 0; i < 16; i++)
22238
{
22239
const uint32_t w = g_bc6h_weights4[i];
22240
22241
cr[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22242
cg[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22243
cb[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22244
}
22245
22246
double total_err = 0.0f;
22247
22248
if (params.m_brute_force_weight4_assignment)
22249
{
22250
for (uint32_t i = 0; i < 16; i++)
22251
{
22252
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22253
22254
float best_err = basisu::squaref(cr[0] - qr) + basisu::squaref(cg[0] - qg) + basisu::squaref(cb[0] - qb);
22255
uint32_t best_idx = 0;
22256
22257
for (uint32_t j = 1; j < 16; j++)
22258
{
22259
float rd = cr[j] - qr, gd = cg[j] - qg, bd = cb[j] - qb;
22260
float e = rd * rd + gd * gd + bd * bd;
22261
22262
if (e < best_err)
22263
{
22264
best_err = e;
22265
best_idx = j;
22266
}
22267
}
22268
22269
pWeights[i] = (uint8_t)best_idx;
22270
22271
total_err += best_err * pPixel_scales[i];
22272
}
22273
}
22274
else
22275
{
22276
const float dir_r = cr[15] - cr[0], dir_g = cg[15] - cg[0], dir_b = cb[15] - cb[0];
22277
22278
float dots[16];
22279
for (uint32_t i = 0; i < 16; i++)
22280
dots[i] = cr[i] * dir_r + cg[i] * dir_g + cb[i] * dir_b;
22281
22282
float mid_dots[15];
22283
bool monotonically_increasing = true;
22284
for (uint32_t i = 0; i < 15; i++)
22285
{
22286
mid_dots[i] = (dots[i] + dots[i + 1]) * .5f;
22287
22288
if (dots[i] > dots[i + 1])
22289
monotonically_increasing = false;
22290
}
22291
22292
const bool check_more_colors = block_max_var > (FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * 16); // watch prec
22293
22294
if (!monotonically_increasing)
22295
{
22296
// Seems very rare, not worth optimizing the other cases
22297
for (uint32_t i = 0; i < 16; i++)
22298
{
22299
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22300
22301
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22302
22303
float best_e = fabsf(d - dots[0]);
22304
int best_idx = 0;
22305
22306
for (int j = 1; j < 16; j++)
22307
{
22308
float e = fabsf(d - dots[j]);
22309
if (e < best_e)
22310
{
22311
best_e = e;
22312
best_idx = j;
22313
}
22314
}
22315
22316
assert((best_idx >= 0) && (best_idx <= 15));
22317
22318
pWeights[i] = (uint8_t)best_idx;
22319
22320
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22321
total_err += err * pPixel_scales[i];
22322
}
22323
}
22324
else if ((!try_2subsets_flag) || (!check_more_colors))
22325
{
22326
for (uint32_t i = 0; i < 16; i++)
22327
{
22328
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22329
22330
uint32_t best_idx = 0;
22331
22332
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22333
22334
int low = 0;
22335
22336
int mid = low + 7;
22337
if (d >= mid_dots[mid]) low = mid + 1;
22338
mid = low + 3;
22339
if (d >= mid_dots[mid]) low = mid + 1;
22340
mid = low + 1;
22341
if (d >= mid_dots[mid]) low = mid + 1;
22342
mid = low;
22343
if (d >= mid_dots[mid]) low = mid + 1;
22344
22345
best_idx = low;
22346
assert((best_idx >= 0) && (best_idx <= 15));
22347
22348
pWeights[i] = (uint8_t)best_idx;
22349
22350
// Giesen's MRSSE (Mean Relative Sum of Squared Errors).
22351
// Our ASTC HDR encoder uses slightly slower approx. MSLE, and it's too late/risky to eval the difference vs. MRSSE on the larger ASTC HDR blocks.
22352
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22353
total_err += err * pPixel_scales[i];
22354
}
22355
}
22356
else
22357
{
22358
for (uint32_t i = 0; i < 16; i++)
22359
{
22360
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22361
22362
uint32_t best_idx = 0;
22363
22364
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22365
22366
int low = 0;
22367
22368
int mid = low + 7;
22369
if (d >= mid_dots[mid]) low = mid + 1;
22370
mid = low + 3;
22371
if (d >= mid_dots[mid]) low = mid + 1;
22372
mid = low + 1;
22373
if (d >= mid_dots[mid]) low = mid + 1;
22374
mid = low;
22375
if (d >= mid_dots[mid]) low = mid + 1;
22376
22377
best_idx = low;
22378
assert((best_idx >= 0) && (best_idx <= 15));
22379
22380
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22381
22382
{
22383
int alt_idx = best_idx + 1;
22384
if (alt_idx > 15)
22385
alt_idx = 13;
22386
22387
float alt_err = basisu::squaref(qr - cr[alt_idx]) + basisu::squaref(qg - cg[alt_idx]) + basisu::squaref(qb - cb[alt_idx]);
22388
if (alt_err < err)
22389
{
22390
err = alt_err;
22391
best_idx = alt_idx;
22392
}
22393
}
22394
22395
{
22396
int alt_idx2 = best_idx - 1;
22397
if (alt_idx2 < 0)
22398
alt_idx2 = 2;
22399
float alt_err2 = basisu::squaref(qr - cr[alt_idx2]) + basisu::squaref(qg - cg[alt_idx2]) + basisu::squaref(qb - cb[alt_idx2]);
22400
if (alt_err2 < err)
22401
{
22402
err = alt_err2;
22403
best_idx = alt_idx2;
22404
}
22405
}
22406
22407
pWeights[i] = (uint8_t)best_idx;
22408
22409
total_err += err * pPixel_scales[i];
22410
}
22411
}
22412
}
22413
22414
return total_err;
22415
}
22416
22417
static void assign_weights3(uint8_t trial_weights[16],
22418
uint32_t best_pat_bits,
22419
uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2],
22420
uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2],
22421
const vec3F* pFloat_pixels)
22422
{
22423
float subset_cr[2][8], subset_cg[2][8], subset_cb[2][8];
22424
22425
for (uint32_t subset = 0; subset < 2; subset++)
22426
{
22427
const uint32_t min_r = subset_min_r[subset], min_g = subset_min_g[subset], min_b = subset_min_b[subset];
22428
const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset];
22429
22430
for (uint32_t j = 0; j < 8; j++)
22431
{
22432
const uint32_t w = g_bc7_weights3[j];
22433
22434
subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22435
subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22436
subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22437
} // j
22438
22439
} // subset
22440
22441
// TODO: Plane optimization?
22442
22443
for (uint32_t i = 0; i < 16; i++)
22444
{
22445
const uint32_t subset = (best_pat_bits >> i) & 1;
22446
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22447
22448
float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb);
22449
uint32_t best_idx = 0;
22450
22451
for (uint32_t j = 1; j < 8; j++)
22452
{
22453
float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb);
22454
if (e < best_error)
22455
{
22456
best_error = e;
22457
best_idx = j;
22458
}
22459
}
22460
22461
trial_weights[i] = (uint8_t)best_idx;
22462
22463
} // i
22464
}
22465
22466
static double assign_weights_error_3(uint8_t trial_weights[16],
22467
uint32_t best_pat_bits,
22468
uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2],
22469
uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2],
22470
const vec3F* pFloat_pixels, const float* pPixel_scales)
22471
{
22472
float subset_cr[2][8], subset_cg[2][8], subset_cb[2][8];
22473
22474
for (uint32_t subset = 0; subset < 2; subset++)
22475
{
22476
const uint32_t min_r = subset_min_r[subset], min_g = subset_min_g[subset], min_b = subset_min_b[subset];
22477
const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset];
22478
22479
for (uint32_t j = 0; j < 8; j++)
22480
{
22481
const uint32_t w = g_bc7_weights3[j];
22482
22483
subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22484
subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22485
subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22486
} // j
22487
22488
} // subset
22489
22490
double trial_error = 0.0f;
22491
22492
// TODO: Plane optimization?
22493
22494
for (uint32_t i = 0; i < 16; i++)
22495
{
22496
const uint32_t subset = (best_pat_bits >> i) & 1;
22497
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22498
22499
float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb);
22500
uint32_t best_idx = 0;
22501
22502
for (uint32_t j = 1; j < 8; j++)
22503
{
22504
float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb);
22505
if (e < best_error)
22506
{
22507
best_error = e;
22508
best_idx = j;
22509
}
22510
}
22511
22512
trial_weights[i] = (uint8_t)best_idx;
22513
22514
trial_error += best_error * pPixel_scales[i];
22515
22516
} // i
22517
22518
return trial_error;
22519
}
22520
22521
// Per-weight terms for the 3-bit and 4-bit weight sets, filled in by fast_encode_bc6h_init().
// With w = weight / 64, each entry holds (w*w, (1-w)*w, (1-w)*(1-w), w).
static basist::vec4F g_bc6h_ls_weights_3[8];
static basist::vec4F g_bc6h_ls_weights_4[16];

// Number of two-subset patterns used, and each pattern as a 16-bit mask (bit j = subset of pixel j).
// The masks are filled in by fast_encode_bc6h_init().
const uint32_t BC6H_NUM_PATS = 32;
static uint32_t g_bc6h_pats2[BC6H_NUM_PATS];
22526
22527
static void fast_encode_bc6h_init()
22528
{
22529
for (uint32_t i = 0; i < 8; i++)
22530
{
22531
const float w = (float)g_bc7_weights3[i] * (1.0f / 64.0f);
22532
g_bc6h_ls_weights_3[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
22533
}
22534
22535
for (uint32_t i = 0; i < 16; i++)
22536
{
22537
const float w = (float)g_bc6h_weights4[i] * (1.0f / 64.0f);
22538
g_bc6h_ls_weights_4[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
22539
}
22540
22541
for (uint32_t pat_index = 0; pat_index < BC6H_NUM_PATS; pat_index++)
22542
{
22543
uint32_t pat_bits = 0;
22544
22545
for (uint32_t j = 0; j < 16; j++)
22546
pat_bits |= (g_bc7_partition2[pat_index * 16 + j] << j);
22547
22548
g_bc6h_pats2[pat_index] = pat_bits;
22549
}
22550
}
22551
22552
// Expands a quantized unsigned BC6H endpoint component of 'bits' bits to the 16-bit scale.
// val must be less than (1 << bits).
// - 15 or more bits: the value is returned unchanged.
// - otherwise 0 maps to 0, the largest code maps to 0xFFFF, and everything else is
//   ((val << 16) + 0x8000) >> bits.
static int bc6h_dequantize(int val, int bits)
{
	assert(val < (1 << bits));

	if (bits >= 15)
		return val;

	if (val == 0)
		return 0;

	const int largest_code = (1 << bits) - 1;
	if (val == largest_code)
		return 0xFFFF;

	return ((val << 16) + 0x8000) >> bits;
}
22567
22568
// Converts a dequantized 16-bit BC6H endpoint value (val < 65536) to half float bits
// by scaling it by 31/64.
static inline basist::half_float bc6h_convert_to_half(int val)
{
	assert(val < 65536);

	const int scaled = (val * 31) >> 6;
	return (basist::half_float)scaled;
}
22575
22576
static void bc6h_quant_dequant_endpoints(uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, int bits) // bits=10
22577
{
22578
min_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_r, bits), bits));
22579
min_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_g, bits), bits));
22580
min_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_b, bits), bits));
22581
22582
max_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_r, bits), bits));
22583
max_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_g, bits), bits));
22584
max_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_b, bits), bits));
22585
}
22586
22587
static void bc6h_quant_endpoints(
22588
uint32_t min_hr, uint32_t min_hg, uint32_t min_hb, uint32_t max_hr, uint32_t max_hg, uint32_t max_hb,
22589
uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b,
22590
int bits)
22591
{
22592
min_r = basist::bc6h_half_to_blog((basist::half_float)min_hr, bits);
22593
min_g = basist::bc6h_half_to_blog((basist::half_float)min_hg, bits);
22594
min_b = basist::bc6h_half_to_blog((basist::half_float)min_hb, bits);
22595
22596
max_r = basist::bc6h_half_to_blog((basist::half_float)max_hr, bits);
22597
max_g = basist::bc6h_half_to_blog((basist::half_float)max_hg, bits);
22598
max_b = basist::bc6h_half_to_blog((basist::half_float)max_hb, bits);
22599
}
22600
22601
static void bc6h_dequant_endpoints(
22602
uint32_t min_br, uint32_t min_bg, uint32_t min_bb, uint32_t max_br, uint32_t max_bg, uint32_t max_bb,
22603
uint32_t& min_hr, uint32_t& min_hg, uint32_t& min_hb, uint32_t& max_hr, uint32_t& max_hg, uint32_t& max_hb,
22604
int bits)
22605
{
22606
min_hr = bc6h_convert_to_half(bc6h_dequantize(min_br, bits));
22607
min_hg = bc6h_convert_to_half(bc6h_dequantize(min_bg, bits));
22608
min_hb = bc6h_convert_to_half(bc6h_dequantize(min_bb, bits));
22609
22610
max_hr = bc6h_convert_to_half(bc6h_dequantize(max_br, bits));
22611
max_hg = bc6h_convert_to_half(bc6h_dequantize(max_bg, bits));
22612
max_hb = bc6h_convert_to_half(bc6h_dequantize(max_bb, bits));
22613
}
22614
22615
// Returns the number of set bits in x.
// Uses a compiler builtin/intrinsic where one is known to be available, otherwise a portable loop.
static BASISU_FORCE_INLINE int popcount32(uint32_t x)
{
#if defined(__EMSCRIPTEN__) || defined(__clang__) || defined(__GNUC__)
	return __builtin_popcount(x);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64))
	// __popcnt is an x86/x64 MSVC intrinsic; other MSVC targets take the portable path below.
	return (int)__popcnt(x);
#else
	// Each iteration clears the lowest set bit, so the loop runs once per set bit.
	int count = 0;
	while (x)
	{
		x &= (x - 1);
		++count;
	}
	return count;
#endif
}
22631
22632
// Rounds x to an int by adding 0.5 (x >= 0) or subtracting 0.5 (otherwise) and truncating.
static BASISU_FORCE_INLINE int fast_roundf_int(float x)
{
	if (x >= 0.0f)
		return (int)(x + 0.5f);

	return (int)(x - 0.5f);
}
22636
22637
static void fast_encode_bc6h_2subsets_pattern(
22638
uint32_t best_pat_index, uint32_t best_pat_bits,
22639
const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales,
22640
double& cur_error, basist::bc6h_logical_block& log_blk,
22641
int64_t block_max_var,
22642
int mean_r, int mean_g, int mean_b,
22643
const fast_bc6h_params& params)
22644
{
22645
BASISU_NOTE_UNUSED(block_max_var);
22646
22647
uint32_t subset_means[2][3] = { { 0 } };
22648
for (uint32_t i = 0; i < 16; i++)
22649
{
22650
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22651
const uint32_t r = pPixels[i * 3 + 0], g = pPixels[i * 3 + 1], b = pPixels[i * 3 + 2];
22652
22653
subset_means[subset_index][0] += r;
22654
subset_means[subset_index][1] += g;
22655
subset_means[subset_index][2] += b;
22656
}
22657
22658
for (uint32_t s = 0; s < 2; s++)
22659
for (uint32_t c = 0; c < 3; c++)
22660
subset_means[s][c] = (subset_means[s][c] + 8) / 16;
22661
22662
int64_t subset_icov[2][6] = { { 0 } };
22663
22664
for (uint32_t i = 0; i < 16; i++)
22665
{
22666
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22667
const int r = (int)pPixels[i * 3 + 0] - mean_r, g = (int)pPixels[i * 3 + 1] - mean_g, b = (int)pPixels[i * 3 + 2] - mean_b;
22668
22669
subset_icov[subset_index][0] += r * r;
22670
subset_icov[subset_index][1] += r * g;
22671
subset_icov[subset_index][2] += r * b;
22672
subset_icov[subset_index][3] += g * g;
22673
subset_icov[subset_index][4] += g * b;
22674
subset_icov[subset_index][5] += b * b;
22675
}
22676
22677
vec3F subset_axis[2];
22678
22679
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22680
{
22681
float cov[6];
22682
for (uint32_t i = 0; i < 6; i++)
22683
cov[i] = (float)subset_icov[subset_index][i];
22684
22685
const float sc = 1.0f / (basisu::maximum(cov[0], cov[3], cov[5]) + basisu::REALLY_SMALL_FLOAT_VAL);
22686
const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];
22687
22688
const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
22689
const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
22690
const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;
22691
22692
float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb);
22693
22694
float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f;
22695
if (fabs(l) >= basisu::SMALL_FLOAT_VAL)
22696
{
22697
const float inv_l = inv_sqrt(l);
22698
axis_r = alt_xr * inv_l;
22699
axis_g = alt_xg * inv_l;
22700
axis_b = alt_xb * inv_l;
22701
}
22702
22703
subset_axis[subset_index].set(axis_r, axis_g, axis_b);
22704
} // s
22705
22706
float subset_min_dot[2] = { basisu::BIG_FLOAT_VAL, basisu::BIG_FLOAT_VAL };
22707
float subset_max_dot[2] = { -basisu::BIG_FLOAT_VAL, -basisu::BIG_FLOAT_VAL };
22708
int subset_min_idx[2] = { 0 }, subset_max_idx[2] = { 0 };
22709
22710
for (uint32_t i = 0; i < 16; i++)
22711
{
22712
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22713
const float r = (float)pPixels[i * 3 + 0], g = (float)pPixels[i * 3 + 1], b = (float)pPixels[i * 3 + 2];
22714
const float dot = r * subset_axis[subset_index].c[0] + g * subset_axis[subset_index].c[1] + b * subset_axis[subset_index].c[2];
22715
22716
if (dot < subset_min_dot[subset_index])
22717
{
22718
subset_min_dot[subset_index] = dot;
22719
subset_min_idx[subset_index] = i;
22720
}
22721
22722
if (dot > subset_max_dot[subset_index])
22723
{
22724
subset_max_dot[subset_index] = dot;
22725
subset_max_idx[subset_index] = i;
22726
}
22727
} // i
22728
22729
uint32_t subset_min_r[2], subset_min_g[2], subset_min_b[2];
22730
uint32_t subset_max_r[2], subset_max_g[2], subset_max_b[2];
22731
22732
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22733
{
22734
const uint32_t min_index = subset_min_idx[subset_index] * 3, max_index = subset_max_idx[subset_index] * 3;
22735
22736
subset_min_r[subset_index] = pPixels[min_index + 0];
22737
subset_min_g[subset_index] = pPixels[min_index + 1];
22738
subset_min_b[subset_index] = pPixels[min_index + 2];
22739
22740
subset_max_r[subset_index] = pPixels[max_index + 0];
22741
subset_max_g[subset_index] = pPixels[max_index + 1];
22742
subset_max_b[subset_index] = pPixels[max_index + 2];
22743
22744
} // subset_index
22745
22746
// least squares with unquantized endpoints
22747
const bool use_ls = true;
22748
if (use_ls)
22749
{
22750
uint8_t trial_weights[16];
22751
assign_weights3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels);
22752
22753
float z00[2] = { 0.0f }, z01[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f };
22754
float q00_r[2] = { 0.0f }, q10_r[2] = { 0.0f }, t_r[2] = { 0.0f };
22755
float q00_g[2] = { 0.0f }, q10_g[2] = { 0.0f }, t_g[2] = { 0.0f };
22756
float q00_b[2] = { 0.0f }, q10_b[2] = { 0.0f }, t_b[2] = { 0.0f };
22757
22758
for (uint32_t i = 0; i < 16; i++)
22759
{
22760
const uint32_t subset = (best_pat_bits >> i) & 1;
22761
22762
float r = (float)pPixels[i * 3 + 0];
22763
float g = (float)pPixels[i * 3 + 1];
22764
float b = (float)pPixels[i * 3 + 2];
22765
22766
const uint32_t sel = trial_weights[i];
22767
22768
z00[subset] += g_bc6h_ls_weights_3[sel][0];
22769
z10[subset] += g_bc6h_ls_weights_3[sel][1];
22770
z11[subset] += g_bc6h_ls_weights_3[sel][2];
22771
22772
float w = g_bc6h_ls_weights_3[sel][3];
22773
22774
q00_r[subset] += w * r;
22775
t_r[subset] += r;
22776
22777
q00_g[subset] += w * g;
22778
t_g[subset] += g;
22779
22780
q00_b[subset] += w * b;
22781
t_b[subset] += b;
22782
}
22783
22784
for (uint32_t subset = 0; subset < 2; subset++)
22785
{
22786
q10_r[subset] = t_r[subset] - q00_r[subset];
22787
q10_g[subset] = t_g[subset] - q00_g[subset];
22788
q10_b[subset] = t_b[subset] - q00_b[subset];
22789
22790
z01[subset] = z10[subset];
22791
22792
float det = z00[subset] * z11[subset] - z01[subset] * z10[subset];
22793
if (fabs(det) >= basisu::SMALL_FLOAT_VAL)
22794
{
22795
det = 1.0f / det;
22796
22797
float iz00 = z11[subset] * det;
22798
float iz01 = -z01[subset] * det;
22799
float iz10 = -z10[subset] * det;
22800
float iz11 = z00[subset] * det;
22801
22802
subset_max_r[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_r[subset] + iz01 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22803
subset_min_r[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_r[subset] + iz11 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22804
22805
subset_max_g[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_g[subset] + iz01 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22806
subset_min_g[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_g[subset] + iz11 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22807
22808
subset_max_b[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_b[subset] + iz01 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22809
subset_min_b[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_b[subset] + iz11 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22810
}
22811
} // subset
22812
}
22813
22814
const int BC6H_2SUBSET_ABS_ENDPOINT_MODE = 9;
22815
22816
int bc6h_mode_index = BC6H_2SUBSET_ABS_ENDPOINT_MODE, num_endpoint_bits = 6;
22817
uint32_t abs_blog_endpoints[3][4];
22818
22819
if (params.m_num_diff_endpoint_modes_to_try)
22820
{
22821
// ordered from largest base bits to least
22822
static const int s_bc6h_mode_order2[2] = { 5, 1 };
22823
static const int s_bc6h_mode_order4[4] = { 0, 5, 7, 1 };
22824
static const int s_bc6h_mode_order9[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 };
22825
22826
uint32_t num_endpoint_modes = 2;
22827
const int* pBC6H_mode_order = s_bc6h_mode_order2;
22828
22829
if (params.m_num_diff_endpoint_modes_to_try >= 9)
22830
{
22831
num_endpoint_modes = 9;
22832
pBC6H_mode_order = s_bc6h_mode_order9;
22833
}
22834
else if (params.m_num_diff_endpoint_modes_to_try >= 4)
22835
{
22836
num_endpoint_modes = 4;
22837
pBC6H_mode_order = s_bc6h_mode_order4;
22838
}
22839
22840
// Find the BC6H mode that will conservatively encode our trial endpoints. The mode chosen will handle any endpoint swaps.
22841
for (uint32_t bc6h_mode_iter = 0; bc6h_mode_iter < num_endpoint_modes; bc6h_mode_iter++)
22842
{
22843
const uint32_t mode = pBC6H_mode_order[bc6h_mode_iter];
22844
22845
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
22846
const int base_bitmask = (1 << num_base_bits) - 1;
22847
BASISU_NOTE_UNUSED(base_bitmask);
22848
22849
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
22850
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
22851
22852
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22853
{
22854
bc6h_quant_endpoints(
22855
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index],
22856
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22857
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22858
num_base_bits);
22859
}
22860
22861
uint32_t c;
22862
for (c = 0; c < 3; c++)
22863
{
22864
// a very conservative check because we don't have the weight indices yet, so we don't know how to swap end point values
22865
// purposely enforcing a symmetric limit here so we can invert any endpoints later if needed
22866
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
22867
const int min_delta = -max_delta;
22868
22869
int delta0 = (int)abs_blog_endpoints[c][1] - (int)abs_blog_endpoints[c][0];
22870
if ((delta0 < min_delta) || (delta0 > max_delta))
22871
break;
22872
22873
int delta1 = (int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][0];
22874
if ((delta1 < min_delta) || (delta1 > max_delta))
22875
break;
22876
22877
int delta2 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][0];
22878
if ((delta2 < min_delta) || (delta2 > max_delta))
22879
break;
22880
22881
// in case the endpoints are swapped
22882
int delta3 = (int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][1];
22883
if ((delta3 < min_delta) || (delta3 > max_delta))
22884
break;
22885
22886
int delta4 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][1];
22887
if ((delta4 < min_delta) || (delta4 > max_delta))
22888
break;
22889
}
22890
22891
if (c == 3)
22892
{
22893
bc6h_mode_index = mode;
22894
num_endpoint_bits = num_base_bits;
22895
break;
22896
}
22897
}
22898
}
22899
22900
if (bc6h_mode_index == BC6H_2SUBSET_ABS_ENDPOINT_MODE)
22901
{
22902
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22903
{
22904
bc6h_quant_endpoints(
22905
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index],
22906
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22907
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22908
num_endpoint_bits);
22909
}
22910
}
22911
22912
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22913
{
22914
bc6h_dequant_endpoints(
22915
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22916
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22917
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index],
22918
subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index], num_endpoint_bits);
22919
}
22920
22921
uint8_t trial_weights[16];
22922
double trial_error = assign_weights_error_3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels, pPixel_scales);
22923
22924
if (trial_error < cur_error)
22925
{
22926
basist::bc6h_logical_block trial_log_blk;
22927
22928
trial_log_blk.m_mode = bc6h_mode_index;
22929
trial_log_blk.m_partition_pattern = best_pat_index;
22930
22931
memcpy(trial_log_blk.m_endpoints, abs_blog_endpoints, sizeof(trial_log_blk.m_endpoints));
22932
memcpy(trial_log_blk.m_weights, trial_weights, 16);
22933
22934
if (trial_log_blk.m_weights[0] & 4)
22935
{
22936
for (uint32_t c = 0; c < 3; c++)
22937
std::swap(trial_log_blk.m_endpoints[c][0], trial_log_blk.m_endpoints[c][1]);
22938
22939
for (uint32_t i = 0; i < 16; i++)
22940
{
22941
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22942
if (subset_index == 0)
22943
trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i];
22944
}
22945
}
22946
22947
const uint32_t subset2_anchor_index = g_bc7_table_anchor_index_second_subset[best_pat_index];
22948
if (trial_log_blk.m_weights[subset2_anchor_index] & 4)
22949
{
22950
for (uint32_t c = 0; c < 3; c++)
22951
std::swap(trial_log_blk.m_endpoints[c][2], trial_log_blk.m_endpoints[c][3]);
22952
22953
for (uint32_t i = 0; i < 16; i++)
22954
{
22955
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22956
if (subset_index == 1)
22957
trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i];
22958
}
22959
}
22960
22961
if (bc6h_mode_index != BC6H_2SUBSET_ABS_ENDPOINT_MODE)
22962
{
22963
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[bc6h_mode_index][1], g_bc6h_mode_sig_bits[bc6h_mode_index][2], g_bc6h_mode_sig_bits[bc6h_mode_index][3] };
22964
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
22965
22966
for (uint32_t c = 0; c < 3; c++)
22967
{
22968
const int delta0 = (int)trial_log_blk.m_endpoints[c][1] - (int)trial_log_blk.m_endpoints[c][0];
22969
const int delta1 = (int)trial_log_blk.m_endpoints[c][2] - (int)trial_log_blk.m_endpoints[c][0];
22970
const int delta2 = (int)trial_log_blk.m_endpoints[c][3] - (int)trial_log_blk.m_endpoints[c][0];
22971
22972
#ifdef _DEBUG
22973
// sanity check the final endpoints
22974
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
22975
const int min_delta = -(max_delta + 1);
22976
assert((max_delta - min_delta) == delta_bitmasks[c]);
22977
22978
if ((delta0 < min_delta) || (delta0 > max_delta) || (delta1 < min_delta) || (delta1 > max_delta) || (delta2 < min_delta) || (delta2 > max_delta))
22979
{
22980
assert(0);
22981
break;
22982
}
22983
#endif
22984
22985
trial_log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
22986
trial_log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
22987
trial_log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
22988
22989
} // c
22990
}
22991
22992
cur_error = trial_error;
22993
log_blk = trial_log_blk;
22994
}
22995
}
22996
22997
static void fast_encode_bc6h_2subsets(
22998
const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales,
22999
double& cur_error, basist::bc6h_logical_block& log_blk,
23000
int64_t block_max_var,
23001
int mean_r, int mean_g, int mean_b, float block_axis_r, float block_axis_g, float block_axis_b,
23002
const fast_bc6h_params& params)
23003
{
23004
assert((params.m_max_2subset_pats_to_try > 0) && (params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS));
23005
23006
if (params.m_max_2subset_pats_to_try == BC6H_NUM_PATS)
23007
{
23008
for (uint32_t i = 0; i < BC6H_NUM_PATS; i++)
23009
{
23010
const uint32_t best_pat_index = i;
23011
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23012
23013
fast_encode_bc6h_2subsets_pattern(
23014
best_pat_index, best_pat_bits,
23015
pPixels, pFloat_pixels, pPixel_scales,
23016
cur_error, log_blk,
23017
block_max_var,
23018
mean_r, mean_g, mean_b, params);
23019
}
23020
return;
23021
}
23022
23023
uint32_t desired_pat_bits = 0;
23024
for (uint32_t i = 0; i < 16; i++)
23025
{
23026
float f = (float)(pPixels[i * 3 + 0] - mean_r) * block_axis_r +
23027
(float)(pPixels[i * 3 + 1] - mean_g) * block_axis_g +
23028
(float)(pPixels[i * 3 + 2] - mean_b) * block_axis_b;
23029
23030
desired_pat_bits |= (((f >= 0.0f) ? 1 : 0) << i);
23031
} // i
23032
23033
if (params.m_max_2subset_pats_to_try == 1)
23034
{
23035
uint32_t best_diff = UINT32_MAX;
23036
for (uint32_t p = 0; p < BC6H_NUM_PATS; p++)
23037
{
23038
const uint32_t bc6h_pat_bits = g_bc6h_pats2[p];
23039
23040
int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits);
23041
int diff_inv = 16 - diff;
23042
23043
uint32_t min_diff = (basisu::minimum<int>(diff, diff_inv) << 8) | p;
23044
if (min_diff < best_diff)
23045
best_diff = min_diff;
23046
} // p
23047
23048
const uint32_t best_pat_index = best_diff & 0xFF;
23049
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23050
23051
fast_encode_bc6h_2subsets_pattern(
23052
best_pat_index, best_pat_bits,
23053
pPixels, pFloat_pixels, pPixel_scales,
23054
cur_error, log_blk,
23055
block_max_var,
23056
mean_r, mean_g, mean_b, params);
23057
}
23058
else
23059
{
23060
assert(params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS);
23061
uint32_t pat_diffs[BC6H_NUM_PATS];
23062
23063
for (uint32_t p = 0; p < BC6H_NUM_PATS; p++)
23064
{
23065
const uint32_t bc6h_pat_bits = g_bc6h_pats2[p];
23066
23067
int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits);
23068
int diff_inv = 16 - diff;
23069
23070
pat_diffs[p] = (basisu::minimum<int>(diff, diff_inv) << 8) | p;
23071
} // p
23072
23073
std::sort(pat_diffs, pat_diffs + BC6H_NUM_PATS);
23074
23075
for (uint32_t pat_iter = 0; pat_iter < params.m_max_2subset_pats_to_try; pat_iter++)
23076
{
23077
const uint32_t best_pat_index = pat_diffs[pat_iter] & 0xFF;
23078
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23079
23080
fast_encode_bc6h_2subsets_pattern(
23081
best_pat_index, best_pat_bits,
23082
pPixels, pFloat_pixels, pPixel_scales,
23083
cur_error, log_blk,
23084
block_max_var,
23085
mean_r, mean_g, mean_b, params);
23086
}
23087
}
23088
}
23089
23090
void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params &params)
23091
{
23092
basist::bc6h_logical_block log_blk;
23093
log_blk.clear();
23094
23095
log_blk.m_mode = basist::BC6H_FIRST_1SUBSET_MODE_INDEX;
23096
23097
uint32_t omin_r = UINT32_MAX, omin_g = UINT32_MAX, omin_b = UINT32_MAX;
23098
uint32_t omax_r = 0, omax_g = 0, omax_b = 0;
23099
uint32_t total_r = 0, total_g = 0, total_b = 0;
23100
23101
for (uint32_t i = 0; i < 16; i++)
23102
{
23103
uint32_t r = pPixels[i * 3 + 0];
23104
uint32_t g = pPixels[i * 3 + 1];
23105
uint32_t b = pPixels[i * 3 + 2];
23106
23107
total_r += r;
23108
total_g += g;
23109
total_b += b;
23110
23111
omin_r = basisu::minimum(omin_r, r);
23112
omin_g = basisu::minimum(omin_g, g);
23113
omin_b = basisu::minimum(omin_b, b);
23114
23115
omax_r = basisu::maximum(omax_r, r);
23116
omax_g = basisu::maximum(omax_g, g);
23117
omax_b = basisu::maximum(omax_b, b);
23118
}
23119
23120
if ((omin_r == omax_r) && (omin_g == omax_g) && (omin_b == omax_b))
23121
{
23122
// Solid block
23123
log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_r);
23124
log_blk.m_endpoints[0][1] = 0;
23125
23126
log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_g);
23127
log_blk.m_endpoints[1][1] = 0;
23128
23129
log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog16((basist::half_float)omin_b);
23130
log_blk.m_endpoints[2][1] = 0;
23131
23132
log_blk.m_mode = 13;
23133
pack_bc6h_block(*pBlock, log_blk);
23134
23135
return;
23136
}
23137
23138
uint32_t min_r, min_g, min_b, max_r, max_g, max_b;
23139
23140
int mean_r = (total_r + 8) / 16;
23141
int mean_g = (total_g + 8) / 16;
23142
int mean_b = (total_b + 8) / 16;
23143
23144
int64_t icov[6] = { 0, 0, 0, 0, 0, 0 };
23145
23146
for (uint32_t i = 0; i < 16; i++)
23147
{
23148
int r = (int)pPixels[i * 3 + 0] - mean_r;
23149
int g = (int)pPixels[i * 3 + 1] - mean_g;
23150
int b = (int)pPixels[i * 3 + 2] - mean_b;
23151
23152
icov[0] += r * r;
23153
icov[1] += r * g;
23154
icov[2] += r * b;
23155
icov[3] += g * g;
23156
icov[4] += g * b;
23157
icov[5] += b * b;
23158
}
23159
23160
int64_t block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16
23161
23162
if (block_max_var < (FAST_BC6H_STD_DEV_THRESH * FAST_BC6H_STD_DEV_THRESH * 16))
23163
{
23164
// Simple block
23165
min_r = (omax_r - omin_r) / 32 + omin_r;
23166
min_g = (omax_g - omin_g) / 32 + omin_g;
23167
min_b = (omax_b - omin_b) / 32 + omin_b;
23168
23169
max_r = ((omax_r - omin_r) * 31) / 32 + omin_r;
23170
max_g = ((omax_g - omin_g) * 31) / 32 + omin_g;
23171
max_b = ((omax_b - omin_b) * 31) / 32 + omin_b;
23172
23173
assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS));
23174
23175
bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10);
23176
23177
assign_weights_simple_4(pPixels, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var);
23178
23179
log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10);
23180
log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10);
23181
23182
log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog((basist::half_float)min_g, 10);
23183
log_blk.m_endpoints[1][1] = basist::bc6h_half_to_blog((basist::half_float)max_g, 10);
23184
23185
log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog((basist::half_float)min_b, 10);
23186
log_blk.m_endpoints[2][1] = basist::bc6h_half_to_blog((basist::half_float)max_b, 10);
23187
23188
if (log_blk.m_weights[0] & 8)
23189
{
23190
for (uint32_t i = 0; i < 16; i++)
23191
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
23192
23193
for (uint32_t c = 0; c < 3; c++)
23194
{
23195
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
23196
}
23197
}
23198
23199
pack_bc6h_block(*pBlock, log_blk);
23200
23201
return;
23202
}
23203
23204
// block_max_var cannot be 0 here, also trace cannot be 0
23205
23206
// Complex block (edges/strong gradients)
23207
bool try_2subsets = false;
23208
double cur_err = 0.0f;
23209
vec3F float_pixels[16];
23210
float pixel_scales[16];
23211
23212
// covar rows are:
23213
// 0, 1, 2
23214
// 1, 3, 4
23215
// 2, 4, 5
23216
float cov[6];
23217
for (uint32_t i = 0; i < 6; i++)
23218
cov[i] = (float)icov[i];
23219
23220
const float sc = 1.0f / (float)block_max_var;
23221
const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];
23222
23223
const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
23224
const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
23225
const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;
23226
23227
float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb);
23228
23229
float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f;
23230
if (fabs(l) >= basisu::SMALL_FLOAT_VAL)
23231
{
23232
const float inv_l = inv_sqrt(l);
23233
axis_r = alt_xr * inv_l;
23234
axis_g = alt_xg * inv_l;
23235
axis_b = alt_xb * inv_l;
23236
}
23237
23238
const float tr = axis_r * cov[0] + axis_g * cov[1] + axis_b * cov[2];
23239
const float tg = axis_r * cov[1] + axis_g * cov[3] + axis_b * cov[4];
23240
const float tb = axis_r * cov[2] + axis_g * cov[4] + axis_b * cov[5];
23241
const float principle_axis_var = tr * axis_r + tg * axis_g + tb * axis_b;
23242
23243
const float inv_principle_axis_var = 1.0f / (principle_axis_var + basisu::REALLY_SMALL_FLOAT_VAL);
23244
axis_r = tr * inv_principle_axis_var;
23245
axis_g = tg * inv_principle_axis_var;
23246
axis_b = tb * inv_principle_axis_var;
23247
23248
float total_var = cov[0] + cov[3] + cov[5];
23249
23250
// If the principle axis variance vs. the block's total variance accounts for less than this threshold, it's a "very complex" block that may benefit from 2 subsets.
23251
const float COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH = .995f;
23252
try_2subsets = principle_axis_var < (total_var * COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH);
23253
23254
uint32_t min_idx = 0, max_idx = 0;
23255
float min_dot = basisu::BIG_FLOAT_VAL, max_dot = -basisu::BIG_FLOAT_VAL;
23256
23257
for (uint32_t i = 0; i < 16; i++)
23258
{
23259
float r = (float)pPixels[i * 3 + 0];
23260
float g = (float)pPixels[i * 3 + 1];
23261
float b = (float)pPixels[i * 3 + 2];
23262
23263
float_pixels[i].c[0] = fast_half_to_float_pos_not_inf_or_nan((half_float)r);
23264
float_pixels[i].c[1] = fast_half_to_float_pos_not_inf_or_nan((half_float)g);
23265
float_pixels[i].c[2] = fast_half_to_float_pos_not_inf_or_nan((half_float)b);
23266
23267
pixel_scales[i] = 1.0f / (basisu::squaref(float_pixels[i].c[0]) + basisu::squaref(float_pixels[i].c[1]) + basisu::squaref(float_pixels[i].c[2]) + (float)MIN_HALF_FLOAT);
23268
23269
float dot = r * axis_r + g * axis_g + b * axis_b;
23270
23271
if (dot < min_dot)
23272
{
23273
min_dot = dot;
23274
min_idx = i;
23275
}
23276
23277
if (dot > max_dot)
23278
{
23279
max_dot = dot;
23280
max_idx = i;
23281
}
23282
}
23283
23284
min_r = pPixels[min_idx * 3 + 0];
23285
min_g = pPixels[min_idx * 3 + 1];
23286
min_b = pPixels[min_idx * 3 + 2];
23287
23288
max_r = pPixels[max_idx * 3 + 0];
23289
max_g = pPixels[max_idx * 3 + 1];
23290
max_b = pPixels[max_idx * 3 + 2];
23291
23292
assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS));
23293
23294
bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10);
23295
23296
cur_err = assign_weights_4(float_pixels, pixel_scales, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var, try_2subsets, params);
23297
23298
const uint32_t MAX_LS_PASSES = params.m_hq_ls ? 2 : 1;
23299
for (uint32_t pass = 0; pass < MAX_LS_PASSES; pass++)
23300
{
23301
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
23302
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
23303
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
23304
float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
23305
23306
for (uint32_t i = 0; i < 16; i++)
23307
{
23308
float r = (float)pPixels[i * 3 + 0];
23309
float g = (float)pPixels[i * 3 + 1];
23310
float b = (float)pPixels[i * 3 + 2];
23311
23312
const uint32_t sel = log_blk.m_weights[i];
23313
23314
z00 += g_bc6h_ls_weights_4[sel][0];
23315
z10 += g_bc6h_ls_weights_4[sel][1];
23316
z11 += g_bc6h_ls_weights_4[sel][2];
23317
23318
float w = g_bc6h_ls_weights_4[sel][3];
23319
23320
q00_r += w * r;
23321
t_r += r;
23322
23323
q00_g += w * g;
23324
t_g += g;
23325
23326
q00_b += w * b;
23327
t_b += b;
23328
}
23329
23330
q10_r = t_r - q00_r;
23331
q10_g = t_g - q00_g;
23332
q10_b = t_b - q00_b;
23333
23334
z01 = z10;
23335
23336
float det = z00 * z11 - z01 * z10;
23337
if (fabs(det) < basisu::SMALL_FLOAT_VAL)
23338
break;
23339
23340
det = 1.0f / det;
23341
23342
float iz00 = z11 * det;
23343
float iz01 = -z01 * det;
23344
float iz10 = -z10 * det;
23345
float iz11 = z00 * det;
23346
23347
uint32_t trial_max_r = (int)basisu::clamp<float>(std::round(iz00 * q00_r + iz01 * q10_r), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23348
uint32_t trial_min_r = (int)basisu::clamp<float>(std::round(iz10 * q00_r + iz11 * q10_r), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23349
23350
uint32_t trial_max_g = (int)basisu::clamp<float>(std::round(iz00 * q00_g + iz01 * q10_g), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23351
uint32_t trial_min_g = (int)basisu::clamp<float>(std::round(iz10 * q00_g + iz11 * q10_g), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23352
23353
uint32_t trial_max_b = (int)basisu::clamp<float>(std::round(iz00 * q00_b + iz01 * q10_b), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23354
uint32_t trial_min_b = (int)basisu::clamp<float>(std::round(iz10 * q00_b + iz11 * q10_b), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
23355
23356
bc6h_quant_dequant_endpoints(trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, 10);
23357
23358
uint8_t trial_weights[16];
23359
double trial_err = assign_weights_4(float_pixels, pixel_scales, trial_weights, trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, block_max_var, try_2subsets, params);
23360
23361
if (trial_err < cur_err)
23362
{
23363
cur_err = trial_err;
23364
23365
min_r = trial_min_r;
23366
max_r = trial_max_r;
23367
23368
min_g = trial_min_g;
23369
max_g = trial_max_g;
23370
23371
min_b = trial_min_b;
23372
max_b = trial_max_b;
23373
23374
memcpy(log_blk.m_weights, trial_weights, 16);
23375
}
23376
else
23377
{
23378
break;
23379
}
23380
23381
} // pass
23382
23383
#if 0
23384
//if (full_flag)
23385
if ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16)))
23386
{
23387
min_r = 0;
23388
max_r = 0;
23389
min_g = 0;
23390
max_g = 0;
23391
min_b = 0;
23392
max_b = 0;
23393
}
23394
#endif
23395
23396
log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10);
23397
log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10);
23398
23399
log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog((basist::half_float)min_g, 10);
23400
log_blk.m_endpoints[1][1] = basist::bc6h_half_to_blog((basist::half_float)max_g, 10);
23401
23402
log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog((basist::half_float)min_b, 10);
23403
log_blk.m_endpoints[2][1] = basist::bc6h_half_to_blog((basist::half_float)max_b, 10);
23404
23405
if (log_blk.m_weights[0] & 8)
23406
{
23407
for (uint32_t i = 0; i < 16; i++)
23408
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
23409
23410
for (uint32_t c = 0; c < 3; c++)
23411
{
23412
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
23413
}
23414
}
23415
23416
if ((params.m_max_2subset_pats_to_try > 0) && ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16))))
23417
{
23418
fast_encode_bc6h_2subsets(pPixels, float_pixels, pixel_scales, cur_err, log_blk, block_max_var, mean_r, mean_g, mean_b, axis_r, axis_g, axis_b, params);
23419
}
23420
23421
pack_bc6h_block(*pBlock, log_blk);
23422
}
23423
23424
bool decode_6x6_hdr(const uint8_t *pComp_data, uint32_t comp_data_size, basisu::vector2D<astc_helpers::astc_block>& decoded_blocks, uint32_t& width, uint32_t& height)
23425
{
23426
const uint32_t BLOCK_W = 6, BLOCK_H = 6;
23427
23428
//interval_timer tm;
23429
//tm.start();
23430
23431
width = 0;
23432
height = 0;
23433
23434
if (comp_data_size <= (2 * 3 + 1))
23435
return false;
23436
23437
basist::bitwise_decoder decoder;
23438
if (!decoder.init(pComp_data, comp_data_size))
23439
return false;
23440
23441
if (decoder.get_bits(16) != 0xABCD)
23442
return false;
23443
23444
width = decoder.get_bits(16);
23445
height = decoder.get_bits(16);
23446
23447
if (!width || !height || (width > MAX_ASTC_HDR_6X6_DIM) || (height > MAX_ASTC_HDR_6X6_DIM))
23448
return false;
23449
23450
const uint32_t num_blocks_x = (width + BLOCK_W - 1) / BLOCK_W;
23451
const uint32_t num_blocks_y = (height + BLOCK_H - 1) / BLOCK_H;
23452
23453
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
23454
23455
decoded_blocks.resize(num_blocks_x, num_blocks_y);
23456
//memset(decoded_blocks.get_ptr(), 0, decoded_blocks.size_in_bytes());
23457
23458
// These are the decoded log blocks, NOT the output log blocks.
23459
basisu::vector2D<astc_helpers::log_astc_block> decoded_log_blocks(num_blocks_x, REUSE_MAX_BUFFER_ROWS);
23460
memset(decoded_log_blocks.get_ptr(), 0, decoded_log_blocks.size_in_bytes());
23461
23462
uint32_t cur_bx = 0, cur_by = 0;
23463
int cur_row_index = 0;
23464
23465
uint32_t step_counter = 0;
23466
BASISU_NOTE_UNUSED(step_counter);
23467
23468
while (cur_by < num_blocks_y)
23469
{
23470
step_counter++;
23471
23472
//if ((cur_bx == 9) && (cur_by == 13))
23473
// printf("!");
23474
23475
#if SYNC_MARKERS
23476
uint32_t mk = decoder.get_bits(16);
23477
if (mk != 0xDEAD)
23478
{
23479
printf("!");
23480
assert(0);
23481
return false;
23482
}
23483
#endif
23484
if (decoder.get_bits_remaining() < 1)
23485
return false;
23486
23487
encoding_type et = encoding_type::cBlock;
23488
23489
uint32_t b0 = decoder.get_bits(1);
23490
if (!b0)
23491
{
23492
uint32_t b1 = decoder.get_bits(1);
23493
if (b1)
23494
et = encoding_type::cReuse;
23495
else
23496
{
23497
uint32_t b2 = decoder.get_bits(1);
23498
if (b2)
23499
et = encoding_type::cSolid;
23500
else
23501
et = encoding_type::cRun;
23502
}
23503
}
23504
23505
switch (et)
23506
{
23507
case encoding_type::cRun:
23508
{
23509
if (!cur_bx && !cur_by)
23510
return false;
23511
23512
const uint32_t run_len = decoder.decode_vlc(5) + 1;
23513
23514
uint32_t num_blocks_remaining = total_blocks - (cur_bx + cur_by * num_blocks_x);
23515
if (run_len > num_blocks_remaining)
23516
return false;
23517
23518
uint32_t prev_bx = cur_bx, prev_by = cur_by;
23519
23520
if (cur_bx)
23521
prev_bx--;
23522
else
23523
{
23524
prev_bx = num_blocks_x - 1;
23525
prev_by--;
23526
}
23527
23528
const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index));
23529
const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by);
23530
23531
assert((prev_log_blk.m_user_mode == 255) || (prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS));
23532
23533
for (uint32_t i = 0; i < run_len; i++)
23534
{
23535
decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)) = prev_log_blk;
23536
decoded_blocks(cur_bx, cur_by) = prev_phys_blk;
23537
23538
cur_bx++;
23539
if (cur_bx == num_blocks_x)
23540
{
23541
cur_bx = 0;
23542
cur_by++;
23543
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23544
}
23545
}
23546
23547
break;
23548
}
23549
case encoding_type::cSolid:
23550
{
23551
const basist::half_float rh = (basist::half_float)decoder.get_bits(15);
23552
const basist::half_float gh = (basist::half_float)decoder.get_bits(15);
23553
const basist::half_float bh = (basist::half_float)decoder.get_bits(15);
23554
23555
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23556
23557
log_blk.clear();
23558
log_blk.m_user_mode = 255;
23559
log_blk.m_solid_color_flag_hdr = true;
23560
log_blk.m_solid_color[0] = rh;
23561
log_blk.m_solid_color[1] = gh;
23562
log_blk.m_solid_color[2] = bh;
23563
log_blk.m_solid_color[3] = basist::float_to_half(1.0f);
23564
23565
bool status = astc_helpers::pack_astc_block(decoded_blocks(cur_bx, cur_by), log_blk);
23566
if (!status)
23567
return false;
23568
23569
cur_bx++;
23570
if (cur_bx == num_blocks_x)
23571
{
23572
cur_bx = 0;
23573
cur_by++;
23574
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23575
}
23576
23577
break;
23578
}
23579
case encoding_type::cReuse:
23580
{
23581
if (!cur_bx && !cur_by)
23582
return false;
23583
23584
const uint32_t reuse_delta_index = decoder.get_bits(REUSE_XY_DELTA_BITS);
23585
23586
const int reuse_delta_x = g_reuse_xy_deltas[reuse_delta_index].m_x;
23587
const int reuse_delta_y = g_reuse_xy_deltas[reuse_delta_index].m_y;
23588
23589
const int prev_bx = cur_bx + reuse_delta_x, prev_by = cur_by + reuse_delta_y;
23590
if ((prev_bx < 0) || (prev_bx >= (int)num_blocks_x))
23591
return false;
23592
if (prev_by < 0)
23593
return false;
23594
23595
const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index));
23596
23597
if (prev_log_blk.m_solid_color_flag_hdr)
23598
return false;
23599
assert(prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS);
23600
23601
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23602
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23603
23604
log_blk = prev_log_blk;
23605
23606
const uint32_t total_grid_weights = log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 2 : 1);
23607
23608
bool status = decode_values(decoder, total_grid_weights, log_blk.m_weight_ise_range, log_blk.m_weights);
23609
if (!status)
23610
return false;
23611
23612
#if 0
23613
const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by);
23614
23615
astc_helpers::log_astc_block decomp_blk;
23616
status = astc_helpers::unpack_block(&prev_phys_blk, decomp_blk, BLOCK_W, BLOCK_H);
23617
if (!status)
23618
return false;
23619
23620
uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2];
23621
requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, decomp_blk.m_weight_ise_range);
23622
23623
copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk);
23624
#else
23625
assert(log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS);
23626
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)log_blk.m_user_mode];
23627
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23628
23629
assert(bmd.m_grid_x == log_blk.m_grid_width && bmd.m_grid_y == log_blk.m_grid_height);
23630
assert(bmd.m_dp == log_blk.m_dual_plane);
23631
assert(bmd.m_cem == log_blk.m_color_endpoint_modes[0]);
23632
assert(bmd.m_num_partitions == log_blk.m_num_partitions);
23633
assert(bmd.m_dp_channel == log_blk.m_color_component_selector);
23634
23635
// important: bmd.m_weight_ise_range/m_endpoint_ise_range may not match the logical block's due to deltas.
23636
23637
astc_helpers::log_astc_block decomp_blk;
23638
decomp_blk.clear();
23639
decomp_blk.m_dual_plane = bmd.m_dp;
23640
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23641
decomp_blk.m_partition_id = log_blk.m_partition_id;
23642
23643
decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23644
23645
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23646
decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23647
23648
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23649
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23650
23651
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23652
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p);
23653
23654
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23655
requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23656
23657
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23658
#endif
23659
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23660
if (!status)
23661
return false;
23662
23663
cur_bx++;
23664
if (cur_bx == num_blocks_x)
23665
{
23666
cur_bx = 0;
23667
cur_by++;
23668
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23669
}
23670
23671
break;
23672
}
23673
case encoding_type::cBlock:
23674
{
23675
const block_mode bm = (block_mode)decoder.decode_truncated_binary((uint32_t)block_mode::cBMTotalModes);
23676
const endpoint_mode em = (endpoint_mode)decoder.decode_truncated_binary((uint32_t)endpoint_mode::cTotal);
23677
23678
switch (em)
23679
{
23680
case endpoint_mode::cUseLeft:
23681
case endpoint_mode::cUseUpper:
23682
{
23683
int neighbor_bx = cur_bx, neighbor_by = cur_by;
23684
23685
if (em == endpoint_mode::cUseLeft)
23686
neighbor_bx--;
23687
else
23688
neighbor_by--;
23689
23690
if ((neighbor_bx < 0) || (neighbor_by < 0))
23691
return false;
23692
23693
const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index));
23694
if (!neighbor_blk.m_color_endpoint_modes[0])
23695
return false;
23696
23697
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23698
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23699
23700
if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0])
23701
return false;
23702
23703
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23704
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23705
23706
log_blk.clear();
23707
assert((uint32_t)bm <= UINT8_MAX);
23708
log_blk.m_user_mode = (uint8_t)bm;
23709
log_blk.m_num_partitions = 1;
23710
log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23711
// Important: Notice how we're copying the neighbor's endpoint ISE range. Not using the mode's endpoint ISE range here.
23712
// This is to avoid introducing more quantization error.
23713
log_blk.m_endpoint_ise_range = neighbor_blk.m_endpoint_ise_range;
23714
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23715
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23716
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23717
log_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23718
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23719
23720
memcpy(log_blk.m_endpoints, neighbor_blk.m_endpoints, num_endpoint_values);
23721
23722
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23723
23724
bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23725
if (!status)
23726
return false;
23727
23728
astc_helpers::log_astc_block decomp_blk;
23729
decomp_blk.clear();
23730
23731
decomp_blk.m_num_partitions = 1;
23732
decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23733
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23734
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23735
decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23736
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23737
23738
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints);
23739
23740
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23741
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23742
23743
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23744
23745
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23746
if (!status)
23747
return false;
23748
23749
cur_bx++;
23750
if (cur_bx == num_blocks_x)
23751
{
23752
cur_bx = 0;
23753
cur_by++;
23754
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23755
}
23756
23757
break;
23758
}
23759
case endpoint_mode::cUseLeftDelta:
23760
case endpoint_mode::cUseUpperDelta:
23761
{
23762
int neighbor_bx = cur_bx, neighbor_by = cur_by;
23763
23764
if (em == endpoint_mode::cUseLeftDelta)
23765
neighbor_bx--;
23766
else
23767
neighbor_by--;
23768
23769
if ((neighbor_bx < 0) || (neighbor_by < 0))
23770
return false;
23771
23772
const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index));
23773
if (!neighbor_blk.m_color_endpoint_modes[0])
23774
return false;
23775
23776
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23777
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23778
23779
if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0])
23780
return false;
23781
23782
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23783
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23784
23785
log_blk.clear();
23786
assert((uint32_t)bm <= UINT8_MAX);
23787
log_blk.m_user_mode = (uint8_t)bm;
23788
log_blk.m_num_partitions = 1;
23789
log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23790
log_blk.m_dual_plane = bmd.m_dp;
23791
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23792
23793
log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range;
23794
requantize_ise_endpoints(bmd.m_cem, neighbor_blk.m_endpoint_ise_range, neighbor_blk.m_endpoints, bmd.m_endpoint_ise_range, log_blk.m_endpoints);
23795
23796
const int total_endpoint_delta_vals = 1 << NUM_ENDPOINT_DELTA_BITS;
23797
const int low_delta_limit = -(total_endpoint_delta_vals / 2); // high_delta_limit = (total_endpoint_delta_vals / 2) - 1;
23798
23799
const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank;
23800
const auto& rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE;
23801
const int total_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range);
23802
23803
for (uint32_t i = 0; i < num_endpoint_values; i++)
23804
{
23805
int cur_val = ise_to_rank[log_blk.m_endpoints[i]];
23806
23807
int delta = (int)decoder.get_bits(NUM_ENDPOINT_DELTA_BITS) + low_delta_limit;
23808
23809
cur_val += delta;
23810
if ((cur_val < 0) || (cur_val >= total_endpoint_levels))
23811
return false;
23812
23813
log_blk.m_endpoints[i] = rank_to_ise[cur_val];
23814
}
23815
23816
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23817
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23818
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23819
23820
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23821
23822
bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23823
if (!status)
23824
return false;
23825
23826
astc_helpers::log_astc_block decomp_blk;
23827
decomp_blk.clear();
23828
23829
decomp_blk.m_num_partitions = 1;
23830
decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23831
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23832
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23833
decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23834
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23835
23836
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints);
23837
23838
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23839
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23840
23841
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23842
23843
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23844
if (!status)
23845
return false;
23846
23847
cur_bx++;
23848
if (cur_bx == num_blocks_x)
23849
{
23850
cur_bx = 0;
23851
cur_by++;
23852
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23853
}
23854
23855
break;
23856
}
23857
case endpoint_mode::cRaw:
23858
{
23859
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23860
23861
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23862
23863
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23864
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23865
23866
log_blk.clear();
23867
23868
assert((uint32_t)bm <= UINT8_MAX);
23869
log_blk.m_user_mode = (uint8_t)bm;
23870
23871
log_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23872
23873
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23874
log_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23875
23876
log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range;
23877
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23878
23879
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23880
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23881
log_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23882
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23883
23884
if (bmd.m_num_partitions == 2)
23885
{
23886
const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS2);
23887
log_blk.m_partition_id = (uint16_t)g_part2_unique_index_to_seed[unique_partition_index];
23888
}
23889
else if (bmd.m_num_partitions == 3)
23890
{
23891
const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS3);
23892
log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_partition_index];
23893
}
23894
23895
bool status = decode_values(decoder, num_endpoint_values * bmd.m_num_partitions, bmd.m_endpoint_ise_range, log_blk.m_endpoints);
23896
if (!status)
23897
return false;
23898
23899
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23900
23901
status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23902
if (!status)
23903
return false;
23904
23905
astc_helpers::log_astc_block decomp_blk;
23906
decomp_blk.clear();
23907
decomp_blk.m_dual_plane = bmd.m_dp;
23908
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23909
decomp_blk.m_partition_id = log_blk.m_partition_id;
23910
23911
decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23912
23913
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23914
decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23915
23916
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23917
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23918
23919
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23920
requantize_ise_endpoints(bmd.m_cem, bmd.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p);
23921
23922
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23923
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23924
23925
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23926
23927
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23928
if (!status)
23929
return false;
23930
23931
cur_bx++;
23932
if (cur_bx == num_blocks_x)
23933
{
23934
cur_bx = 0;
23935
cur_by++;
23936
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23937
}
23938
23939
break;
23940
}
23941
default:
23942
{
23943
assert(0);
23944
return false;
23945
}
23946
}
23947
23948
break;
23949
}
23950
default:
23951
{
23952
assert(0);
23953
return false;
23954
}
23955
}
23956
}
23957
23958
if (decoder.get_bits(16) != 0xA742)
23959
{
23960
//fmt_error_printf("End marker not found!\n");
23961
return false;
23962
}
23963
23964
//fmt_printf("Total decode_file() time: {} secs\n", tm.get_elapsed_secs());
23965
23966
return true;
23967
}
23968
23969
} // namespace astc_6x6_hdr
23970
23971
#endif // BASISD_SUPPORT_UASTC_HDR
23972
23973
} // namespace basist
23974
23975