// Source: GitHub repository godotengine/godot
// Path: blob/master/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
1
// basisu_transcoder.cpp
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
#include "basisu_transcoder.h"
17
#include "basisu_containers_impl.h"
18
19
#include "basisu_astc_hdr_core.h"
20
21
#define BASISU_ASTC_HELPERS_IMPLEMENTATION
22
#include "basisu_astc_helpers.h"
23
24
#include <limits.h>
25
26
#if defined(_MSC_VER)
27
#include <intrin.h> // For __popcnt intrinsic
28
#endif
29
30
#ifndef BASISD_IS_BIG_ENDIAN
31
// TODO: This doesn't work on OSX. How can this be so difficult?
32
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
33
// #define BASISD_IS_BIG_ENDIAN (1)
34
//#else
35
#define BASISD_IS_BIG_ENDIAN (0)
36
//#endif
37
#endif
38
39
#ifndef BASISD_USE_UNALIGNED_WORD_READS
40
#ifdef __EMSCRIPTEN__
41
// Can't use unaligned loads/stores with WebAssembly.
42
#define BASISD_USE_UNALIGNED_WORD_READS (0)
43
#elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)
44
#define BASISD_USE_UNALIGNED_WORD_READS (1)
45
#else
46
#define BASISD_USE_UNALIGNED_WORD_READS (0)
47
#endif
48
#endif
49
50
// Using unaligned loads and stores causes errors when using UBSan. Jam it off.
51
#if defined(__has_feature)
52
#if __has_feature(undefined_behavior_sanitizer)
53
#undef BASISD_USE_UNALIGNED_WORD_READS
54
#define BASISD_USE_UNALIGNED_WORD_READS 0
55
#endif
56
#endif
57
58
#define BASISD_SUPPORTED_BASIS_VERSION (0x13)
59
60
#ifndef BASISD_SUPPORT_KTX2
61
#error Must have defined BASISD_SUPPORT_KTX2
62
#endif
63
64
#ifndef BASISD_SUPPORT_KTX2_ZSTD
65
#error Must have defined BASISD_SUPPORT_KTX2_ZSTD
66
#endif
67
68
// Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data.
69
#ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
70
#define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0
71
#endif
72
73
#ifndef BASISD_SUPPORT_DXT1
74
#define BASISD_SUPPORT_DXT1 1
75
#endif
76
77
#ifndef BASISD_SUPPORT_DXT5A
78
#define BASISD_SUPPORT_DXT5A 1
79
#endif
80
81
// Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript)
82
#if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7
83
#ifndef BASISD_SUPPORT_BC7_MODE5
84
#define BASISD_SUPPORT_BC7_MODE5 0
85
#endif
86
#endif // !BASISD_SUPPORT_BC7
87
88
// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory BC1.
89
#ifndef BASISD_SUPPORT_BC7_MODE5
90
#define BASISD_SUPPORT_BC7_MODE5 1
91
#endif
92
93
#ifndef BASISD_SUPPORT_PVRTC1
94
#define BASISD_SUPPORT_PVRTC1 1
95
#endif
96
97
#ifndef BASISD_SUPPORT_ETC2_EAC_A8
98
#define BASISD_SUPPORT_ETC2_EAC_A8 1
99
#endif
100
101
// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files.
102
#ifndef BASISD_SUPPORT_UASTC
103
#define BASISD_SUPPORT_UASTC 1
104
#endif
105
106
#ifndef BASISD_SUPPORT_ASTC
107
#define BASISD_SUPPORT_ASTC 1
108
#endif
109
110
// Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support.
111
#ifndef BASISD_SUPPORT_ATC
112
#define BASISD_SUPPORT_ATC 1
113
#endif
114
115
// Support for ETC2 EAC R11 and ETC2 EAC RG11
116
#ifndef BASISD_SUPPORT_ETC2_EAC_RG11
117
#define BASISD_SUPPORT_ETC2_EAC_RG11 1
118
#endif
119
120
// If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large.
121
// This impacts grayscale and grayscale+alpha textures the most.
122
#ifndef BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
123
#ifdef __EMSCRIPTEN__
124
// Let's assume size matters more than quality when compiling with emscripten.
125
#define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 0
126
#else
127
// Compiling native, so an extra 64K lookup table is probably acceptable.
128
#define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 1
129
#endif
130
#endif
131
132
#ifndef BASISD_SUPPORT_FXT1
133
#define BASISD_SUPPORT_FXT1 1
134
#endif
135
136
#ifndef BASISD_SUPPORT_PVRTC2
137
#define BASISD_SUPPORT_PVRTC2 1
138
#endif
139
140
#if BASISD_SUPPORT_PVRTC2
141
#if !BASISD_SUPPORT_ATC
142
#error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1
143
#endif
144
#endif
145
146
#if BASISD_SUPPORT_ATC
147
#if !BASISD_SUPPORT_DXT5A
148
#error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1
149
#endif
150
#endif
151
152
#ifndef BASISD_SUPPORT_UASTC_HDR
153
#define BASISD_SUPPORT_UASTC_HDR 1
154
#endif
155
156
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
157
#define BASISD_WRITE_NEW_DXT1_TABLES 0
158
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
159
#define BASISD_WRITE_NEW_ASTC_TABLES 0
160
#define BASISD_WRITE_NEW_ATC_TABLES 0
161
#define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0
162
163
#ifndef BASISD_ENABLE_DEBUG_FLAGS
164
#define BASISD_ENABLE_DEBUG_FLAGS 0
165
#endif
166
167
// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header.
168
#if BASISD_SUPPORT_KTX2
169
// If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
170
#if BASISD_SUPPORT_KTX2_ZSTD
171
// We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError()
172
#include <zstd.h>
173
#endif
174
#endif
175
176
#if BASISD_SUPPORT_UASTC_HDR
177
using namespace basist::astc_6x6_hdr;
178
#endif
179
180
namespace basisu
{
	// Global switch consulted by debug_printf() and debug_puts().
	bool g_debug_printf;

	// Turns debug output on or off.
	void enable_debug_printf(bool enabled)
	{
		g_debug_printf = enabled;
	}

	// printf-style debug output to stdout; prints nothing unless debug output is enabled.
	// When BASISU_FORCE_DEVEL_MESSAGES is non-zero, every call forces the switch on first.
	void debug_printf(const char* pFmt, ...)
	{
#if BASISU_FORCE_DEVEL_MESSAGES
		g_debug_printf = true;
#endif
		if (g_debug_printf)
		{
			va_list args;
			va_start(args, pFmt);
			vprintf(pFmt, args);
			va_end(args);
		}
	}

	// Writes string p to stdout (no newline is appended) when debug output is enabled.
	void debug_puts(const char* p)
	{
#if BASISU_FORCE_DEVEL_MESSAGES
		g_debug_printf = true;
#endif
		if (g_debug_printf)
		{
			//puts(p);
			printf("%s", p);
		}
	}
} // namespace basisu
215
216
namespace basist
217
{
218
#if BASISD_ENABLE_DEBUG_FLAGS
	// Backing storage for the debug flags; only compiled in when BASISD_ENABLE_DEBUG_FLAGS is non-zero.
	static uint32_t g_debug_flags = 0;
#endif

	// Returns the current debug flags, or 0 when BASISD_ENABLE_DEBUG_FLAGS is disabled.
	uint32_t get_debug_flags()
	{
#if BASISD_ENABLE_DEBUG_FLAGS
		return g_debug_flags;
#else
		return 0;
#endif
	}
230
231
void set_debug_flags(uint32_t f)
232
{
233
BASISU_NOTE_UNUSED(f);
234
#if BASISD_ENABLE_DEBUG_FLAGS
235
g_debug_flags = f;
236
#endif
237
}
238
239
inline uint16_t byteswap_uint16(uint16_t v)
240
{
241
return static_cast<uint16_t>((v >> 8) | (v << 8));
242
}
243
244
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; }
245
static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; }
246
static inline float saturate(float value) { return clampf(value, 0, 1.0f); }
247
248
static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
249
static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; }
250
static inline int lerp_8bit(int a, int b, int s) { assert(a >= 0 && a <= 255); assert(b >= 0 && b <= 255); assert(s >= 0 && s <= 255); return a + mul_8bit(b - a, s); }
251
252
struct vec2F
253
{
254
float c[2];
255
256
inline vec2F() {}
257
258
inline vec2F(float s) { c[0] = s; c[1] = s; }
259
inline vec2F(float x, float y) { c[0] = x; c[1] = y; }
260
261
inline void set(float x, float y) { c[0] = x; c[1] = y; }
262
263
inline float dot(const vec2F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]); }
264
265
inline float operator[] (uint32_t index) const { assert(index < 2); return c[index]; }
266
inline float& operator[] (uint32_t index) { assert(index < 2); return c[index]; }
267
268
inline vec2F& clamp(float l, float h)
269
{
270
c[0] = basisu::clamp(c[0], l, h);
271
c[1] = basisu::clamp(c[1], l, h);
272
return *this;
273
}
274
275
static vec2F lerp(const vec2F& a, const vec2F& b, float s)
276
{
277
vec2F res;
278
for (uint32_t i = 0; i < 2; i++)
279
res[i] = basisu::lerp(a[i], b[i], s);
280
return res;
281
}
282
};
283
284
struct vec3F
285
{
286
float c[3];
287
288
inline vec3F() {}
289
290
inline vec3F(float s) { c[0] = s; c[1] = s; c[2] = s; }
291
inline vec3F(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; }
292
293
inline void set(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; }
294
295
inline float dot(const vec3F& o) const { return (c[0] * o.c[0]) + (c[1] * o.c[1]) + (c[2] * o.c[2]); }
296
297
inline float operator[] (uint32_t index) const { assert(index < 3); return c[index]; }
298
inline float &operator[] (uint32_t index) { assert(index < 3); return c[index]; }
299
300
inline vec3F& clamp(float l, float h)
301
{
302
c[0] = basisu::clamp(c[0], l, h);
303
c[1] = basisu::clamp(c[1], l, h);
304
c[2] = basisu::clamp(c[2], l, h);
305
return *this;
306
}
307
308
static vec3F lerp(const vec3F& a, const vec3F& b, float s)
309
{
310
vec3F res;
311
for (uint32_t i = 0; i < 3; i++)
312
res[i] = basisu::lerp(a[i], b[i], s);
313
return res;
314
}
315
};
316
317
uint16_t crc16(const void* r, size_t size, uint16_t crc)
318
{
319
crc = ~crc;
320
321
const uint8_t* p = static_cast<const uint8_t*>(r);
322
for (; size; --size)
323
{
324
const uint16_t q = *p++ ^ (crc >> 8);
325
uint16_t k = (q >> 4) ^ q;
326
crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12);
327
}
328
329
return static_cast<uint16_t>(~crc);
330
}
331
332
struct vec4F
333
{
334
float c[4];
335
336
inline void set(float x, float y, float z, float w) { c[0] = x; c[1] = y; c[2] = z; c[3] = w; }
337
338
float operator[] (uint32_t index) const { assert(index < 4); return c[index]; }
339
float& operator[] (uint32_t index) { assert(index < 4); return c[index]; }
340
};
341
342
enum etc_constants
343
{
344
cETC1BytesPerBlock = 8U,
345
346
cETC1SelectorBits = 2U,
347
cETC1SelectorValues = 1U << cETC1SelectorBits,
348
cETC1SelectorMask = cETC1SelectorValues - 1U,
349
350
cETC1BlockShift = 2U,
351
cETC1BlockSize = 1U << cETC1BlockShift,
352
353
cETC1LSBSelectorIndicesBitOffset = 0,
354
cETC1MSBSelectorIndicesBitOffset = 16,
355
356
cETC1FlipBitOffset = 32,
357
cETC1DiffBitOffset = 33,
358
359
cETC1IntenModifierNumBits = 3,
360
cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
361
cETC1RightIntenModifierTableBitOffset = 34,
362
cETC1LeftIntenModifierTableBitOffset = 37,
363
364
// Base+Delta encoding (5 bit bases, 3 bit delta)
365
cETC1BaseColorCompNumBits = 5,
366
cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
367
368
cETC1DeltaColorCompNumBits = 3,
369
cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
370
cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
371
372
cETC1BaseColor5RBitOffset = 59,
373
cETC1BaseColor5GBitOffset = 51,
374
cETC1BaseColor5BBitOffset = 43,
375
376
cETC1DeltaColor3RBitOffset = 56,
377
cETC1DeltaColor3GBitOffset = 48,
378
cETC1DeltaColor3BBitOffset = 40,
379
380
// Absolute (non-delta) encoding (two 4-bit per component bases)
381
cETC1AbsColorCompNumBits = 4,
382
cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
383
384
cETC1AbsColor4R1BitOffset = 60,
385
cETC1AbsColor4G1BitOffset = 52,
386
cETC1AbsColor4B1BitOffset = 44,
387
388
cETC1AbsColor4R2BitOffset = 56,
389
cETC1AbsColor4G2BitOffset = 48,
390
cETC1AbsColor4B2BitOffset = 40,
391
392
cETC1ColorDeltaMin = -4,
393
cETC1ColorDeltaMax = 3,
394
395
// Delta3:
396
// 0 1 2 3 4 5 6 7
397
// 000 001 010 011 100 101 110 111
398
// 0 1 2 3 -4 -3 -2 -1
399
};
400
401
// Declares a static [cETC1IntenModifierValues][cETC1SelectorValues] table named `name`
// holding the ETC1 intensity modifiers, each multiplied by N.
// N is parenthesized so that an expression argument (e.g. 3 * 16) scales every entry as a unit.
// The expansion ends in ';', so the ';' written after each use below is a harmless empty declaration.
#define DECLARE_ETC1_INTEN_TABLE(name, N) \
	static const int name[cETC1IntenModifierValues][cETC1SelectorValues] = \
	{ \
		{ (N) * -8,   (N) * -2,  (N) * 2,  (N) * 8 },   { (N) * -17,  (N) * -5,  (N) * 5,  (N) * 17 },  { (N) * -29,  (N) * -9,  (N) * 9,  (N) * 29 },  { (N) * -42,  (N) * -13, (N) * 13, (N) * 42 }, \
		{ (N) * -60,  (N) * -18, (N) * 18, (N) * 60 },  { (N) * -80,  (N) * -24, (N) * 24, (N) * 80 },  { (N) * -106, (N) * -33, (N) * 33, (N) * 106 }, { (N) * -183, (N) * -47, (N) * 47, (N) * 183 } \
	};

	// Unscaled modifiers, plus copies pre-multiplied by 16 and by 48.
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1);
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16);
	DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16);
411
412
//const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
413
const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
414
415
static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 };
416
417
struct decoder_etc_block
418
{
419
// big endian uint64:
420
// bit ofs: 56 48 40 32 24 16 8 0
421
// byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
422
union
423
{
424
uint64_t m_uint64;
425
426
uint32_t m_uint32[2];
427
428
uint8_t m_bytes[8];
429
430
struct
431
{
432
signed m_dred2 : 3;
433
uint32_t m_red1 : 5;
434
435
signed m_dgreen2 : 3;
436
uint32_t m_green1 : 5;
437
438
signed m_dblue2 : 3;
439
uint32_t m_blue1 : 5;
440
441
uint32_t m_flip : 1;
442
uint32_t m_diff : 1;
443
uint32_t m_cw2 : 3;
444
uint32_t m_cw1 : 3;
445
446
uint32_t m_selectors;
447
} m_differential;
448
};
449
450
inline void clear()
451
{
452
assert(sizeof(*this) == 8);
453
basisu::clear_obj(*this);
454
}
455
456
inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits)
457
{
458
assert((ofs + num) <= 64U);
459
assert(num && (num < 32U));
460
assert((ofs >> 3) == ((ofs + num - 1) >> 3));
461
assert(bits < (1U << num));
462
const uint32_t byte_ofs = 7 - (ofs >> 3);
463
const uint32_t byte_bit_ofs = ofs & 7;
464
const uint32_t mask = (1 << num) - 1;
465
m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
466
m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
467
}
468
469
inline void set_flip_bit(bool flip)
470
{
471
m_bytes[3] &= ~1;
472
m_bytes[3] |= static_cast<uint8_t>(flip);
473
}
474
475
inline void set_diff_bit(bool diff)
476
{
477
m_bytes[3] &= ~2;
478
m_bytes[3] |= (static_cast<uint32_t>(diff) << 1);
479
}
480
481
// Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
482
inline void set_inten_table(uint32_t subblock_id, uint32_t t)
483
{
484
assert(subblock_id < 2);
485
assert(t < 8);
486
const uint32_t ofs = subblock_id ? 2 : 5;
487
m_bytes[3] &= ~(7 << ofs);
488
m_bytes[3] |= (t << ofs);
489
}
490
491
// Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
492
inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
493
{
494
assert((x | y | val) < 4);
495
const uint32_t bit_index = x * 4 + y;
496
497
uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
498
499
const uint32_t byte_bit_ofs = bit_index & 7;
500
const uint32_t mask = 1 << byte_bit_ofs;
501
502
static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };
503
const uint32_t etc1_val = s_selector_index_to_etc1[val];
504
505
const uint32_t lsb = etc1_val & 1;
506
const uint32_t msb = etc1_val >> 1;
507
508
p[0] &= ~mask;
509
p[0] |= (lsb << byte_bit_ofs);
510
511
p[-2] &= ~mask;
512
p[-2] |= (msb << byte_bit_ofs);
513
}
514
515
// Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector())
516
inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const
517
{
518
assert((x | y) < 4);
519
520
const uint32_t bit_index = x * 4 + y;
521
const uint32_t byte_bit_ofs = bit_index & 7;
522
const uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
523
const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1;
524
const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1;
525
const uint32_t val = lsb | (msb << 1);
526
527
return val;
528
}
529
530
// Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
531
inline uint32_t get_selector(uint32_t x, uint32_t y) const
532
{
533
static const uint8_t s_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
534
return s_etc1_to_selector_index[get_raw_selector(x, y)];
535
}
536
537
inline void set_raw_selector_bits(uint32_t bits)
538
{
539
m_bytes[4] = static_cast<uint8_t>(bits);
540
m_bytes[5] = static_cast<uint8_t>(bits >> 8);
541
m_bytes[6] = static_cast<uint8_t>(bits >> 16);
542
m_bytes[7] = static_cast<uint8_t>(bits >> 24);
543
}
544
545
inline bool are_all_selectors_the_same() const
546
{
547
uint32_t v = *reinterpret_cast<const uint32_t*>(&m_bytes[4]);
548
549
if ((v == 0xFFFFFFFF) || (v == 0xFFFF) || (!v) || (v == 0xFFFF0000))
550
return true;
551
552
return false;
553
}
554
555
inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3)
556
{
557
m_bytes[4] = byte0;
558
m_bytes[5] = byte1;
559
m_bytes[6] = byte2;
560
m_bytes[7] = byte3;
561
}
562
563
inline uint32_t get_raw_selector_bits() const
564
{
565
return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24);
566
}
567
568
inline void set_base4_color(uint32_t idx, uint16_t c)
569
{
570
if (idx)
571
{
572
set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
573
set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
574
set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
575
}
576
else
577
{
578
set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
579
set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
580
set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
581
}
582
}
583
584
inline void set_base5_color(uint16_t c)
585
{
586
set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
587
set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
588
set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
589
}
590
591
void set_delta3_color(uint16_t c)
592
{
593
set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
594
set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
595
set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
596
}
597
598
void set_block_color4(const color32& c0_unscaled, const color32& c1_unscaled)
599
{
600
set_diff_bit(false);
601
602
set_base4_color(0, pack_color4(c0_unscaled, false));
603
set_base4_color(1, pack_color4(c1_unscaled, false));
604
}
605
606
void set_block_color5(const color32& c0_unscaled, const color32& c1_unscaled)
607
{
608
set_diff_bit(true);
609
610
set_base5_color(pack_color5(c0_unscaled, false));
611
612
int dr = c1_unscaled.r - c0_unscaled.r;
613
int dg = c1_unscaled.g - c0_unscaled.g;
614
int db = c1_unscaled.b - c0_unscaled.b;
615
616
set_delta3_color(pack_delta3(dr, dg, db));
617
}
618
619
bool set_block_color5_check(const color32& c0_unscaled, const color32& c1_unscaled)
620
{
621
set_diff_bit(true);
622
623
set_base5_color(pack_color5(c0_unscaled, false));
624
625
int dr = c1_unscaled.r - c0_unscaled.r;
626
int dg = c1_unscaled.g - c0_unscaled.g;
627
int db = c1_unscaled.b - c0_unscaled.b;
628
629
if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) ||
630
((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) ||
631
((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax)))
632
return false;
633
634
set_delta3_color(pack_delta3(dr, dg, db));
635
636
return true;
637
}
638
639
inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const
640
{
641
assert((ofs + num) <= 64U);
642
assert(num && (num <= 8U));
643
assert((ofs >> 3) == ((ofs + num - 1) >> 3));
644
const uint32_t byte_ofs = 7 - (ofs >> 3);
645
const uint32_t byte_bit_ofs = ofs & 7;
646
return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
647
}
648
649
inline uint16_t get_base5_color() const
650
{
651
const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
652
const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
653
const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
654
return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
655
}
656
657
inline uint16_t get_base4_color(uint32_t idx) const
658
{
659
uint32_t r, g, b;
660
if (idx)
661
{
662
r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
663
g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
664
b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
665
}
666
else
667
{
668
r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
669
g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
670
b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
671
}
672
return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
673
}
674
675
inline color32 get_base5_color_unscaled() const
676
{
677
return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255);
678
}
679
680
inline bool get_flip_bit() const
681
{
682
return (m_bytes[3] & 1) != 0;
683
}
684
685
inline bool get_diff_bit() const
686
{
687
return (m_bytes[3] & 2) != 0;
688
}
689
690
inline uint32_t get_inten_table(uint32_t subblock_id) const
691
{
692
assert(subblock_id < 2);
693
const uint32_t ofs = subblock_id ? 2 : 5;
694
return (m_bytes[3] >> ofs) & 7;
695
}
696
697
inline uint16_t get_delta3_color() const
698
{
699
const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
700
const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
701
const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
702
return static_cast<uint16_t>(b | (g << 3U) | (r << 6U));
703
}
704
705
void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const
706
{
707
color32 b;
708
709
if (get_diff_bit())
710
{
711
if (subblock_index)
712
unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255);
713
else
714
unpack_color5(b, get_base5_color(), true);
715
}
716
else
717
{
718
b = unpack_color4(get_base4_color(subblock_index), true, 255);
719
}
720
721
const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)];
722
723
pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
724
pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
725
pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
726
pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
727
}
728
729
static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U)
730
{
731
return pack_color4(color.r, color.g, color.b, scaled, bias);
732
}
733
734
static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
735
{
736
if (scaled)
737
{
738
r = (r * 15U + bias) / 255U;
739
g = (g * 15U + bias) / 255U;
740
b = (b * 15U + bias) / 255U;
741
}
742
743
r = basisu::minimum(r, 15U);
744
g = basisu::minimum(g, 15U);
745
b = basisu::minimum(b, 15U);
746
747
return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
748
}
749
750
static uint16_t pack_color5(const color32& color, bool scaled, uint32_t bias = 127U)
751
{
752
return pack_color5(color.r, color.g, color.b, scaled, bias);
753
}
754
755
static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
756
{
757
if (scaled)
758
{
759
r = (r * 31U + bias) / 255U;
760
g = (g * 31U + bias) / 255U;
761
b = (b * 31U + bias) / 255U;
762
}
763
764
r = basisu::minimum(r, 31U);
765
g = basisu::minimum(g, 31U);
766
b = basisu::minimum(b, 31U);
767
768
return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
769
}
770
771
uint16_t pack_delta3(const color32& color)
772
{
773
return pack_delta3(color.r, color.g, color.b);
774
}
775
776
uint16_t pack_delta3(int r, int g, int b)
777
{
778
assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
779
assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
780
assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
781
if (r < 0) r += 8;
782
if (g < 0) g += 8;
783
if (b < 0) b += 8;
784
return static_cast<uint16_t>(b | (g << 3) | (r << 6));
785
}
786
787
static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3)
788
{
789
r = (packed_delta3 >> 6) & 7;
790
g = (packed_delta3 >> 3) & 7;
791
b = packed_delta3 & 7;
792
if (r >= 4) r -= 8;
793
if (g >= 4) g -= 8;
794
if (b >= 4) b -= 8;
795
}
796
797
static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha)
798
{
799
uint32_t b = packed_color5 & 31U;
800
uint32_t g = (packed_color5 >> 5U) & 31U;
801
uint32_t r = (packed_color5 >> 10U) & 31U;
802
803
if (scaled)
804
{
805
b = (b << 3U) | (b >> 2U);
806
g = (g << 3U) | (g >> 2U);
807
r = (r << 3U) | (r >> 2U);
808
}
809
810
assert(alpha <= 255);
811
812
return color32(cNoClamp, r, g, b, alpha);
813
}
814
815
static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled)
816
{
817
color32 c(unpack_color5(packed_color5, scaled, 0));
818
r = c.r;
819
g = c.g;
820
b = c.b;
821
}
822
823
static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled)
824
{
825
result = unpack_color5(packed_color5, scaled, 255);
826
}
827
828
static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha)
829
{
830
int dr, dg, db;
831
unpack_delta3(dr, dg, db, packed_delta3);
832
833
int r = ((packed_color5 >> 10U) & 31U) + dr;
834
int g = ((packed_color5 >> 5U) & 31U) + dg;
835
int b = (packed_color5 & 31U) + db;
836
837
bool success = true;
838
if (static_cast<uint32_t>(r | g | b) > 31U)
839
{
840
success = false;
841
r = basisu::clamp<int>(r, 0, 31);
842
g = basisu::clamp<int>(g, 0, 31);
843
b = basisu::clamp<int>(b, 0, 31);
844
}
845
846
if (scaled)
847
{
848
b = (b << 3U) | (b >> 2U);
849
g = (g << 3U) | (g >> 2U);
850
r = (r << 3U) | (r >> 2U);
851
}
852
853
result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U));
854
return success;
855
}
856
857
static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha)
858
{
859
uint32_t b = packed_color4 & 15U;
860
uint32_t g = (packed_color4 >> 4U) & 15U;
861
uint32_t r = (packed_color4 >> 8U) & 15U;
862
863
if (scaled)
864
{
865
b = (b << 4U) | b;
866
g = (g << 4U) | g;
867
r = (r << 4U) | r;
868
}
869
870
return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U));
871
}
872
873
static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled)
874
{
875
color32 c(unpack_color4(packed_color4, scaled, 0));
876
r = c.r;
877
g = c.g;
878
b = c.b;
879
}
880
881
static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx)
882
{
883
assert(table_idx < cETC1IntenModifierValues);
884
const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
885
886
uint32_t r, g, b;
887
unpack_color5(r, g, b, packed_color5, true);
888
889
const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
890
891
const int y0 = pInten_modifer_table[0];
892
pDst[0].set(clamp255(ir + y0), clamp255(ig + y0), clamp255(ib + y0), 255);
893
894
const int y1 = pInten_modifer_table[1];
895
pDst[1].set(clamp255(ir + y1), clamp255(ig + y1), clamp255(ib + y1), 255);
896
897
const int y2 = pInten_modifer_table[2];
898
pDst[2].set(clamp255(ir + y2), clamp255(ig + y2), clamp255(ib + y2), 255);
899
900
const int y3 = pInten_modifer_table[3];
901
pDst[3].set(clamp255(ir + y3), clamp255(ig + y3), clamp255(ib + y3), 255);
902
}
903
904
static int clamp255(int x)
905
{
906
if (x & 0xFFFFFF00)
907
{
908
if (x < 0)
909
x = 0;
910
else if (x > 255)
911
x = 255;
912
}
913
914
return x;
915
}
916
917
static void get_block_colors5(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table)
918
{
919
color32 b(base_color5);
920
921
b.r = (b.r << 3) | (b.r >> 2);
922
b.g = (b.g << 3) | (b.g >> 2);
923
b.b = (b.b << 3) | (b.b >> 2);
924
925
const int* pInten_table = g_etc1_inten_tables[inten_table];
926
927
pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
928
pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
929
pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
930
pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
931
}
932
933
static void get_block_color5(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t& r, uint32_t &g, uint32_t &b)
934
{
935
assert(index < 4);
936
937
uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
938
uint32_t bg = (base_color5.g << 3) | (base_color5.g >> 2);
939
uint32_t bb = (base_color5.b << 3) | (base_color5.b >> 2);
940
941
const int* pInten_table = g_etc1_inten_tables[inten_table];
942
943
r = clamp255(br + pInten_table[index]);
944
g = clamp255(bg + pInten_table[index]);
945
b = clamp255(bb + pInten_table[index]);
946
}
947
948
static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r)
949
{
950
assert(index < 4);
951
952
uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
953
954
const int* pInten_table = g_etc1_inten_tables[inten_table];
955
956
r = clamp255(br + pInten_table[index]);
957
}
958
959
static void get_block_colors5_g(int* pBlock_colors, const color32& base_color5, uint32_t inten_table)
960
{
961
const int g = (base_color5.g << 3) | (base_color5.g >> 2);
962
963
const int* pInten_table = g_etc1_inten_tables[inten_table];
964
965
pBlock_colors[0] = clamp255(g + pInten_table[0]);
966
pBlock_colors[1] = clamp255(g + pInten_table[1]);
967
pBlock_colors[2] = clamp255(g + pInten_table[2]);
968
pBlock_colors[3] = clamp255(g + pInten_table[3]);
969
}
970
971
static void get_block_colors5_bounds(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
972
{
973
color32 b(base_color5);
974
975
b.r = (b.r << 3) | (b.r >> 2);
976
b.g = (b.g << 3) | (b.g >> 2);
977
b.b = (b.b << 3) | (b.b >> 2);
978
979
const int* pInten_table = g_etc1_inten_tables[inten_table];
980
981
pBlock_colors[0].set(clamp255(b.r + pInten_table[l]), clamp255(b.g + pInten_table[l]), clamp255(b.b + pInten_table[l]), 255);
982
pBlock_colors[1].set(clamp255(b.r + pInten_table[h]), clamp255(b.g + pInten_table[h]), clamp255(b.b + pInten_table[h]), 255);
983
}
984
985
static void get_block_colors5_bounds_g(uint32_t* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
986
{
987
color32 b(base_color5);
988
989
b.g = (b.g << 3) | (b.g >> 2);
990
991
const int* pInten_table = g_etc1_inten_tables[inten_table];
992
993
pBlock_colors[0] = clamp255(b.g + pInten_table[l]);
994
pBlock_colors[1] = clamp255(b.g + pInten_table[h]);
995
}
996
};
997
998
enum dxt_constants
999
{
1000
cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U,
1001
cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U,
1002
};
1003
1004
static const uint8_t g_etc1_x_selector_unpack[4][256] =
1005
{
1006
{
1007
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1008
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1009
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1010
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1011
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1012
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1013
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1014
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1015
},
1016
{
1017
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
1018
2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
1019
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
1020
2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
1021
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
1022
2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
1023
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
1024
2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
1025
},
1026
1027
{
1028
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
1029
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
1030
2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
1031
2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
1032
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
1033
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
1034
2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
1035
2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
1036
},
1037
1038
{
1039
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1040
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1041
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1042
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1043
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
1044
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
1045
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
1046
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
1047
}
1048
};
1049
1050
struct dxt1_block
1051
{
1052
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
1053
1054
uint8_t m_low_color[cTotalEndpointBytes];
1055
uint8_t m_high_color[cTotalEndpointBytes];
1056
uint8_t m_selectors[cTotalSelectorBytes];
1057
1058
inline void clear() { basisu::clear_obj(*this); }
1059
1060
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
1061
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
1062
inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
1063
inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
1064
inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; }
1065
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
1066
1067
static uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U)
1068
{
1069
uint32_t r = color.r, g = color.g, b = color.b;
1070
if (scaled)
1071
{
1072
r = (r * 31U + bias) / 255U;
1073
g = (g * 63U + bias) / 255U;
1074
b = (b * 31U + bias) / 255U;
1075
}
1076
return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
1077
}
1078
1079
static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
1080
};
1081
1082
// A pair of selector indices; the table generators in this file iterate
// from m_low to m_high inclusive.
struct dxt_selector_range
{
	uint32_t m_low;   // first selector in the range
	uint32_t m_high;  // last selector in the range
};
1087
1088
// One precomputed ETC1 -> DXT1 endpoint solution. The table generators in this file
// emit entries as {best_lo, best_hi, best_err}, which is the field order used here.
struct etc1_to_dxt1_56_solution
{
	uint8_t m_lo;    // low endpoint
	uint8_t m_hi;    // high endpoint
	uint16_t m_err;  // error of this solution
};
1094
1095
#if BASISD_SUPPORT_DXT1
1096
static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =
1097
{
1098
{ 0, 3 },
1099
1100
{ 1, 3 },
1101
{ 0, 2 },
1102
1103
{ 1, 2 },
1104
1105
{ 2, 3 },
1106
{ 0, 1 },
1107
};
1108
1109
const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES = sizeof(g_etc1_to_dxt1_selector_ranges) / sizeof(g_etc1_to_dxt1_selector_ranges[0]);
1110
1111
static uint32_t g_etc1_to_dxt1_selector_range_index[4][4];
1112
1113
const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS = 10;
1114
static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] =
1115
{
1116
{ 0, 0, 1, 1 },
1117
{ 0, 0, 1, 2 },
1118
{ 0, 0, 1, 3 },
1119
{ 0, 0, 2, 3 },
1120
{ 0, 1, 1, 1 },
1121
{ 0, 1, 2, 2 },
1122
{ 0, 1, 2, 3 },
1123
{ 0, 2, 3, 3 },
1124
{ 1, 2, 2, 2 },
1125
{ 1, 2, 3, 3 },
1126
};
1127
1128
static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
1129
static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
1130
1131
static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
1132
#include "basisu_transcoder_tables_dxt1_6.inc"
1133
};
1134
1135
static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
1136
#include "basisu_transcoder_tables_dxt1_5.inc"
1137
};
1138
#endif // BASISD_SUPPORT_DXT1
1139
1140
#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
1141
// First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
1142
// One entry of a BC1 single-color lookup table: the (hi, lo) endpoint index pair
// chosen for a given 8-bit target value.
struct bc1_match_entry
{
	uint8_t m_hi;  // high endpoint index
	uint8_t m_lo;  // low endpoint index
};
1147
static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo
1148
static bc1_match_entry g_bc1_match5_equals_0[256], g_bc1_match6_equals_0[256]; // selector 0, allow equals hi/lo
1149
1150
// Fills pTable[0..255]. For each 8-bit target value, every (lo, hi) pair with lo < size0 and
// hi < size1 is tried, and the first pair with the smallest error is stored.
//   pExpand - maps an endpoint index to its expanded value.
//   sel     - 1: error is measured against (2 * hi + lo) / 3, plus a small penalty of
//                3% of the endpoint distance;
//             0: error is measured against the expanded hi endpoint alone.
static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel)
{
	for (int target = 0; target < 256; target++)
	{
		int best_err = 256;

		for (int lo = 0; lo < size0; lo++)
		{
			for (int hi = 0; hi < size1; hi++)
			{
				const int lo_e = pExpand[lo];
				const int hi_e = pExpand[hi];

				int err;
				if (sel != 1)
				{
					assert(sel == 0);

					// Selector 0
					err = basisu::iabs(hi_e - target);
				}
				else
				{
					// Selector 1
					err = basisu::iabs(((hi_e * 2 + lo_e) / 3) - target);
					err += (basisu::iabs(hi_e - lo_e) * 3) / 100;
				}

				// Only a strictly better candidate replaces the current best.
				if (err >= best_err)
					continue;

				pTable[target].m_hi = static_cast<uint8_t>(hi);
				pTable[target].m_lo = static_cast<uint8_t>(lo);
				best_err = err;
			} // hi
		} // lo
	}
}
1188
#endif
1189
1190
#if BASISD_WRITE_NEW_DXT1_TABLES
1191
static void create_etc1_to_dxt1_5_conversion_table()
1192
{
1193
FILE* pFile = nullptr;
1194
fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w");
1195
1196
uint32_t n = 0;
1197
1198
for (int inten = 0; inten < 8; inten++)
1199
{
1200
for (uint32_t g = 0; g < 32; g++)
1201
{
1202
color32 block_colors[4];
1203
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1204
1205
for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1206
{
1207
const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1208
const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1209
1210
for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1211
{
1212
uint32_t best_lo = 0;
1213
uint32_t best_hi = 0;
1214
uint64_t best_err = UINT64_MAX;
1215
1216
for (uint32_t hi = 0; hi <= 31; hi++)
1217
{
1218
for (uint32_t lo = 0; lo <= 31; lo++)
1219
{
1220
//if (lo == hi) continue;
1221
1222
uint32_t colors[4];
1223
1224
colors[0] = (lo << 3) | (lo >> 2);
1225
colors[3] = (hi << 3) | (hi >> 2);
1226
1227
colors[1] = (colors[0] * 2 + colors[3]) / 3;
1228
colors[2] = (colors[3] * 2 + colors[0]) / 3;
1229
1230
uint64_t total_err = 0;
1231
1232
for (uint32_t s = low_selector; s <= high_selector; s++)
1233
{
1234
int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1235
1236
total_err += err * err;
1237
}
1238
1239
if (total_err < best_err)
1240
{
1241
best_err = total_err;
1242
best_lo = lo;
1243
best_hi = hi;
1244
}
1245
}
1246
}
1247
1248
assert(best_err <= 0xFFFF);
1249
1250
//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1251
//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1252
//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1253
1254
//assert(best_lo != best_hi);
1255
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1256
n++;
1257
if ((n & 31) == 31)
1258
fprintf(pFile, "\n");
1259
} // m
1260
} // sr
1261
} // g
1262
} // inten
1263
1264
fclose(pFile);
1265
}
1266
1267
static void create_etc1_to_dxt1_6_conversion_table()
1268
{
1269
FILE* pFile = nullptr;
1270
fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w");
1271
1272
uint32_t n = 0;
1273
1274
for (int inten = 0; inten < 8; inten++)
1275
{
1276
for (uint32_t g = 0; g < 32; g++)
1277
{
1278
color32 block_colors[4];
1279
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1280
1281
for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1282
{
1283
const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1284
const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1285
1286
for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1287
{
1288
uint32_t best_lo = 0;
1289
uint32_t best_hi = 0;
1290
uint64_t best_err = UINT64_MAX;
1291
1292
for (uint32_t hi = 0; hi <= 63; hi++)
1293
{
1294
for (uint32_t lo = 0; lo <= 63; lo++)
1295
{
1296
//if (lo == hi) continue;
1297
1298
uint32_t colors[4];
1299
1300
colors[0] = (lo << 2) | (lo >> 4);
1301
colors[3] = (hi << 2) | (hi >> 4);
1302
1303
colors[1] = (colors[0] * 2 + colors[3]) / 3;
1304
colors[2] = (colors[3] * 2 + colors[0]) / 3;
1305
1306
uint64_t total_err = 0;
1307
1308
for (uint32_t s = low_selector; s <= high_selector; s++)
1309
{
1310
int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1311
1312
total_err += err * err;
1313
}
1314
1315
if (total_err < best_err)
1316
{
1317
best_err = total_err;
1318
best_lo = lo;
1319
best_hi = hi;
1320
}
1321
}
1322
}
1323
1324
assert(best_err <= 0xFFFF);
1325
1326
//table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1327
//table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1328
//table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1329
1330
//assert(best_lo != best_hi);
1331
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1332
n++;
1333
if ((n & 31) == 31)
1334
fprintf(pFile, "\n");
1335
1336
} // m
1337
} // sr
1338
} // g
1339
} // inten
1340
1341
fclose(pFile);
1342
}
1343
#endif
1344
1345
#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1346
static const int8_t g_eac_modifier_table[16][8] =
1347
{
1348
{ -3, -6, -9, -15, 2, 5, 8, 14 },
1349
{ -3, -7, -10, -13, 2, 6, 9, 12 },
1350
{ -2, -5, -8, -13, 1, 4, 7, 12 },
1351
{ -2, -4, -6, -13, 1, 3, 5, 12 },
1352
{ -3, -6, -8, -12, 2, 5, 7, 11 },
1353
{ -3, -7, -9, -11, 2, 6, 8, 10 },
1354
{ -4, -7, -8, -11, 3, 6, 7, 10 },
1355
{ -3, -5, -8, -11, 2, 4, 7, 10 },
1356
1357
{ -2, -6, -8, -10, 1, 5, 7, 9 },
1358
{ -2, -5, -8, -10, 1, 4, 7, 9 },
1359
{ -2, -4, -8, -10, 1, 3, 7, 9 },
1360
{ -2, -5, -7, -10, 1, 4, 6, 9 },
1361
{ -3, -4, -7, -10, 2, 3, 6, 9 },
1362
{ -1, -2, -3, -10, 0, 1, 2, 9 }, // entry 13
1363
{ -4, -6, -8, -9, 3, 5, 7, 8 },
1364
{ -3, -5, -7, -9, 2, 4, 6, 8 }
1365
};
1366
1367
// Used by ETC2 EAC A8 and ETC2 EAC R11/RG11.
// Holds an 8-bit base, a 4-bit table index, a 4-bit multiplier and sixteen 3-bit selectors.
// The selectors form a 48-bit value stored in six bytes, m_selectors[0] being the most
// significant byte.
struct eac_block
{
	uint16_t m_base : 8;

	uint16_t m_table : 4;
	uint16_t m_multiplier : 4;

	uint8_t m_selectors[6];

	// Returns the six selector bytes merged into one 48-bit value.
	uint64_t get_selector_bits() const
	{
		uint64_t packed = 0;
		for (uint32_t i = 0; i < 6; i++)
			packed = (packed << 8) | m_selectors[i];
		return packed;
	}

	// Stores the low 48 bits of pixels into the six selector bytes.
	void set_selector_bits(uint64_t pixels)
	{
		// Peel bytes off the low end, filling the array from its last element backwards.
		for (int i = 5; i >= 0; i--)
		{
			m_selectors[i] = (uint8_t)(pixels & 0xFF);
			pixels >>= 8;
		}
	}

	// Returns the 3-bit selector of pixel (x, y). Pixel (0, 0) occupies the top three bits of
	// the 48-bit value; y advances fastest (bit offset 45 - (y + x * 4) * 3).
	uint32_t get_selector(uint32_t x, uint32_t y) const
	{
		assert((x < 4) && (y < 4));

		const uint32_t shift = 45 - (x * 4 + y) * 3;

		return static_cast<uint32_t>((get_selector_bits() >> shift) & 7);
	}

	// Replaces the 3-bit selector of pixel (x, y) with s (s must be < 8).
	void set_selector(uint32_t x, uint32_t y, uint32_t s)
	{
		assert((x < 4) && (y < 4) && (s < 8));

		const uint32_t shift = 45 - (x * 4 + y) * 3;

		const uint64_t cleared = get_selector_bits() & ~(7ULL << shift);

		set_selector_bits(cleared | (static_cast<uint64_t>(s) << shift));
	}
};
1420
1421
#endif // BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1422
1423
#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1424
static const dxt_selector_range s_etc2_eac_selector_ranges[] =
1425
{
1426
{ 0, 3 },
1427
1428
{ 1, 3 },
1429
{ 0, 2 },
1430
1431
{ 1, 2 },
1432
};
1433
1434
const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES = sizeof(s_etc2_eac_selector_ranges) / sizeof(s_etc2_eac_selector_ranges[0]);
1435
1436
// One precomputed conversion from an ETC1 base/intensity/selector-range combination
// to EAC block parameters.
struct etc1_g_to_eac_conversion
{
	uint8_t m_base;       // EAC base value
	uint8_t m_table_mul;  // packed table/multiplier; create_etc2_eac_a8_conversion_table() writes table * 16 + multiplier
	uint16_t m_trans;     // translates ETC1 selectors to ETC2_EAC_A8: 3 bits per ETC1 selector s, at bit offset s * 3
};
1442
#endif // BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1443
1444
#if BASISD_SUPPORT_ETC2_EAC_A8
1445
1446
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1447
struct pack_eac_a8_results
1448
{
1449
uint32_t m_base;
1450
uint32_t m_table;
1451
uint32_t m_multiplier;
1452
basisu::vector<uint8_t> m_selectors;
1453
basisu::vector<uint8_t> m_selectors_temp;
1454
};
1455
1456
// Brute-force EAC A8 fit. Tries every base (0-255), multiplier (1-15) and modifier table (0-15).
// For each pixel it picks the selector whose value, clamp(multiplier * modifier + base, 0, 255),
// is closest to the pixel. The combination with the smallest sum of squared errors is stored in
// results (base, multiplier, table and per-pixel selectors), and that sum is returned.
static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels)
{
	results.m_selectors.resize(num_pixels);
	results.m_selectors_temp.resize(num_pixels);

	uint64_t lowest_total = UINT64_MAX;

	for (uint32_t base_color = 0; base_color < 256; base_color++)
	{
		for (uint32_t multiplier = 1; multiplier < 16; multiplier++)
		{
			for (uint32_t table = 0; table < 16; table++)
			{
				uint64_t trial_total = 0;

				for (uint32_t i = 0; i < num_pixels; i++)
				{
					const int target = pPixels[i];

					uint32_t closest_err = UINT32_MAX;
					uint32_t closest_sel = 0;

					for (uint32_t s = 0; s < 8; s++)
					{
						int v = (int)multiplier * g_eac_modifier_table[table][s] + (int)base_color;
						if (v < 0)
							v = 0;
						else if (v > 255)
							v = 255;

						const uint32_t err = abs(target - v);
						if (err < closest_err)
						{
							closest_err = err;
							closest_sel = s;
						}
					}

					results.m_selectors_temp[i] = static_cast<uint8_t>(closest_sel);

					trial_total += closest_err * closest_err;

					// This candidate can no longer beat the best one, so stop early.
					if (trial_total >= lowest_total)
						break;
				}

				if (trial_total >= lowest_total)
					continue;

				// New best: record the parameters and keep the selectors just computed.
				lowest_total = trial_total;
				results.m_base = base_color;
				results.m_multiplier = multiplier;
				results.m_table = table;
				results.m_selectors.swap(results.m_selectors_temp);

			} // table

		} // multiplier

	} // base_color

	return lowest_total;
}
1517
#endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1518
1519
static
1520
#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1521
const
1522
#endif
1523
etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
1524
{
1525
{ { 0,1,3328 },{ 0,1,3328 },{ 0,1,256 },{ 0,1,256 } },
1526
{ { 0,226,3936 },{ 0,226,3936 },{ 0,81,488 },{ 0,81,488 } },
1527
{ { 6,178,4012 },{ 6,178,4008 },{ 0,146,501 },{ 0,130,496 } },
1528
{ { 14,178,4012 },{ 14,178,4008 },{ 8,146,501 },{ 6,82,496 } },
1529
{ { 23,178,4012 },{ 23,178,4008 },{ 17,146,501 },{ 3,228,496 } },
1530
{ { 31,178,4012 },{ 31,178,4008 },{ 25,146,501 },{ 11,228,496 } },
1531
{ { 39,178,4012 },{ 39,178,4008 },{ 33,146,501 },{ 19,228,496 } },
1532
{ { 47,178,4012 },{ 47,178,4008 },{ 41,146,501 },{ 27,228,496 } },
1533
{ { 56,178,4012 },{ 56,178,4008 },{ 50,146,501 },{ 36,228,496 } },
1534
{ { 64,178,4012 },{ 64,178,4008 },{ 58,146,501 },{ 44,228,496 } },
1535
{ { 72,178,4012 },{ 72,178,4008 },{ 66,146,501 },{ 52,228,496 } },
1536
{ { 80,178,4012 },{ 80,178,4008 },{ 74,146,501 },{ 60,228,496 } },
1537
{ { 89,178,4012 },{ 89,178,4008 },{ 83,146,501 },{ 69,228,496 } },
1538
{ { 97,178,4012 },{ 97,178,4008 },{ 91,146,501 },{ 77,228,496 } },
1539
{ { 105,178,4012 },{ 105,178,4008 },{ 99,146,501 },{ 85,228,496 } },
1540
{ { 113,178,4012 },{ 113,178,4008 },{ 107,146,501 },{ 93,228,496 } },
1541
{ { 122,178,4012 },{ 122,178,4008 },{ 116,146,501 },{ 102,228,496 } },
1542
{ { 130,178,4012 },{ 130,178,4008 },{ 124,146,501 },{ 110,228,496 } },
1543
{ { 138,178,4012 },{ 138,178,4008 },{ 132,146,501 },{ 118,228,496 } },
1544
{ { 146,178,4012 },{ 146,178,4008 },{ 140,146,501 },{ 126,228,496 } },
1545
{ { 155,178,4012 },{ 155,178,4008 },{ 149,146,501 },{ 135,228,496 } },
1546
{ { 163,178,4012 },{ 163,178,4008 },{ 157,146,501 },{ 143,228,496 } },
1547
{ { 171,178,4012 },{ 171,178,4008 },{ 165,146,501 },{ 151,228,496 } },
1548
{ { 179,178,4012 },{ 179,178,4008 },{ 173,146,501 },{ 159,228,496 } },
1549
{ { 188,178,4012 },{ 188,178,4008 },{ 182,146,501 },{ 168,228,496 } },
1550
{ { 196,178,4012 },{ 196,178,4008 },{ 190,146,501 },{ 176,228,496 } },
1551
{ { 204,178,4012 },{ 204,178,4008 },{ 198,146,501 },{ 184,228,496 } },
1552
{ { 212,178,4012 },{ 212,178,4008 },{ 206,146,501 },{ 192,228,496 } },
1553
{ { 221,178,4012 },{ 221,178,4008 },{ 215,146,501 },{ 201,228,496 } },
1554
{ { 229,178,4012 },{ 229,178,4008 },{ 223,146,501 },{ 209,228,496 } },
1555
{ { 235,66,4012 },{ 221,100,4008 },{ 231,146,501 },{ 217,228,496 } },
1556
{ { 211,102,4085 },{ 118,31,4080 },{ 211,102,501 },{ 118,31,496 } },
1557
{ { 1,2,3328 },{ 1,2,3328 },{ 0,1,320 },{ 0,1,320 } },
1558
{ { 7,162,3905 },{ 7,162,3904 },{ 1,17,480 },{ 1,17,480 } },
1559
{ { 15,162,3906 },{ 15,162,3904 },{ 1,117,352 },{ 1,117,352 } },
1560
{ { 23,162,3906 },{ 23,162,3904 },{ 5,34,500 },{ 4,53,424 } },
1561
{ { 32,162,3906 },{ 32,162,3904 },{ 14,34,500 },{ 3,69,424 } },
1562
{ { 40,162,3906 },{ 40,162,3904 },{ 22,34,500 },{ 1,133,496 } },
1563
{ { 48,162,3906 },{ 48,162,3904 },{ 30,34,500 },{ 4,85,496 } },
1564
{ { 56,162,3906 },{ 56,162,3904 },{ 38,34,500 },{ 12,85,496 } },
1565
{ { 65,162,3906 },{ 65,162,3904 },{ 47,34,500 },{ 1,106,424 } },
1566
{ { 73,162,3906 },{ 73,162,3904 },{ 55,34,500 },{ 9,106,424 } },
1567
{ { 81,162,3906 },{ 81,162,3904 },{ 63,34,500 },{ 7,234,496 } },
1568
{ { 89,162,3906 },{ 89,162,3904 },{ 71,34,500 },{ 15,234,496 } },
1569
{ { 98,162,3906 },{ 98,162,3904 },{ 80,34,500 },{ 24,234,496 } },
1570
{ { 106,162,3906 },{ 106,162,3904 },{ 88,34,500 },{ 32,234,496 } },
1571
{ { 114,162,3906 },{ 114,162,3904 },{ 96,34,500 },{ 40,234,496 } },
1572
{ { 122,162,3906 },{ 122,162,3904 },{ 104,34,500 },{ 48,234,496 } },
1573
{ { 131,162,3906 },{ 131,162,3904 },{ 113,34,500 },{ 57,234,496 } },
1574
{ { 139,162,3906 },{ 139,162,3904 },{ 121,34,500 },{ 65,234,496 } },
1575
{ { 147,162,3906 },{ 147,162,3904 },{ 129,34,500 },{ 73,234,496 } },
1576
{ { 155,162,3906 },{ 155,162,3904 },{ 137,34,500 },{ 81,234,496 } },
1577
{ { 164,162,3906 },{ 164,162,3904 },{ 146,34,500 },{ 90,234,496 } },
1578
{ { 172,162,3906 },{ 172,162,3904 },{ 154,34,500 },{ 98,234,496 } },
1579
{ { 180,162,3906 },{ 180,162,3904 },{ 162,34,500 },{ 106,234,496 } },
1580
{ { 188,162,3906 },{ 188,162,3904 },{ 170,34,500 },{ 114,234,496 } },
1581
{ { 197,162,3906 },{ 197,162,3904 },{ 179,34,500 },{ 123,234,496 } },
1582
{ { 205,162,3906 },{ 205,162,3904 },{ 187,34,500 },{ 131,234,496 } },
1583
{ { 213,162,3906 },{ 213,162,3904 },{ 195,34,500 },{ 139,234,496 } },
1584
{ { 221,162,3906 },{ 221,162,3904 },{ 203,34,500 },{ 147,234,496 } },
1585
{ { 230,162,3906 },{ 230,162,3904 },{ 212,34,500 },{ 156,234,496 } },
1586
{ { 238,162,3906 },{ 174,106,4008 },{ 220,34,500 },{ 164,234,496 } },
1587
{ { 240,178,4001 },{ 182,106,4008 },{ 228,34,500 },{ 172,234,496 } },
1588
{ { 166,108,4085 },{ 115,31,4080 },{ 166,108,501 },{ 115,31,496 } },
1589
{ { 1,68,3328 },{ 1,68,3328 },{ 0,17,384 },{ 0,17,384 } },
1590
{ { 1,148,3904 },{ 1,148,3904 },{ 1,2,384 },{ 1,2,384 } },
1591
{ { 21,18,3851 },{ 21,18,3848 },{ 1,50,488 },{ 1,50,488 } },
1592
{ { 27,195,3851 },{ 29,18,3848 },{ 0,67,488 },{ 0,67,488 } },
1593
{ { 34,195,3907 },{ 38,18,3848 },{ 20,66,482 },{ 0,3,496 } },
1594
{ { 42,195,3907 },{ 46,18,3848 },{ 28,66,482 },{ 2,6,424 } },
1595
{ { 50,195,3907 },{ 54,18,3848 },{ 36,66,482 },{ 4,22,424 } },
1596
{ { 58,195,3907 },{ 62,18,3848 },{ 44,66,482 },{ 3,73,424 } },
1597
{ { 67,195,3907 },{ 71,18,3848 },{ 53,66,482 },{ 3,22,496 } },
1598
{ { 75,195,3907 },{ 79,18,3848 },{ 61,66,482 },{ 2,137,496 } },
1599
{ { 83,195,3907 },{ 87,18,3848 },{ 69,66,482 },{ 1,89,496 } },
1600
{ { 91,195,3907 },{ 95,18,3848 },{ 77,66,482 },{ 9,89,496 } },
1601
{ { 100,195,3907 },{ 104,18,3848 },{ 86,66,482 },{ 18,89,496 } },
1602
{ { 108,195,3907 },{ 112,18,3848 },{ 94,66,482 },{ 26,89,496 } },
1603
{ { 116,195,3907 },{ 120,18,3848 },{ 102,66,482 },{ 34,89,496 } },
1604
{ { 124,195,3907 },{ 128,18,3848 },{ 110,66,482 },{ 42,89,496 } },
1605
{ { 133,195,3907 },{ 137,18,3848 },{ 119,66,482 },{ 51,89,496 } },
1606
{ { 141,195,3907 },{ 145,18,3848 },{ 127,66,482 },{ 59,89,496 } },
1607
{ { 149,195,3907 },{ 153,18,3848 },{ 135,66,482 },{ 67,89,496 } },
1608
{ { 157,195,3907 },{ 161,18,3848 },{ 143,66,482 },{ 75,89,496 } },
1609
{ { 166,195,3907 },{ 170,18,3848 },{ 152,66,482 },{ 84,89,496 } },
1610
{ { 174,195,3907 },{ 178,18,3848 },{ 160,66,482 },{ 92,89,496 } },
1611
{ { 182,195,3907 },{ 186,18,3848 },{ 168,66,482 },{ 100,89,496 } },
1612
{ { 190,195,3907 },{ 194,18,3848 },{ 176,66,482 },{ 108,89,496 } },
1613
{ { 199,195,3907 },{ 203,18,3848 },{ 185,66,482 },{ 117,89,496 } },
1614
{ { 207,195,3907 },{ 211,18,3848 },{ 193,66,482 },{ 125,89,496 } },
1615
{ { 215,195,3907 },{ 219,18,3848 },{ 201,66,482 },{ 133,89,496 } },
1616
{ { 223,195,3907 },{ 227,18,3848 },{ 209,66,482 },{ 141,89,496 } },
1617
{ { 231,195,3907 },{ 168,89,4008 },{ 218,66,482 },{ 150,89,496 } },
1618
{ { 236,18,3907 },{ 176,89,4008 },{ 226,66,482 },{ 158,89,496 } },
1619
{ { 158,90,4085 },{ 103,31,4080 },{ 158,90,501 },{ 103,31,496 } },
1620
{ { 166,90,4085 },{ 111,31,4080 },{ 166,90,501 },{ 111,31,496 } },
1621
{ { 0,70,3328 },{ 0,70,3328 },{ 0,45,256 },{ 0,45,256 } },
1622
{ { 0,117,3904 },{ 0,117,3904 },{ 0,35,384 },{ 0,35,384 } },
1623
{ { 13,165,3905 },{ 13,165,3904 },{ 3,221,416 },{ 3,221,416 } },
1624
{ { 21,165,3906 },{ 21,165,3904 },{ 11,221,416 },{ 11,221,416 } },
1625
{ { 30,165,3906 },{ 30,165,3904 },{ 7,61,352 },{ 7,61,352 } },
1626
{ { 38,165,3906 },{ 38,165,3904 },{ 2,125,352 },{ 2,125,352 } },
1627
{ { 46,165,3906 },{ 46,165,3904 },{ 2,37,500 },{ 10,125,352 } },
1628
{ { 54,165,3906 },{ 54,165,3904 },{ 10,37,500 },{ 5,61,424 } },
1629
{ { 63,165,3906 },{ 63,165,3904 },{ 19,37,500 },{ 1,189,424 } },
1630
{ { 4,254,4012 },{ 71,165,3904 },{ 27,37,500 },{ 9,189,424 } },
1631
{ { 12,254,4012 },{ 79,165,3904 },{ 35,37,500 },{ 4,77,424 } },
1632
{ { 20,254,4012 },{ 87,165,3904 },{ 43,37,500 },{ 12,77,424 } },
1633
{ { 29,254,4012 },{ 96,165,3904 },{ 52,37,500 },{ 8,93,424 } },
1634
{ { 37,254,4012 },{ 104,165,3904 },{ 60,37,500 },{ 3,141,496 } },
1635
{ { 45,254,4012 },{ 112,165,3904 },{ 68,37,500 },{ 11,141,496 } },
1636
{ { 53,254,4012 },{ 120,165,3904 },{ 76,37,500 },{ 6,93,496 } },
1637
{ { 62,254,4012 },{ 129,165,3904 },{ 85,37,500 },{ 15,93,496 } },
1638
{ { 70,254,4012 },{ 137,165,3904 },{ 93,37,500 },{ 23,93,496 } },
1639
{ { 78,254,4012 },{ 145,165,3904 },{ 101,37,500 },{ 31,93,496 } },
1640
{ { 86,254,4012 },{ 153,165,3904 },{ 109,37,500 },{ 39,93,496 } },
1641
{ { 95,254,4012 },{ 162,165,3904 },{ 118,37,500 },{ 48,93,496 } },
1642
{ { 103,254,4012 },{ 170,165,3904 },{ 126,37,500 },{ 56,93,496 } },
1643
{ { 111,254,4012 },{ 178,165,3904 },{ 134,37,500 },{ 64,93,496 } },
1644
{ { 119,254,4012 },{ 186,165,3904 },{ 142,37,500 },{ 72,93,496 } },
1645
{ { 128,254,4012 },{ 195,165,3904 },{ 151,37,500 },{ 81,93,496 } },
1646
{ { 136,254,4012 },{ 203,165,3904 },{ 159,37,500 },{ 89,93,496 } },
1647
{ { 212,165,3906 },{ 136,77,4008 },{ 167,37,500 },{ 97,93,496 } },
1648
{ { 220,165,3394 },{ 131,93,4008 },{ 175,37,500 },{ 105,93,496 } },
1649
{ { 214,181,4001 },{ 140,93,4008 },{ 184,37,500 },{ 114,93,496 } },
1650
{ { 222,181,4001 },{ 148,93,4008 },{ 192,37,500 },{ 122,93,496 } },
1651
{ { 114,95,4085 },{ 99,31,4080 },{ 114,95,501 },{ 99,31,496 } },
1652
{ { 122,95,4085 },{ 107,31,4080 },{ 122,95,501 },{ 107,31,496 } },
1653
{ { 0,102,3840 },{ 0,102,3840 },{ 0,18,384 },{ 0,18,384 } },
1654
{ { 5,167,3904 },{ 5,167,3904 },{ 0,13,256 },{ 0,13,256 } },
1655
{ { 4,54,3968 },{ 4,54,3968 },{ 1,67,448 },{ 1,67,448 } },
1656
{ { 30,198,3850 },{ 30,198,3848 },{ 0,3,480 },{ 0,3,480 } },
1657
{ { 39,198,3850 },{ 39,198,3848 },{ 3,52,488 },{ 3,52,488 } },
1658
{ { 47,198,3851 },{ 47,198,3848 },{ 3,4,488 },{ 3,4,488 } },
1659
{ { 55,198,3851 },{ 55,198,3848 },{ 1,70,488 },{ 1,70,488 } },
1660
{ { 54,167,3906 },{ 63,198,3848 },{ 3,22,488 },{ 3,22,488 } },
1661
{ { 62,167,3906 },{ 72,198,3848 },{ 24,118,488 },{ 0,6,496 } },
1662
{ { 70,167,3906 },{ 80,198,3848 },{ 32,118,488 },{ 2,89,488 } },
1663
{ { 78,167,3906 },{ 88,198,3848 },{ 40,118,488 },{ 1,73,496 } },
1664
{ { 86,167,3906 },{ 96,198,3848 },{ 48,118,488 },{ 0,28,424 } },
1665
{ { 95,167,3906 },{ 105,198,3848 },{ 57,118,488 },{ 9,28,424 } },
1666
{ { 103,167,3906 },{ 113,198,3848 },{ 65,118,488 },{ 5,108,496 } },
1667
{ { 111,167,3906 },{ 121,198,3848 },{ 73,118,488 },{ 13,108,496 } },
1668
{ { 119,167,3906 },{ 129,198,3848 },{ 81,118,488 },{ 21,108,496 } },
1669
{ { 128,167,3906 },{ 138,198,3848 },{ 90,118,488 },{ 6,28,496 } },
1670
{ { 136,167,3906 },{ 146,198,3848 },{ 98,118,488 },{ 14,28,496 } },
1671
{ { 144,167,3906 },{ 154,198,3848 },{ 106,118,488 },{ 22,28,496 } },
1672
{ { 152,167,3906 },{ 162,198,3848 },{ 114,118,488 },{ 30,28,496 } },
1673
{ { 161,167,3906 },{ 171,198,3848 },{ 123,118,488 },{ 39,28,496 } },
1674
{ { 169,167,3906 },{ 179,198,3848 },{ 131,118,488 },{ 47,28,496 } },
1675
{ { 177,167,3906 },{ 187,198,3848 },{ 139,118,488 },{ 55,28,496 } },
1676
{ { 185,167,3906 },{ 195,198,3848 },{ 147,118,488 },{ 63,28,496 } },
1677
{ { 194,167,3906 },{ 120,12,4008 },{ 156,118,488 },{ 72,28,496 } },
1678
{ { 206,198,3907 },{ 116,28,4008 },{ 164,118,488 },{ 80,28,496 } },
1679
{ { 214,198,3907 },{ 124,28,4008 },{ 172,118,488 },{ 88,28,496 } },
1680
{ { 222,198,3395 },{ 132,28,4008 },{ 180,118,488 },{ 96,28,496 } },
1681
{ { 207,134,4001 },{ 141,28,4008 },{ 189,118,488 },{ 105,28,496 } },
1682
{ { 95,30,4085 },{ 86,31,4080 },{ 95,30,501 },{ 86,31,496 } },
1683
{ { 103,30,4085 },{ 94,31,4080 },{ 103,30,501 },{ 94,31,496 } },
1684
{ { 111,30,4085 },{ 102,31,4080 },{ 111,30,501 },{ 102,31,496 } },
1685
{ { 0,104,3840 },{ 0,104,3840 },{ 0,18,448 },{ 0,18,448 } },
1686
{ { 4,39,3904 },{ 4,39,3904 },{ 0,4,384 },{ 0,4,384 } },
1687
{ { 0,56,3968 },{ 0,56,3968 },{ 0,84,448 },{ 0,84,448 } },
1688
{ { 6,110,3328 },{ 6,110,3328 },{ 0,20,448 },{ 0,20,448 } },
1689
{ { 41,200,3850 },{ 41,200,3848 },{ 1,4,480 },{ 1,4,480 } },
1690
{ { 49,200,3850 },{ 49,200,3848 },{ 1,8,416 },{ 1,8,416 } },
1691
{ { 57,200,3851 },{ 57,200,3848 },{ 1,38,488 },{ 1,38,488 } },
1692
{ { 65,200,3851 },{ 65,200,3848 },{ 1,120,488 },{ 1,120,488 } },
1693
{ { 74,200,3851 },{ 74,200,3848 },{ 2,72,488 },{ 2,72,488 } },
1694
{ { 69,6,3907 },{ 82,200,3848 },{ 2,24,488 },{ 2,24,488 } },
1695
{ { 77,6,3907 },{ 90,200,3848 },{ 26,120,488 },{ 10,24,488 } },
1696
{ { 97,63,3330 },{ 98,200,3848 },{ 34,120,488 },{ 2,8,496 } },
1697
{ { 106,63,3330 },{ 107,200,3848 },{ 43,120,488 },{ 3,92,488 } },
1698
{ { 114,63,3330 },{ 115,200,3848 },{ 51,120,488 },{ 11,92,488 } },
1699
{ { 122,63,3330 },{ 123,200,3848 },{ 59,120,488 },{ 7,76,496 } },
1700
{ { 130,63,3330 },{ 131,200,3848 },{ 67,120,488 },{ 15,76,496 } },
1701
{ { 139,63,3330 },{ 140,200,3848 },{ 76,120,488 },{ 24,76,496 } },
1702
{ { 147,63,3330 },{ 148,200,3848 },{ 84,120,488 },{ 32,76,496 } },
1703
{ { 155,63,3330 },{ 156,200,3848 },{ 92,120,488 },{ 40,76,496 } },
1704
{ { 163,63,3330 },{ 164,200,3848 },{ 100,120,488 },{ 48,76,496 } },
1705
{ { 172,63,3330 },{ 173,200,3848 },{ 109,120,488 },{ 57,76,496 } },
1706
{ { 184,6,3851 },{ 181,200,3848 },{ 117,120,488 },{ 65,76,496 } },
1707
{ { 192,6,3851 },{ 133,28,3936 },{ 125,120,488 },{ 73,76,496 } },
1708
{ { 189,200,3907 },{ 141,28,3936 },{ 133,120,488 },{ 81,76,496 } },
1709
{ { 198,200,3907 },{ 138,108,4000 },{ 142,120,488 },{ 90,76,496 } },
1710
{ { 206,200,3907 },{ 146,108,4000 },{ 150,120,488 },{ 98,76,496 } },
1711
{ { 214,200,3395 },{ 154,108,4000 },{ 158,120,488 },{ 106,76,496 } },
1712
{ { 190,136,4001 },{ 162,108,4000 },{ 166,120,488 },{ 114,76,496 } },
1713
{ { 123,30,4076 },{ 87,15,4080 },{ 123,30,492 },{ 87,15,496 } },
1714
{ { 117,110,4084 },{ 80,31,4080 },{ 117,110,500 },{ 80,31,496 } },
1715
{ { 125,110,4084 },{ 88,31,4080 },{ 125,110,500 },{ 88,31,496 } },
1716
{ { 133,110,4084 },{ 96,31,4080 },{ 133,110,500 },{ 96,31,496 } },
1717
{ { 9,56,3904 },{ 9,56,3904 },{ 0,67,448 },{ 0,67,448 } },
1718
{ { 1,8,3904 },{ 1,8,3904 },{ 1,84,448 },{ 1,84,448 } },
1719
{ { 1,124,3904 },{ 1,124,3904 },{ 0,39,384 },{ 0,39,384 } },
1720
{ { 9,124,3904 },{ 9,124,3904 },{ 1,4,448 },{ 1,4,448 } },
1721
{ { 6,76,3904 },{ 6,76,3904 },{ 0,70,448 },{ 0,70,448 } },
1722
{ { 62,6,3859 },{ 62,6,3856 },{ 2,38,480 },{ 2,38,480 } },
1723
{ { 70,6,3859 },{ 70,6,3856 },{ 5,43,416 },{ 5,43,416 } },
1724
{ { 78,6,3859 },{ 78,6,3856 },{ 2,11,416 },{ 2,11,416 } },
1725
{ { 87,6,3859 },{ 87,6,3856 },{ 0,171,488 },{ 0,171,488 } },
1726
{ { 67,8,3906 },{ 95,6,3856 },{ 8,171,488 },{ 8,171,488 } },
1727
{ { 75,8,3907 },{ 103,6,3856 },{ 5,123,488 },{ 5,123,488 } },
1728
{ { 83,8,3907 },{ 111,6,3856 },{ 2,75,488 },{ 2,75,488 } },
1729
{ { 92,8,3907 },{ 120,6,3856 },{ 0,27,488 },{ 0,27,488 } },
1730
{ { 100,8,3907 },{ 128,6,3856 },{ 8,27,488 },{ 8,27,488 } },
1731
{ { 120,106,3843 },{ 136,6,3856 },{ 100,6,387 },{ 16,27,488 } },
1732
{ { 128,106,3843 },{ 144,6,3856 },{ 108,6,387 },{ 2,11,496 } },
1733
{ { 137,106,3843 },{ 153,6,3856 },{ 117,6,387 },{ 11,11,496 } },
1734
{ { 145,106,3843 },{ 161,6,3856 },{ 125,6,387 },{ 19,11,496 } },
1735
{ { 163,8,3851 },{ 137,43,3904 },{ 133,6,387 },{ 27,11,496 } },
1736
{ { 171,8,3851 },{ 101,11,4000 },{ 141,6,387 },{ 35,11,496 } },
1737
{ { 180,8,3851 },{ 110,11,4000 },{ 150,6,387 },{ 44,11,496 } },
1738
{ { 188,8,3851 },{ 118,11,4000 },{ 158,6,387 },{ 52,11,496 } },
1739
{ { 172,72,3907 },{ 126,11,4000 },{ 166,6,387 },{ 60,11,496 } },
1740
{ { 174,6,3971 },{ 134,11,4000 },{ 174,6,387 },{ 68,11,496 } },
1741
{ { 183,6,3971 },{ 143,11,4000 },{ 183,6,387 },{ 77,11,496 } },
1742
{ { 191,6,3971 },{ 151,11,4000 },{ 191,6,387 },{ 85,11,496 } },
1743
{ { 199,6,3971 },{ 159,11,4000 },{ 199,6,387 },{ 93,11,496 } },
1744
{ { 92,12,4084 },{ 69,15,4080 },{ 92,12,500 },{ 69,15,496 } },
1745
{ { 101,12,4084 },{ 78,15,4080 },{ 101,12,500 },{ 78,15,496 } },
1746
{ { 109,12,4084 },{ 86,15,4080 },{ 109,12,500 },{ 86,15,496 } },
1747
{ { 117,12,4084 },{ 79,31,4080 },{ 117,12,500 },{ 79,31,496 } },
1748
{ { 125,12,4084 },{ 87,31,4080 },{ 125,12,500 },{ 87,31,496 } },
1749
{ { 71,8,3602 },{ 71,8,3600 },{ 2,21,384 },{ 2,21,384 } },
1750
{ { 79,8,3611 },{ 79,8,3608 },{ 0,69,448 },{ 0,69,448 } },
1751
{ { 87,8,3611 },{ 87,8,3608 },{ 0,23,384 },{ 0,23,384 } },
1752
{ { 95,8,3611 },{ 95,8,3608 },{ 1,5,448 },{ 1,5,448 } },
1753
{ { 104,8,3611 },{ 104,8,3608 },{ 0,88,448 },{ 0,88,448 } },
1754
{ { 112,8,3611 },{ 112,8,3608 },{ 0,72,448 },{ 0,72,448 } },
1755
{ { 120,8,3611 },{ 121,8,3608 },{ 36,21,458 },{ 36,21,456 } },
1756
{ { 133,47,3091 },{ 129,8,3608 },{ 44,21,458 },{ 44,21,456 } },
1757
{ { 142,47,3091 },{ 138,8,3608 },{ 53,21,459 },{ 53,21,456 } },
1758
{ { 98,12,3850 },{ 98,12,3848 },{ 61,21,459 },{ 61,21,456 } },
1759
{ { 106,12,3850 },{ 106,12,3848 },{ 10,92,480 },{ 69,21,456 } },
1760
{ { 114,12,3851 },{ 114,12,3848 },{ 18,92,480 },{ 77,21,456 } },
1761
{ { 87,12,3906 },{ 87,12,3904 },{ 3,44,488 },{ 86,21,456 } },
1762
{ { 95,12,3906 },{ 95,12,3904 },{ 11,44,488 },{ 94,21,456 } },
1763
{ { 103,12,3906 },{ 103,12,3904 },{ 19,44,488 },{ 102,21,456 } },
1764
{ { 111,12,3907 },{ 111,12,3904 },{ 27,44,489 },{ 110,21,456 } },
1765
{ { 120,12,3907 },{ 120,12,3904 },{ 36,44,489 },{ 119,21,456 } },
1766
{ { 128,12,3907 },{ 128,12,3904 },{ 44,44,489 },{ 127,21,456 } },
1767
{ { 136,12,3907 },{ 136,12,3904 },{ 52,44,489 },{ 135,21,456 } },
1768
{ { 144,12,3907 },{ 144,12,3904 },{ 60,44,489 },{ 143,21,456 } },
1769
{ { 153,12,3907 },{ 153,12,3904 },{ 69,44,490 },{ 152,21,456 } },
1770
{ { 161,12,3395 },{ 149,188,3968 },{ 77,44,490 },{ 160,21,456 } },
1771
{ { 169,12,3395 },{ 198,21,3928 },{ 85,44,490 },{ 168,21,456 } },
1772
{ { 113,95,4001 },{ 201,69,3992 },{ 125,8,483 },{ 176,21,456 } },
1773
{ { 122,95,4001 },{ 200,21,3984 },{ 134,8,483 },{ 185,21,456 } },
1774
{ { 142,8,4067 },{ 208,21,3984 },{ 142,8,483 },{ 193,21,456 } },
1775
{ { 151,8,4067 },{ 47,15,4080 },{ 151,8,483 },{ 47,15,496 } },
1776
{ { 159,8,4067 },{ 55,15,4080 },{ 159,8,483 },{ 55,15,496 } },
1777
{ { 168,8,4067 },{ 64,15,4080 },{ 168,8,483 },{ 64,15,496 } },
1778
{ { 160,40,4075 },{ 72,15,4080 },{ 160,40,491 },{ 72,15,496 } },
1779
{ { 168,40,4075 },{ 80,15,4080 },{ 168,40,491 },{ 80,15,496 } },
1780
{ { 144,8,4082 },{ 88,15,4080 },{ 144,8,498 },{ 88,15,496 } }
1781
};
1782
#endif // BASISD_SUPPORT_ETC2_EAC_A8
1783
1784
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1785
static void create_etc2_eac_a8_conversion_table()
1786
{
1787
FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w");
1788
1789
for (uint32_t inten = 0; inten < 8; inten++)
1790
{
1791
for (uint32_t base = 0; base < 32; base++)
1792
{
1793
color32 block_colors[4];
1794
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1795
1796
fprintf(pFile, "{");
1797
1798
for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1799
{
1800
const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1801
const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1802
1803
// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1804
// Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors.
1805
1806
uint8_t pixels[4];
1807
uint32_t num_pixels = 0;
1808
for (uint32_t s = low_selector; s <= high_selector; s++)
1809
pixels[num_pixels++] = block_colors[s].g;
1810
1811
pack_eac_a8_results pack_results;
1812
pack_eac_a8_exhaustive(pack_results, pixels, num_pixels);
1813
1814
etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range];
1815
1816
c.m_base = pack_results.m_base;
1817
c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier;
1818
c.m_trans = 0;
1819
1820
for (uint32_t s = 0; s < 4; s++)
1821
{
1822
if ((s < low_selector) || (s > high_selector))
1823
continue;
1824
1825
uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1826
1827
c.m_trans |= (etc2_selector << (s * 3));
1828
}
1829
1830
fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1831
if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1832
fprintf(pFile, ",");
1833
}
1834
1835
fprintf(pFile, "},\n");
1836
}
1837
}
1838
1839
fclose(pFile);
1840
}
1841
#endif
1842
1843
#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1844
struct pack_eac_r11_results
1845
{
1846
uint32_t m_base;
1847
uint32_t m_table;
1848
uint32_t m_multiplier;
1849
basisu::vector<uint8_t> m_selectors;
1850
basisu::vector<uint8_t> m_selectors_temp;
1851
};
1852
1853
// Brute-force EAC R11 fit: tries every base (0-255), multiplier (0-15) and modifier table (0-15),
// and for each pixel picks whichever of the 8 selectors reconstructs closest to the pixel
// (after expanding the 8-bit input to 11 bits). The candidate with the lowest sum of squared
// errors is stored into results; that lowest sum is returned.
static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels)
{
	results.m_selectors.resize(num_pixels);
	results.m_selectors_temp.resize(num_pixels);

	uint64_t lowest_total = UINT64_MAX;

	for (uint32_t base_color = 0; base_color < 256; ++base_color)
	{
		// Reconstruction center for this base value.
		const int center = (int)base_color * 8 + 4;

		for (uint32_t multiplier = 0; multiplier < 16; ++multiplier)
		{
			// A multiplier of zero scales the modifiers by 1 rather than 0.
			const int scale = (int)(multiplier ? (multiplier * 8) : 1);

			for (uint32_t table = 0; table < 16; ++table)
			{
				uint64_t trial_total = 0;

				for (uint32_t px = 0; px < num_pixels; ++px)
				{
					// Convert 8-bit input to 11-bits
					const int target = (pPixels[px] * 2047 + 128) / 255;

					uint32_t closest_dist = UINT32_MAX;
					uint32_t closest_sel = 0;

					for (uint32_t sel = 0; sel < 8; ++sel)
					{
						int recon = scale * g_eac_modifier_table[table][sel] + center;
						recon = (recon < 0) ? 0 : ((recon > 2047) ? 2047 : recon);

						const uint32_t dist = abs(target - recon);
						if (dist < closest_dist)
						{
							closest_dist = dist;
							closest_sel = sel;
						}
					}

					results.m_selectors_temp[px] = static_cast<uint8_t>(closest_sel);

					trial_total += closest_dist * closest_dist;

					// Early out: this candidate can no longer beat the best one.
					if (trial_total >= lowest_total)
						break;
				}

				if (trial_total < lowest_total)
				{
					lowest_total = trial_total;
					results.m_base = base_color;
					results.m_multiplier = multiplier;
					results.m_table = table;
					results.m_selectors.swap(results.m_selectors_temp);
				}

			} // table

		} // multiplier

	} // base_color

	return lowest_total;
}
1915
1916
static void create_etc2_eac_r11_conversion_table()
1917
{
1918
FILE* pFile = nullptr;
1919
fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w");
1920
1921
for (uint32_t inten = 0; inten < 8; inten++)
1922
{
1923
for (uint32_t base = 0; base < 32; base++)
1924
{
1925
color32 block_colors[4];
1926
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1927
1928
fprintf(pFile, "{");
1929
1930
for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1931
{
1932
const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1933
const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1934
1935
// We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1936
// Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors.
1937
1938
uint8_t pixels[4];
1939
uint32_t num_pixels = 0;
1940
for (uint32_t s = low_selector; s <= high_selector; s++)
1941
pixels[num_pixels++] = block_colors[s].g;
1942
1943
pack_eac_r11_results pack_results;
1944
pack_eac_r11_exhaustive(pack_results, pixels, num_pixels);
1945
1946
etc1_g_to_eac_conversion c;
1947
1948
c.m_base = (uint8_t)pack_results.m_base;
1949
c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier);
1950
c.m_trans = 0;
1951
1952
for (uint32_t s = 0; s < 4; s++)
1953
{
1954
if ((s < low_selector) || (s > high_selector))
1955
continue;
1956
1957
uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1958
1959
c.m_trans |= (etc2_selector << (s * 3));
1960
}
1961
1962
fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1963
if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1964
fprintf(pFile, ",");
1965
}
1966
1967
fprintf(pFile, "},\n");
1968
}
1969
}
1970
1971
fclose(pFile);
1972
}
1973
#endif // BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1974
1975
// Forward declarations for per-format init routines and offline table writers.
// Each one is only declared when its feature/table-writer macro is enabled.

#if BASISD_WRITE_NEW_ASTC_TABLES
static void create_etc1_to_astc_conversion_table_0_47();
static void create_etc1_to_astc_conversion_table_0_255();
#endif

#if BASISD_SUPPORT_ASTC
static void transcoder_init_astc();
#endif

#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
static void create_etc1_to_bc7_m5_color_conversion_table();
static void create_etc1_to_bc7_m5_alpha_conversion_table();
#endif

#if BASISD_SUPPORT_BC7_MODE5
static void transcoder_init_bc7_mode5();
#endif

#if BASISD_WRITE_NEW_ATC_TABLES
static void create_etc1s_to_atc_conversion_tables();
#endif

#if BASISD_SUPPORT_ATC
static void transcoder_init_atc();
#endif

#if BASISD_SUPPORT_PVRTC2
static void transcoder_init_pvrtc2();
#endif

#if BASISD_SUPPORT_UASTC
void uastc_init();
#endif

#if BASISD_SUPPORT_UASTC_HDR
namespace astc_6x6_hdr
{
	static void init_quantize_tables();
	static void fast_encode_bc6h_init();
}
#endif

#if BASISD_SUPPORT_BC7_MODE5
namespace bc7_mode_5_encoder
{
	void encode_bc7_mode5_init();
}
#endif

// Set once basisu_transcoder_init() has completed; guards against repeated initialization.
static bool g_transcoder_initialized;
2025
2026
// Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz.
// If this is too slow, these computed tables can easily be moved to be compiled in.
2028
void basisu_transcoder_init()
2029
{
2030
if (g_transcoder_initialized)
2031
{
2032
BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
2033
return;
2034
}
2035
2036
BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
2037
2038
#if BASISD_SUPPORT_UASTC
2039
uastc_init();
2040
#endif
2041
2042
#if BASISD_SUPPORT_UASTC_HDR
2043
// TODO: Examine this, optimize for startup time/mem utilization.
2044
astc_helpers::init_tables(false);
2045
2046
astc_hdr_core_init();
2047
#endif
2048
2049
#if BASISD_SUPPORT_ASTC
2050
transcoder_init_astc();
2051
#endif
2052
2053
#if BASISD_WRITE_NEW_ASTC_TABLES
2054
create_etc1_to_astc_conversion_table_0_47();
2055
create_etc1_to_astc_conversion_table_0_255();
2056
exit(0);
2057
#endif
2058
2059
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
2060
create_etc1_to_bc7_m5_color_conversion_table();
2061
create_etc1_to_bc7_m5_alpha_conversion_table();
2062
exit(0);
2063
#endif
2064
2065
#if BASISD_WRITE_NEW_DXT1_TABLES
2066
create_etc1_to_dxt1_5_conversion_table();
2067
create_etc1_to_dxt1_6_conversion_table();
2068
exit(0);
2069
#endif
2070
2071
#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
2072
create_etc2_eac_a8_conversion_table();
2073
exit(0);
2074
#endif
2075
2076
#if BASISD_WRITE_NEW_ATC_TABLES
2077
create_etc1s_to_atc_conversion_tables();
2078
exit(0);
2079
#endif
2080
2081
#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
2082
create_etc2_eac_r11_conversion_table();
2083
exit(0);
2084
#endif
2085
2086
#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
2087
uint8_t bc1_expand5[32];
2088
for (int i = 0; i < 32; i++)
2089
bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2));
2090
prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 32, 1);
2091
prepare_bc1_single_color_table(g_bc1_match5_equals_0, bc1_expand5, 1, 32, 0);
2092
2093
uint8_t bc1_expand6[64];
2094
for (int i = 0; i < 64; i++)
2095
bc1_expand6[i] = static_cast<uint8_t>((i << 2) | (i >> 4));
2096
prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1);
2097
prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0);
2098
2099
#if 0
2100
for (uint32_t i = 0; i < 256; i++)
2101
{
2102
printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi);
2103
}
2104
exit(0);
2105
#endif
2106
2107
#endif
2108
2109
#if BASISD_SUPPORT_DXT1
2110
for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++)
2111
{
2112
uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low;
2113
uint32_t h = g_etc1_to_dxt1_selector_ranges[i].m_high;
2114
g_etc1_to_dxt1_selector_range_index[l][h] = i;
2115
}
2116
2117
for (uint32_t sm = 0; sm < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; sm++)
2118
{
2119
uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1[4];
2120
uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1_inv[4];
2121
2122
for (uint32_t j = 0; j < 4; j++)
2123
{
2124
static const uint8_t s_linear_dxt1_to_dxt1[4] = { 0, 2, 3, 1 };
2125
static const uint8_t s_dxt1_inverted_xlat[4] = { 1, 0, 3, 2 };
2126
2127
etc1_to_dxt1_selector_mappings_raw_dxt1[j] = (uint8_t)s_linear_dxt1_to_dxt1[g_etc1_to_dxt1_selector_mappings[sm][j]];
2128
etc1_to_dxt1_selector_mappings_raw_dxt1_inv[j] = (uint8_t)s_dxt1_inverted_xlat[etc1_to_dxt1_selector_mappings_raw_dxt1[j]];
2129
}
2130
2131
for (uint32_t i = 0; i < 256; i++)
2132
{
2133
uint32_t k = 0, k_inv = 0;
2134
for (uint32_t s = 0; s < 4; s++)
2135
{
2136
k |= (etc1_to_dxt1_selector_mappings_raw_dxt1[(i >> (s * 2)) & 3] << (s * 2));
2137
k_inv |= (etc1_to_dxt1_selector_mappings_raw_dxt1_inv[(i >> (s * 2)) & 3] << (s * 2));
2138
}
2139
g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[sm][i] = (uint8_t)k;
2140
g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[sm][i] = (uint8_t)k_inv;
2141
}
2142
}
2143
#endif
2144
2145
#if BASISD_SUPPORT_BC7_MODE5
2146
transcoder_init_bc7_mode5();
2147
#endif
2148
2149
#if BASISD_SUPPORT_ATC
2150
transcoder_init_atc();
2151
#endif
2152
2153
#if BASISD_SUPPORT_PVRTC2
2154
transcoder_init_pvrtc2();
2155
#endif
2156
2157
#if BASISD_SUPPORT_UASTC_HDR
2158
bc6h_enc_init();
2159
astc_6x6_hdr::init_quantize_tables();
2160
fast_encode_bc6h_init();
2161
#endif
2162
2163
#if BASISD_SUPPORT_BC7_MODE5
2164
bc7_mode_5_encoder::encode_bc7_mode5_init();
2165
#endif
2166
2167
g_transcoder_initialized = true;
2168
}
2169
2170
#if BASISD_SUPPORT_DXT1
2171
// Transcodes one ETC1S block (endpoint + selector entries) into a DXT1/BC1 block.
// pDst_block: destination block. pEndpoints: supplies the 5-bit base color (m_color5) and intensity table (m_inten5).
// pSelector: supplies the per-pixel selectors and the lowest/highest selector used.
// use_threecolor_blocks: when false, the two packed endpoint colors are never left equal
// (needed for BC3, see the comments below).
// Three paths: a single-color block, a two-color block at the extreme selectors with a high intensity table,
// and the general path using the precomputed g_etc1_to_dxt_5/g_etc1_to_dxt_6 tables.
static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
{
#if !BASISD_WRITE_NEW_DXT1_TABLES
	const uint32_t low_selector = pSelector->m_lo_selector;
	const uint32_t high_selector = pSelector->m_hi_selector;

	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten_table = pEndpoints->m_inten5;

	if (low_selector == high_selector)
	{
		// Only one selector is used, so the block is a single solid color.
		uint32_t r, g, b;
		decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

		uint32_t sel_byte = 0xAA;
		uint32_t color_max = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi;
		uint32_t color_min = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo;

		if ((color_min == color_max) && (!use_threecolor_blocks))
		{
			// This is an annoying edge case that impacts BC3.
			// This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.

			// Make l > h
			if (color_min > 0)
			{
				color_min--;
				sel_byte = 0;
			}
			else
			{
				// l = h = 0
				assert(color_min == color_max && color_max == 0);

				color_max = 1;
				color_min = 0;
				sel_byte = 0x55;
			}

			assert(color_max > color_min);
		}

		if (color_max < color_min)
		{
			std::swap(color_max, color_min);
			sel_byte ^= 0x55;
		}

		pDst_block->set_low_color(static_cast<uint16_t>(color_max));
		pDst_block->set_high_color(static_cast<uint16_t>(color_min));

		for (uint32_t row = 0; row < 4; row++)
			pDst_block->m_selectors[row] = static_cast<uint8_t>(sel_byte);

		return;
	}

	if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
	{
		// Exactly two colors, at ETC1 selectors 0 and 3: encode them directly as the two BC1 endpoints.
		color32 block_colors[4];

		decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);

		const color32& first = block_colors[0];
		const color32& last = block_colors[3];

		uint32_t color_max = (g_bc1_match5_equals_0[first.r].m_hi << 11) | (g_bc1_match6_equals_0[first.g].m_hi << 5) | g_bc1_match5_equals_0[first.b].m_hi;
		uint32_t color_min = (g_bc1_match5_equals_0[last.r].m_hi << 11) | (g_bc1_match6_equals_0[last.g].m_hi << 5) | g_bc1_match5_equals_0[last.b].m_hi;

		// BC1 selector written for ETC1 selector 0 (sel_for_first) and for ETC1 selector 3 (sel_for_last).
		uint32_t sel_for_first = 0, sel_for_last = 1;

		if (color_min == color_max)
		{
			// Make l > h
			if (color_min > 0)
			{
				color_min--;

				sel_for_first = 0;
				sel_for_last = 0;
			}
			else
			{
				// l = h = 0
				assert(color_min == color_max && color_max == 0);

				color_max = 1;
				color_min = 0;

				sel_for_first = 1;
				sel_for_last = 1;
			}

			assert(color_max > color_min);
		}

		if (color_max < color_min)
		{
			std::swap(color_max, color_min);
			sel_for_first = 1;
			sel_for_last = 0;
		}

		pDst_block->set_low_color((uint16_t)color_max);
		pDst_block->set_high_color((uint16_t)color_min);

		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				const uint32_t etc1_sel = pSelector->get_selector(x, y);
				pDst_block->set_selector(x, y, (etc1_sel == 3) ? sel_for_last : sel_for_first);
			}
		}

		return;
	}

	// General path: table-driven endpoint lookup per channel.
	const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector];

	//[32][8][RANGES][MAPPING]
	const uint32_t entries_per_base = NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS;
	const uint32_t range_ofs = selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS;

	const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * entries_per_base + range_ofs];
	const etc1_to_dxt1_56_solution* pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * entries_per_base + range_ofs];
	const etc1_to_dxt1_56_solution* pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * entries_per_base + range_ofs];

	// Pick the selector mapping with the lowest summed per-channel error (first one wins ties).
	uint32_t best_err = UINT_MAX;
	uint32_t best_mapping = 0;

	assert(NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS == 10);
	for (uint32_t m = 0; m < 10; m++)
	{
		const uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err;
		if (total_err < best_err)
		{
			best_err = total_err;
			best_mapping = m;
		}
	}

	uint32_t l = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
	uint32_t h = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);

	const uint8_t* pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[best_mapping][0];

	if (l < h)
	{
		// Endpoints get swapped, so use the inverted selector translation table.
		std::swap(l, h);
		pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0];
	}

	pDst_block->set_low_color(static_cast<uint16_t>(l));
	pDst_block->set_high_color(static_cast<uint16_t>(h));

	if (l == h)
	{
		uint8_t sel_byte = 0;

		if (!use_threecolor_blocks)
		{
			// This is an annoying edge case that impacts BC3.

			// Make l > h
			if (h > 0)
			{
				h--;
			}
			else
			{
				// l = h = 0
				assert(l == h && h == 0);

				h = 0;
				l = 1;
				sel_byte = 0x55;
			}

			assert(l > h);
			pDst_block->set_low_color(static_cast<uint16_t>(l));
			pDst_block->set_high_color(static_cast<uint16_t>(h));
		}

		for (uint32_t row = 0; row < 4; row++)
			pDst_block->m_selectors[row] = sel_byte;

		return;
	}

	// Translate four selectors (one byte) at a time.
	for (uint32_t row = 0; row < 4; row++)
		pDst_block->m_selectors[row] = pSelectors_xlat_256[pSelector->m_selectors[row]];
#endif
}
2362
2363
#if BASISD_ENABLE_DEBUG_FLAGS
2364
// Debug wrapper around convert_etc1s_to_dxt1(). After the normal conversion it either
// overrides the endpoints with fixed extremes (cDebugFlagVisBC1Sels), or overrides the
// selectors with a fixed top-half/bottom-half pattern (cDebugFlagVisBC1Endpoints).
static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
{
	convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks);

	if (g_debug_flags & cDebugFlagVisBC1Sels)
	{
		// Maximum and minimum packed colors, leaving only the selectors to distinguish pixels.
		const uint32_t brightest = dxt1_block::pack_unscaled_color(31, 63, 31);
		const uint32_t darkest = dxt1_block::pack_unscaled_color(0, 0, 0);
		pDst_block->set_low_color(static_cast<uint16_t>(brightest));
		pDst_block->set_high_color(static_cast<uint16_t>(darkest));
		return;
	}

	if (g_debug_flags & cDebugFlagVisBC1Endpoints)
	{
		// Rows 0-1 get selector 0, rows 2-3 get selector 1.
		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				pDst_block->set_selector(x, y, (y >= 2) ? 1 : 0);
			}
		}
	}
}
2382
#endif
2383
#endif
2384
2385
#if BASISD_SUPPORT_FXT1
2386
// One FXT1 block as two 64-bit words. The low word carries 32 2-bit selectors
// (also viewable as raw bits or as 8 bytes); the high word carries four 5:5:5 colors
// plus the alpha, green-LSB and mode bits (also viewable as raw bits).
struct fxt1_block
{
	union
	{
		struct
		{
			uint64_t m_t00 : 2, m_t01 : 2, m_t02 : 2, m_t03 : 2;
			uint64_t m_t04 : 2, m_t05 : 2, m_t06 : 2, m_t07 : 2;
			uint64_t m_t08 : 2, m_t09 : 2, m_t10 : 2, m_t11 : 2;
			uint64_t m_t12 : 2, m_t13 : 2, m_t14 : 2, m_t15 : 2;
			uint64_t m_t16 : 2, m_t17 : 2, m_t18 : 2, m_t19 : 2;
			uint64_t m_t20 : 2, m_t21 : 2, m_t22 : 2, m_t23 : 2;
			uint64_t m_t24 : 2, m_t25 : 2, m_t26 : 2, m_t27 : 2;
			uint64_t m_t28 : 2, m_t29 : 2, m_t30 : 2, m_t31 : 2;
		} m_lo;
		uint64_t m_lo_bits;
		uint8_t m_sels[8];
	};
	union
	{
		struct
		{
			// The two layouts differ only in the order of the four colors.
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
			uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
			uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
			uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
			uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
#else
			uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
			uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
			uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
			uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
#endif
			uint64_t m_alpha : 1;
			uint64_t m_glsb : 2;
			uint64_t m_mode : 1;
		} m_hi;
		uint64_t m_hi_bits;
	};
};
2466
2467
// Converts one byte of four packed 2-bit DXT1 selectors into the equivalent FXT1 selectors.
// The table handles one nibble (two selectors) per lookup and remaps each 2-bit selector
// as 0->0, 1->3, 2->1, 3->2. Only the low 8 bits of sels are used.
static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels)
{
	static const uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 };

	const uint32_t lo_nibble = s_conv_table[sels & 15];
	// Mask the high nibble too, so a value above 255 can never index past the table.
	const uint32_t hi_nibble = s_conv_table[(sels >> 4) & 15];

	return static_cast<uint8_t>(lo_nibble | (hi_nibble << 4));
}
2472
2473
static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock)
2474
{
2475
fxt1_block* pBlock = static_cast<fxt1_block*>(pDst);
2476
2477
// CC_MIXED is basically DXT1 with different encoding tricks.
2478
// So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless.
2479
// (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.)
2480
dxt1_block blk;
2481
convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false);
2482
2483
const uint32_t l = blk.get_low_color();
2484
const uint32_t h = blk.get_high_color();
2485
2486
color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255);
2487
color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255);
2488
2489
uint32_t g0 = color0.g & 1;
2490
uint32_t g1 = color1.g & 1;
2491
2492
color0.g >>= 1;
2493
color1.g >>= 1;
2494
2495
blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]);
2496
blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]);
2497
blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]);
2498
blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]);
2499
2500
if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1))
2501
{
2502
std::swap(color0, color1);
2503
std::swap(g0, g1);
2504
2505
blk.m_selectors[0] ^= 0xFF;
2506
blk.m_selectors[1] ^= 0xFF;
2507
blk.m_selectors[2] ^= 0xFF;
2508
blk.m_selectors[3] ^= 0xFF;
2509
}
2510
2511
if (fxt1_subblock == 0)
2512
{
2513
pBlock->m_hi.m_mode = 1;
2514
pBlock->m_hi.m_alpha = 0;
2515
pBlock->m_hi.m_glsb = g1 | (g1 << 1);
2516
pBlock->m_hi.m_r0 = color0.r;
2517
pBlock->m_hi.m_g0 = color0.g;
2518
pBlock->m_hi.m_b0 = color0.b;
2519
pBlock->m_hi.m_r1 = color1.r;
2520
pBlock->m_hi.m_g1 = color1.g;
2521
pBlock->m_hi.m_b1 = color1.b;
2522
pBlock->m_hi.m_r2 = color0.r;
2523
pBlock->m_hi.m_g2 = color0.g;
2524
pBlock->m_hi.m_b2 = color0.b;
2525
pBlock->m_hi.m_r3 = color1.r;
2526
pBlock->m_hi.m_g3 = color1.g;
2527
pBlock->m_hi.m_b3 = color1.b;
2528
pBlock->m_sels[0] = blk.m_selectors[0];
2529
pBlock->m_sels[1] = blk.m_selectors[1];
2530
pBlock->m_sels[2] = blk.m_selectors[2];
2531
pBlock->m_sels[3] = blk.m_selectors[3];
2532
2533
static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 };
2534
pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6];
2535
pBlock->m_sels[5] = s_border_dup[blk.m_selectors[1] >> 6];
2536
pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6];
2537
pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6];
2538
}
2539
else
2540
{
2541
pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1);
2542
pBlock->m_hi.m_r2 = color0.r;
2543
pBlock->m_hi.m_g2 = color0.g;
2544
pBlock->m_hi.m_b2 = color0.b;
2545
pBlock->m_hi.m_r3 = color1.r;
2546
pBlock->m_hi.m_g3 = color1.g;
2547
pBlock->m_hi.m_b3 = color1.b;
2548
pBlock->m_sels[4] = blk.m_selectors[0];
2549
pBlock->m_sels[5] = blk.m_selectors[1];
2550
pBlock->m_sels[6] = blk.m_selectors[2];
2551
pBlock->m_sels[7] = blk.m_selectors[3];
2552
}
2553
}
2554
#endif // BASISD_SUPPORT_FXT1
2555
#if BASISD_SUPPORT_DXT5A
2556
static dxt_selector_range s_dxt5a_selector_ranges[] =
2557
{
2558
{ 0, 3 },
2559
2560
{ 1, 3 },
2561
{ 0, 2 },
2562
2563
{ 1, 2 },
2564
};
2565
2566
const uint32_t NUM_DXT5A_SELECTOR_RANGES = sizeof(s_dxt5a_selector_ranges) / sizeof(s_dxt5a_selector_ranges[0]);
2567
2568
// One entry of the g_etc1_g_to_dxt5a table.
// NOTE(review): m_lo/m_hi look like the two DXT5A endpoint values and m_trans a packed
// selector translation - confirm against the code that consumes the table.
struct etc1_g_to_dxt5a_conversion
{
	uint8_t m_lo;
	uint8_t m_hi;
	uint16_t m_trans;
};
2573
2574
static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] =
2575
{
2576
{ { 8, 0, 393 },{ 8, 0, 392 },{ 2, 0, 9 },{ 2, 0, 8 }, }, { { 6, 16, 710 },{ 16, 6, 328 },{ 0, 10, 96 },{ 10, 6, 8 }, },
2577
{ { 28, 5, 1327 },{ 24, 14, 328 },{ 8, 18, 96 },{ 18, 14, 8 }, }, { { 36, 13, 1327 },{ 32, 22, 328 },{ 16, 26, 96 },{ 26, 22, 8 }, },
2578
{ { 45, 22, 1327 },{ 41, 31, 328 },{ 25, 35, 96 },{ 35, 31, 8 }, }, { { 53, 30, 1327 },{ 49, 39, 328 },{ 33, 43, 96 },{ 43, 39, 8 }, },
2579
{ { 61, 38, 1327 },{ 57, 47, 328 },{ 41, 51, 96 },{ 51, 47, 8 }, }, { { 69, 46, 1327 },{ 65, 55, 328 },{ 49, 59, 96 },{ 59, 55, 8 }, },
2580
{ { 78, 55, 1327 },{ 74, 64, 328 },{ 58, 68, 96 },{ 68, 64, 8 }, }, { { 86, 63, 1327 },{ 82, 72, 328 },{ 66, 76, 96 },{ 76, 72, 8 }, },
2581
{ { 94, 71, 1327 },{ 90, 80, 328 },{ 74, 84, 96 },{ 84, 80, 8 }, }, { { 102, 79, 1327 },{ 98, 88, 328 },{ 82, 92, 96 },{ 92, 88, 8 }, },
2582
{ { 111, 88, 1327 },{ 107, 97, 328 },{ 91, 101, 96 },{ 101, 97, 8 }, }, { { 119, 96, 1327 },{ 115, 105, 328 },{ 99, 109, 96 },{ 109, 105, 8 }, },
2583
{ { 127, 104, 1327 },{ 123, 113, 328 },{ 107, 117, 96 },{ 117, 113, 8 }, }, { { 135, 112, 1327 },{ 131, 121, 328 },{ 115, 125, 96 },{ 125, 121, 8 }, },
2584
{ { 144, 121, 1327 },{ 140, 130, 328 },{ 124, 134, 96 },{ 134, 130, 8 }, }, { { 152, 129, 1327 },{ 148, 138, 328 },{ 132, 142, 96 },{ 142, 138, 8 }, },
2585
{ { 160, 137, 1327 },{ 156, 146, 328 },{ 140, 150, 96 },{ 150, 146, 8 }, }, { { 168, 145, 1327 },{ 164, 154, 328 },{ 148, 158, 96 },{ 158, 154, 8 }, },
2586
{ { 177, 154, 1327 },{ 173, 163, 328 },{ 157, 167, 96 },{ 167, 163, 8 }, }, { { 185, 162, 1327 },{ 181, 171, 328 },{ 165, 175, 96 },{ 175, 171, 8 }, },
2587
{ { 193, 170, 1327 },{ 189, 179, 328 },{ 173, 183, 96 },{ 183, 179, 8 }, }, { { 201, 178, 1327 },{ 197, 187, 328 },{ 181, 191, 96 },{ 191, 187, 8 }, },
2588
{ { 210, 187, 1327 },{ 206, 196, 328 },{ 190, 200, 96 },{ 200, 196, 8 }, }, { { 218, 195, 1327 },{ 214, 204, 328 },{ 198, 208, 96 },{ 208, 204, 8 }, },
2589
{ { 226, 203, 1327 },{ 222, 212, 328 },{ 206, 216, 96 },{ 216, 212, 8 }, }, { { 234, 211, 1327 },{ 230, 220, 328 },{ 214, 224, 96 },{ 224, 220, 8 }, },
2590
{ { 243, 220, 1327 },{ 239, 229, 328 },{ 223, 233, 96 },{ 233, 229, 8 }, }, { { 251, 228, 1327 },{ 247, 237, 328 },{ 231, 241, 96 },{ 241, 237, 8 }, },
2591
{ { 239, 249, 3680 },{ 245, 249, 3648 },{ 239, 249, 96 },{ 249, 245, 8 }, }, { { 247, 253, 4040 },{ 255, 253, 8 },{ 247, 253, 456 },{ 255, 253, 8 }, },
2592
{ { 5, 17, 566 },{ 5, 17, 560 },{ 5, 0, 9 },{ 5, 0, 8 }, }, { { 25, 0, 313 },{ 25, 3, 328 },{ 13, 0, 49 },{ 13, 3, 8 }, },
2593
{ { 39, 0, 1329 },{ 33, 11, 328 },{ 11, 21, 70 },{ 21, 11, 8 }, }, { { 47, 7, 1329 },{ 41, 19, 328 },{ 29, 7, 33 },{ 29, 19, 8 }, },
2594
{ { 50, 11, 239 },{ 50, 28, 328 },{ 38, 16, 33 },{ 38, 28, 8 }, }, { { 92, 13, 2423 },{ 58, 36, 328 },{ 46, 24, 33 },{ 46, 36, 8 }, },
2595
{ { 100, 21, 2423 },{ 66, 44, 328 },{ 54, 32, 33 },{ 54, 44, 8 }, }, { { 86, 7, 1253 },{ 74, 52, 328 },{ 62, 40, 33 },{ 62, 52, 8 }, },
2596
{ { 95, 16, 1253 },{ 83, 61, 328 },{ 71, 49, 33 },{ 71, 61, 8 }, }, { { 103, 24, 1253 },{ 91, 69, 328 },{ 79, 57, 33 },{ 79, 69, 8 }, },
2597
{ { 111, 32, 1253 },{ 99, 77, 328 },{ 87, 65, 33 },{ 87, 77, 8 }, }, { { 119, 40, 1253 },{ 107, 85, 328 },{ 95, 73, 33 },{ 95, 85, 8 }, },
2598
{ { 128, 49, 1253 },{ 116, 94, 328 },{ 104, 82, 33 },{ 104, 94, 8 }, }, { { 136, 57, 1253 },{ 124, 102, 328 },{ 112, 90, 33 },{ 112, 102, 8 }, },
2599
{ { 144, 65, 1253 },{ 132, 110, 328 },{ 120, 98, 33 },{ 120, 110, 8 }, }, { { 152, 73, 1253 },{ 140, 118, 328 },{ 128, 106, 33 },{ 128, 118, 8 }, },
2600
{ { 161, 82, 1253 },{ 149, 127, 328 },{ 137, 115, 33 },{ 137, 127, 8 }, }, { { 169, 90, 1253 },{ 157, 135, 328 },{ 145, 123, 33 },{ 145, 135, 8 }, },
2601
{ { 177, 98, 1253 },{ 165, 143, 328 },{ 153, 131, 33 },{ 153, 143, 8 }, }, { { 185, 106, 1253 },{ 173, 151, 328 },{ 161, 139, 33 },{ 161, 151, 8 }, },
2602
{ { 194, 115, 1253 },{ 182, 160, 328 },{ 170, 148, 33 },{ 170, 160, 8 }, }, { { 202, 123, 1253 },{ 190, 168, 328 },{ 178, 156, 33 },{ 178, 168, 8 }, },
2603
{ { 210, 131, 1253 },{ 198, 176, 328 },{ 186, 164, 33 },{ 186, 176, 8 }, }, { { 218, 139, 1253 },{ 206, 184, 328 },{ 194, 172, 33 },{ 194, 184, 8 }, },
2604
{ { 227, 148, 1253 },{ 215, 193, 328 },{ 203, 181, 33 },{ 203, 193, 8 }, }, { { 235, 156, 1253 },{ 223, 201, 328 },{ 211, 189, 33 },{ 211, 201, 8 }, },
2605
{ { 243, 164, 1253 },{ 231, 209, 328 },{ 219, 197, 33 },{ 219, 209, 8 }, }, { { 183, 239, 867 },{ 239, 217, 328 },{ 227, 205, 33 },{ 227, 217, 8 }, },
2606
{ { 254, 214, 1329 },{ 248, 226, 328 },{ 236, 214, 33 },{ 236, 226, 8 }, }, { { 222, 244, 3680 },{ 234, 244, 3648 },{ 244, 222, 33 },{ 244, 234, 8 }, },
2607
{ { 230, 252, 3680 },{ 242, 252, 3648 },{ 252, 230, 33 },{ 252, 242, 8 }, }, { { 238, 250, 4040 },{ 255, 250, 8 },{ 238, 250, 456 },{ 255, 250, 8 }, },
2608
{ { 9, 29, 566 },{ 9, 29, 560 },{ 9, 0, 9 },{ 9, 0, 8 }, }, { { 17, 37, 566 },{ 17, 37, 560 },{ 17, 0, 9 },{ 17, 0, 8 }, },
2609
{ { 45, 0, 313 },{ 45, 0, 312 },{ 25, 0, 49 },{ 25, 7, 8 }, }, { { 14, 63, 2758 },{ 5, 53, 784 },{ 15, 33, 70 },{ 33, 15, 8 }, },
2610
{ { 71, 6, 1329 },{ 72, 4, 1328 },{ 42, 4, 33 },{ 42, 24, 8 }, }, { { 70, 3, 239 },{ 70, 2, 232 },{ 50, 12, 33 },{ 50, 32, 8 }, },
2611
{ { 0, 98, 2842 },{ 78, 10, 232 },{ 58, 20, 33 },{ 58, 40, 8 }, }, { { 97, 27, 1329 },{ 86, 18, 232 },{ 66, 28, 33 },{ 66, 48, 8 }, },
2612
{ { 0, 94, 867 },{ 95, 27, 232 },{ 75, 37, 33 },{ 75, 57, 8 }, }, { { 8, 102, 867 },{ 103, 35, 232 },{ 83, 45, 33 },{ 83, 65, 8 }, },
2613
{ { 12, 112, 867 },{ 111, 43, 232 },{ 91, 53, 33 },{ 91, 73, 8 }, }, { { 139, 2, 1253 },{ 119, 51, 232 },{ 99, 61, 33 },{ 99, 81, 8 }, },
2614
{ { 148, 13, 1253 },{ 128, 60, 232 },{ 108, 70, 33 },{ 108, 90, 8 }, }, { { 156, 21, 1253 },{ 136, 68, 232 },{ 116, 78, 33 },{ 116, 98, 8 }, },
2615
{ { 164, 29, 1253 },{ 144, 76, 232 },{ 124, 86, 33 },{ 124, 106, 8 }, }, { { 172, 37, 1253 },{ 152, 84, 232 },{ 132, 94, 33 },{ 132, 114, 8 }, },
2616
{ { 181, 46, 1253 },{ 161, 93, 232 },{ 141, 103, 33 },{ 141, 123, 8 }, }, { { 189, 54, 1253 },{ 169, 101, 232 },{ 149, 111, 33 },{ 149, 131, 8 }, },
2617
{ { 197, 62, 1253 },{ 177, 109, 232 },{ 157, 119, 33 },{ 157, 139, 8 }, }, { { 205, 70, 1253 },{ 185, 117, 232 },{ 165, 127, 33 },{ 165, 147, 8 }, },
2618
{ { 214, 79, 1253 },{ 194, 126, 232 },{ 174, 136, 33 },{ 174, 156, 8 }, }, { { 222, 87, 1253 },{ 202, 134, 232 },{ 182, 144, 33 },{ 182, 164, 8 }, },
2619
{ { 230, 95, 1253 },{ 210, 142, 232 },{ 190, 152, 33 },{ 190, 172, 8 }, }, { { 238, 103, 1253 },{ 218, 150, 232 },{ 198, 160, 33 },{ 198, 180, 8 }, },
2620
{ { 247, 112, 1253 },{ 227, 159, 232 },{ 207, 169, 33 },{ 207, 189, 8 }, }, { { 255, 120, 1253 },{ 235, 167, 232 },{ 215, 177, 33 },{ 215, 197, 8 }, },
2621
{ { 146, 243, 867 },{ 243, 175, 232 },{ 223, 185, 33 },{ 223, 205, 8 }, }, { { 184, 231, 3682 },{ 203, 251, 784 },{ 231, 193, 33 },{ 231, 213, 8 }, },
2622
{ { 193, 240, 3682 },{ 222, 240, 3648 },{ 240, 202, 33 },{ 240, 222, 8 }, }, { { 255, 210, 169 },{ 230, 248, 3648 },{ 248, 210, 33 },{ 248, 230, 8 }, },
2623
{ { 218, 238, 4040 },{ 255, 238, 8 },{ 218, 238, 456 },{ 255, 238, 8 }, }, { { 226, 246, 4040 },{ 255, 246, 8 },{ 226, 246, 456 },{ 255, 246, 8 }, },
2624
{ { 13, 42, 566 },{ 13, 42, 560 },{ 13, 0, 9 },{ 13, 0, 8 }, }, { { 50, 0, 329 },{ 50, 0, 328 },{ 21, 0, 9 },{ 21, 0, 8 }, },
2625
{ { 29, 58, 566 },{ 67, 2, 1352 },{ 3, 29, 70 },{ 29, 3, 8 }, }, { { 10, 79, 2758 },{ 76, 11, 1352 },{ 11, 37, 70 },{ 37, 11, 8 }, },
2626
{ { 7, 75, 790 },{ 7, 75, 784 },{ 20, 46, 70 },{ 46, 20, 8 }, }, { { 15, 83, 790 },{ 97, 1, 1328 },{ 28, 54, 70 },{ 54, 28, 8 }, },
2627
{ { 101, 7, 1329 },{ 105, 9, 1328 },{ 62, 0, 39 },{ 62, 36, 8 }, }, { { 99, 1, 239 },{ 99, 3, 232 },{ 1, 71, 98 },{ 70, 44, 8 }, },
2628
{ { 107, 11, 239 },{ 108, 12, 232 },{ 10, 80, 98 },{ 79, 53, 8 }, }, { { 115, 19, 239 },{ 116, 20, 232 },{ 18, 88, 98 },{ 87, 61, 8 }, },
2629
{ { 123, 27, 239 },{ 124, 28, 232 },{ 26, 96, 98 },{ 95, 69, 8 }, }, { { 131, 35, 239 },{ 132, 36, 232 },{ 34, 104, 98 },{ 103, 77, 8 }, },
2630
{ { 140, 44, 239 },{ 141, 45, 232 },{ 43, 113, 98 },{ 112, 86, 8 }, }, { { 148, 52, 239 },{ 149, 53, 232 },{ 51, 121, 98 },{ 120, 94, 8 }, },
2631
{ { 156, 60, 239 },{ 157, 61, 232 },{ 59, 129, 98 },{ 128, 102, 8 }, }, { { 164, 68, 239 },{ 165, 69, 232 },{ 67, 137, 98 },{ 136, 110, 8 }, },
2632
{ { 173, 77, 239 },{ 174, 78, 232 },{ 76, 146, 98 },{ 145, 119, 8 }, }, { { 181, 85, 239 },{ 182, 86, 232 },{ 84, 154, 98 },{ 153, 127, 8 }, },
2633
{ { 189, 93, 239 },{ 190, 94, 232 },{ 92, 162, 98 },{ 161, 135, 8 }, }, { { 197, 101, 239 },{ 198, 102, 232 },{ 100, 170, 98 },{ 169, 143, 8 }, },
2634
{ { 206, 110, 239 },{ 207, 111, 232 },{ 109, 179, 98 },{ 178, 152, 8 }, }, { { 214, 118, 239 },{ 215, 119, 232 },{ 117, 187, 98 },{ 186, 160, 8 }, },
2635
{ { 222, 126, 239 },{ 223, 127, 232 },{ 125, 195, 98 },{ 194, 168, 8 }, }, { { 230, 134, 239 },{ 231, 135, 232 },{ 133, 203, 98 },{ 202, 176, 8 }, },
2636
{ { 239, 143, 239 },{ 240, 144, 232 },{ 142, 212, 98 },{ 211, 185, 8 }, }, { { 247, 151, 239 },{ 180, 248, 784 },{ 150, 220, 98 },{ 219, 193, 8 }, },
2637
{ { 159, 228, 3682 },{ 201, 227, 3648 },{ 158, 228, 98 },{ 227, 201, 8 }, }, { { 181, 249, 3928 },{ 209, 235, 3648 },{ 166, 236, 98 },{ 235, 209, 8 }, },
2638
{ { 255, 189, 169 },{ 218, 244, 3648 },{ 175, 245, 98 },{ 244, 218, 8 }, }, { { 197, 226, 4040 },{ 226, 252, 3648 },{ 183, 253, 98 },{ 252, 226, 8 }, },
2639
{ { 205, 234, 4040 },{ 255, 234, 8 },{ 205, 234, 456 },{ 255, 234, 8 }, }, { { 213, 242, 4040 },{ 255, 242, 8 },{ 213, 242, 456 },{ 255, 242, 8 }, },
2640
{ { 18, 60, 566 },{ 18, 60, 560 },{ 18, 0, 9 },{ 18, 0, 8 }, }, { { 26, 68, 566 },{ 26, 68, 560 },{ 26, 0, 9 },{ 26, 0, 8 }, },
2641
{ { 34, 76, 566 },{ 34, 76, 560 },{ 34, 0, 9 },{ 34, 0, 8 }, }, { { 5, 104, 2758 },{ 98, 5, 1352 },{ 42, 0, 57 },{ 42, 6, 8 }, },
2642
{ { 92, 0, 313 },{ 93, 1, 312 },{ 15, 51, 70 },{ 51, 15, 8 }, }, { { 3, 101, 790 },{ 3, 101, 784 },{ 0, 59, 88 },{ 59, 23, 8 }, },
2643
{ { 14, 107, 790 },{ 11, 109, 784 },{ 31, 67, 70 },{ 67, 31, 8 }, }, { { 19, 117, 790 },{ 19, 117, 784 },{ 39, 75, 70 },{ 75, 39, 8 }, },
2644
{ { 28, 126, 790 },{ 28, 126, 784 },{ 83, 5, 33 },{ 84, 48, 8 }, }, { { 132, 0, 239 },{ 36, 134, 784 },{ 91, 13, 33 },{ 92, 56, 8 }, },
2645
{ { 142, 4, 239 },{ 44, 142, 784 },{ 99, 21, 33 },{ 100, 64, 8 }, }, { { 150, 12, 239 },{ 52, 150, 784 },{ 107, 29, 33 },{ 108, 72, 8 }, },
2646
{ { 159, 21, 239 },{ 61, 159, 784 },{ 116, 38, 33 },{ 117, 81, 8 }, }, { { 167, 29, 239 },{ 69, 167, 784 },{ 124, 46, 33 },{ 125, 89, 8 }, },
2647
{ { 175, 37, 239 },{ 77, 175, 784 },{ 132, 54, 33 },{ 133, 97, 8 }, }, { { 183, 45, 239 },{ 85, 183, 784 },{ 140, 62, 33 },{ 141, 105, 8 }, },
2648
{ { 192, 54, 239 },{ 94, 192, 784 },{ 149, 71, 33 },{ 150, 114, 8 }, }, { { 200, 62, 239 },{ 102, 200, 784 },{ 157, 79, 33 },{ 158, 122, 8 }, },
2649
{ { 208, 70, 239 },{ 110, 208, 784 },{ 165, 87, 33 },{ 166, 130, 8 }, }, { { 216, 78, 239 },{ 118, 216, 784 },{ 173, 95, 33 },{ 174, 138, 8 }, },
2650
{ { 225, 87, 239 },{ 127, 225, 784 },{ 182, 104, 33 },{ 183, 147, 8 }, }, { { 233, 95, 239 },{ 135, 233, 784 },{ 190, 112, 33 },{ 191, 155, 8 }, },
2651
{ { 241, 103, 239 },{ 143, 241, 784 },{ 198, 120, 33 },{ 199, 163, 8 }, }, { { 111, 208, 3682 },{ 151, 249, 784 },{ 206, 128, 33 },{ 207, 171, 8 }, },
2652
{ { 120, 217, 3682 },{ 180, 216, 3648 },{ 215, 137, 33 },{ 216, 180, 8 }, }, { { 128, 225, 3682 },{ 188, 224, 3648 },{ 223, 145, 33 },{ 224, 188, 8 }, },
2653
{ { 155, 253, 3928 },{ 196, 232, 3648 },{ 231, 153, 33 },{ 232, 196, 8 }, }, { { 144, 241, 3682 },{ 204, 240, 3648 },{ 239, 161, 33 },{ 240, 204, 8 }, },
2654
{ { 153, 250, 3682 },{ 213, 249, 3648 },{ 248, 170, 33 },{ 249, 213, 8 }, }, { { 179, 221, 4040 },{ 255, 221, 8 },{ 179, 221, 456 },{ 255, 221, 8 }, },
2655
{ { 187, 229, 4040 },{ 255, 229, 8 },{ 187, 229, 456 },{ 255, 229, 8 }, }, { { 195, 237, 4040 },{ 255, 237, 8 },{ 195, 237, 456 },{ 255, 237, 8 }, },
2656
{ { 24, 80, 566 },{ 24, 80, 560 },{ 24, 0, 9 },{ 24, 0, 8 }, }, { { 32, 88, 566 },{ 32, 88, 560 },{ 32, 0, 9 },{ 32, 0, 8 }, },
2657
{ { 40, 96, 566 },{ 40, 96, 560 },{ 40, 0, 9 },{ 40, 0, 8 }, }, { { 48, 104, 566 },{ 48, 104, 560 },{ 48, 0, 9 },{ 48, 0, 8 }, },
2658
{ { 9, 138, 2758 },{ 130, 7, 1352 },{ 9, 57, 70 },{ 57, 9, 8 }, }, { { 119, 0, 313 },{ 120, 0, 312 },{ 17, 65, 70 },{ 65, 17, 8 }, },
2659
{ { 0, 128, 784 },{ 128, 6, 312 },{ 25, 73, 70 },{ 73, 25, 8 }, }, { { 6, 137, 790 },{ 5, 136, 784 },{ 33, 81, 70 },{ 81, 33, 8 }, },
2660
{ { 42, 171, 2758 },{ 14, 145, 784 },{ 42, 90, 70 },{ 90, 42, 8 }, }, { { 50, 179, 2758 },{ 22, 153, 784 },{ 50, 98, 70 },{ 98, 50, 8 }, },
2661
{ { 58, 187, 2758 },{ 30, 161, 784 },{ 58, 106, 70 },{ 106, 58, 8 }, }, { { 191, 18, 1329 },{ 38, 169, 784 },{ 112, 9, 33 },{ 114, 66, 8 }, },
2662
{ { 176, 0, 239 },{ 47, 178, 784 },{ 121, 18, 33 },{ 123, 75, 8 }, }, { { 187, 1, 239 },{ 55, 186, 784 },{ 129, 26, 33 },{ 131, 83, 8 }, },
2663
{ { 195, 10, 239 },{ 63, 194, 784 },{ 137, 34, 33 },{ 139, 91, 8 }, }, { { 203, 18, 239 },{ 71, 202, 784 },{ 145, 42, 33 },{ 147, 99, 8 }, },
2664
{ { 212, 27, 239 },{ 80, 211, 784 },{ 154, 51, 33 },{ 156, 108, 8 }, }, { { 220, 35, 239 },{ 88, 219, 784 },{ 162, 59, 33 },{ 164, 116, 8 }, },
2665
{ { 228, 43, 239 },{ 96, 227, 784 },{ 170, 67, 33 },{ 172, 124, 8 }, }, { { 236, 51, 239 },{ 104, 235, 784 },{ 178, 75, 33 },{ 180, 132, 8 }, },
2666
{ { 245, 60, 239 },{ 113, 244, 784 },{ 187, 84, 33 },{ 189, 141, 8 }, }, { { 91, 194, 3680 },{ 149, 197, 3648 },{ 195, 92, 33 },{ 197, 149, 8 }, },
2667
{ { 99, 202, 3680 },{ 157, 205, 3648 },{ 203, 100, 33 },{ 205, 157, 8 }, }, { { 107, 210, 3680 },{ 165, 213, 3648 },{ 211, 108, 33 },{ 213, 165, 8 }, },
2668
{ { 119, 249, 3928 },{ 174, 222, 3648 },{ 220, 117, 33 },{ 222, 174, 8 }, }, { { 127, 255, 856 },{ 182, 230, 3648 },{ 228, 125, 33 },{ 230, 182, 8 }, },
2669
{ { 255, 135, 169 },{ 190, 238, 3648 },{ 236, 133, 33 },{ 238, 190, 8 }, }, { { 140, 243, 3680 },{ 198, 246, 3648 },{ 244, 141, 33 },{ 246, 198, 8 }, },
2670
{ { 151, 207, 4040 },{ 255, 207, 8 },{ 151, 207, 456 },{ 255, 207, 8 }, }, { { 159, 215, 4040 },{ 255, 215, 8 },{ 159, 215, 456 },{ 255, 215, 8 }, },
2671
{ { 167, 223, 4040 },{ 255, 223, 8 },{ 167, 223, 456 },{ 255, 223, 8 }, }, { { 175, 231, 4040 },{ 255, 231, 8 },{ 175, 231, 456 },{ 255, 231, 8 }, },
2672
{ { 33, 106, 566 },{ 33, 106, 560 },{ 33, 0, 9 },{ 33, 0, 8 }, }, { { 41, 114, 566 },{ 41, 114, 560 },{ 41, 0, 9 },{ 41, 0, 8 }, },
2673
{ { 49, 122, 566 },{ 49, 122, 560 },{ 49, 0, 9 },{ 49, 0, 8 }, }, { { 57, 130, 566 },{ 57, 130, 560 },{ 57, 0, 9 },{ 57, 0, 8 }, },
2674
{ { 66, 139, 566 },{ 66, 139, 560 },{ 66, 0, 9 },{ 66, 0, 8 }, }, { { 74, 147, 566 },{ 170, 7, 1352 },{ 8, 74, 70 },{ 74, 8, 8 }, },
2675
{ { 152, 0, 313 },{ 178, 15, 1352 },{ 0, 82, 80 },{ 82, 16, 8 }, }, { { 162, 0, 313 },{ 186, 23, 1352 },{ 24, 90, 70 },{ 90, 24, 8 }, },
2676
{ { 0, 171, 784 },{ 195, 32, 1352 },{ 33, 99, 70 },{ 99, 33, 8 }, }, { { 6, 179, 790 },{ 203, 40, 1352 },{ 41, 107, 70 },{ 107, 41, 8 }, },
2677
{ { 15, 187, 790 },{ 211, 48, 1352 },{ 115, 0, 41 },{ 115, 49, 8 }, }, { { 61, 199, 710 },{ 219, 56, 1352 },{ 57, 123, 70 },{ 123, 57, 8 }, },
2678
{ { 70, 208, 710 },{ 228, 65, 1352 },{ 66, 132, 70 },{ 132, 66, 8 }, }, { { 78, 216, 710 },{ 236, 73, 1352 },{ 74, 140, 70 },{ 140, 74, 8 }, },
2679
{ { 86, 224, 710 },{ 244, 81, 1352 },{ 145, 7, 33 },{ 148, 82, 8 }, }, { { 222, 8, 233 },{ 252, 89, 1352 },{ 153, 15, 33 },{ 156, 90, 8 }, },
2680
{ { 235, 0, 239 },{ 241, 101, 328 },{ 166, 6, 39 },{ 165, 99, 8 }, }, { { 32, 170, 3680 },{ 249, 109, 328 },{ 0, 175, 98 },{ 173, 107, 8 }, },
2681
{ { 40, 178, 3680 },{ 115, 181, 3648 },{ 8, 183, 98 },{ 181, 115, 8 }, }, { { 48, 186, 3680 },{ 123, 189, 3648 },{ 16, 191, 98 },{ 189, 123, 8 }, },
2682
{ { 57, 195, 3680 },{ 132, 198, 3648 },{ 25, 200, 98 },{ 198, 132, 8 }, }, { { 67, 243, 3928 },{ 140, 206, 3648 },{ 33, 208, 98 },{ 206, 140, 8 }, },
2683
{ { 76, 251, 3928 },{ 148, 214, 3648 },{ 41, 216, 98 },{ 214, 148, 8 }, }, { { 86, 255, 856 },{ 156, 222, 3648 },{ 49, 224, 98 },{ 222, 156, 8 }, },
2684
{ { 255, 93, 169 },{ 165, 231, 3648 },{ 58, 233, 98 },{ 231, 165, 8 }, }, { { 98, 236, 3680 },{ 173, 239, 3648 },{ 66, 241, 98 },{ 239, 173, 8 }, },
2685
{ { 108, 181, 4040 },{ 181, 247, 3648 },{ 74, 249, 98 },{ 247, 181, 8 }, }, { { 116, 189, 4040 },{ 255, 189, 8 },{ 116, 189, 456 },{ 255, 189, 8 }, },
2686
{ { 125, 198, 4040 },{ 255, 198, 8 },{ 125, 198, 456 },{ 255, 198, 8 }, }, { { 133, 206, 4040 },{ 255, 206, 8 },{ 133, 206, 456 },{ 255, 206, 8 }, },
2687
{ { 141, 214, 4040 },{ 255, 214, 8 },{ 141, 214, 456 },{ 255, 214, 8 }, }, { { 149, 222, 4040 },{ 255, 222, 8 },{ 149, 222, 456 },{ 255, 222, 8 }, },
2688
{ { 47, 183, 566 },{ 47, 183, 560 },{ 47, 0, 9 },{ 47, 0, 8 }, }, { { 55, 191, 566 },{ 55, 191, 560 },{ 55, 0, 9 },{ 55, 0, 8 }, },
2689
{ { 63, 199, 566 },{ 63, 199, 560 },{ 63, 0, 9 },{ 63, 0, 8 }, }, { { 71, 207, 566 },{ 71, 207, 560 },{ 71, 0, 9 },{ 71, 0, 8 }, },
2690
{ { 80, 216, 566 },{ 80, 216, 560 },{ 80, 0, 9 },{ 80, 0, 8 }, }, { { 88, 224, 566 },{ 88, 224, 560 },{ 88, 0, 9 },{ 88, 0, 8 }, },
2691
{ { 3, 233, 710 },{ 3, 233, 704 },{ 2, 96, 70 },{ 96, 2, 8 }, }, { { 11, 241, 710 },{ 11, 241, 704 },{ 10, 104, 70 },{ 104, 10, 8 }, },
2692
{ { 20, 250, 710 },{ 20, 250, 704 },{ 19, 113, 70 },{ 113, 19, 8 }, }, { { 27, 121, 3654 },{ 27, 121, 3648 },{ 27, 121, 70 },{ 121, 27, 8 }, },
2693
{ { 35, 129, 3654 },{ 35, 129, 3648 },{ 35, 129, 70 },{ 129, 35, 8 }, }, { { 43, 137, 3654 },{ 43, 137, 3648 },{ 43, 137, 70 },{ 137, 43, 8 }, },
2694
{ { 52, 146, 3654 },{ 52, 146, 3648 },{ 52, 146, 70 },{ 146, 52, 8 }, }, { { 60, 154, 3654 },{ 60, 154, 3648 },{ 60, 154, 70 },{ 154, 60, 8 }, },
2695
{ { 68, 162, 3654 },{ 68, 162, 3648 },{ 68, 162, 70 },{ 162, 68, 8 }, }, { { 76, 170, 3654 },{ 76, 170, 3648 },{ 76, 170, 70 },{ 170, 76, 8 }, },
2696
{ { 85, 179, 3654 },{ 85, 179, 3648 },{ 85, 179, 70 },{ 179, 85, 8 }, }, { { 93, 187, 3654 },{ 93, 187, 3648 },{ 93, 187, 70 },{ 187, 93, 8 }, },
2697
{ { 101, 195, 3654 },{ 101, 195, 3648 },{ 101, 195, 70 },{ 195, 101, 8 }, }, { { 109, 203, 3654 },{ 109, 203, 3648 },{ 109, 203, 70 },{ 203, 109, 8 }, },
2698
{ { 118, 212, 3654 },{ 118, 212, 3648 },{ 118, 212, 70 },{ 212, 118, 8 }, }, { { 126, 220, 3654 },{ 126, 220, 3648 },{ 126, 220, 70 },{ 220, 126, 8 }, },
2699
{ { 134, 228, 3654 },{ 134, 228, 3648 },{ 134, 228, 70 },{ 228, 134, 8 }, }, { { 5, 236, 3680 },{ 142, 236, 3648 },{ 5, 236, 96 },{ 236, 142, 8 }, },
2700
{ { 14, 245, 3680 },{ 151, 245, 3648 },{ 14, 245, 96 },{ 245, 151, 8 }, }, { { 23, 159, 4040 },{ 159, 253, 3648 },{ 23, 159, 456 },{ 253, 159, 8 }, },
2701
{ { 31, 167, 4040 },{ 255, 167, 8 },{ 31, 167, 456 },{ 255, 167, 8 }, }, { { 39, 175, 4040 },{ 255, 175, 8 },{ 39, 175, 456 },{ 255, 175, 8 }, },
2702
{ { 48, 184, 4040 },{ 255, 184, 8 },{ 48, 184, 456 },{ 255, 184, 8 }, }, { { 56, 192, 4040 },{ 255, 192, 8 },{ 56, 192, 456 },{ 255, 192, 8 }, },
2703
{ { 64, 200, 4040 },{ 255, 200, 8 },{ 64, 200, 456 },{ 255, 200, 8 }, },{ { 72, 208, 4040 },{ 255, 208, 8 },{ 72, 208, 456 },{ 255, 208, 8 }, },
2704
2705
};
2706
2707
struct dxt5a_block
2708
{
2709
uint8_t m_endpoints[2];
2710
2711
enum { cTotalSelectorBytes = 6 };
2712
uint8_t m_selectors[cTotalSelectorBytes];
2713
2714
inline void clear()
2715
{
2716
basisu::clear_obj(*this);
2717
}
2718
2719
inline uint32_t get_low_alpha() const
2720
{
2721
return m_endpoints[0];
2722
}
2723
2724
inline uint32_t get_high_alpha() const
2725
{
2726
return m_endpoints[1];
2727
}
2728
2729
inline void set_low_alpha(uint32_t i)
2730
{
2731
assert(i <= UINT8_MAX);
2732
m_endpoints[0] = static_cast<uint8_t>(i);
2733
}
2734
2735
inline void set_high_alpha(uint32_t i)
2736
{
2737
assert(i <= UINT8_MAX);
2738
m_endpoints[1] = static_cast<uint8_t>(i);
2739
}
2740
2741
inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
2742
2743
uint32_t get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); }
2744
uint32_t get_selectors_as_word(uint32_t index) { assert(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); }
2745
2746
inline uint32_t get_selector(uint32_t x, uint32_t y) const
2747
{
2748
assert((x < 4U) && (y < 4U));
2749
2750
uint32_t selector_index = (y * 4) + x;
2751
uint32_t bit_index = selector_index * cDXT5SelectorBits;
2752
2753
uint32_t byte_index = bit_index >> 3;
2754
uint32_t bit_ofs = bit_index & 7;
2755
2756
uint32_t v = m_selectors[byte_index];
2757
if (byte_index < (cTotalSelectorBytes - 1))
2758
v |= (m_selectors[byte_index + 1] << 8);
2759
2760
return (v >> bit_ofs) & 7;
2761
}
2762
2763
inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
2764
{
2765
assert((x < 4U) && (y < 4U) && (val < 8U));
2766
2767
uint32_t selector_index = (y * 4) + x;
2768
uint32_t bit_index = selector_index * cDXT5SelectorBits;
2769
2770
uint32_t byte_index = bit_index >> 3;
2771
uint32_t bit_ofs = bit_index & 7;
2772
2773
uint32_t v = m_selectors[byte_index];
2774
if (byte_index < (cTotalSelectorBytes - 1))
2775
v |= (m_selectors[byte_index + 1] << 8);
2776
2777
v &= (~(7 << bit_ofs));
2778
v |= (val << bit_ofs);
2779
2780
m_selectors[byte_index] = static_cast<uint8_t>(v);
2781
if (byte_index < (cTotalSelectorBytes - 1))
2782
m_selectors[byte_index + 1] = static_cast<uint8_t>(v >> 8);
2783
}
2784
2785
enum { cMaxSelectorValues = 8 };
2786
2787
static uint32_t get_block_values6(color32* pDst, uint32_t l, uint32_t h)
2788
{
2789
pDst[0].a = static_cast<uint8_t>(l);
2790
pDst[1].a = static_cast<uint8_t>(h);
2791
pDst[2].a = static_cast<uint8_t>((l * 4 + h) / 5);
2792
pDst[3].a = static_cast<uint8_t>((l * 3 + h * 2) / 5);
2793
pDst[4].a = static_cast<uint8_t>((l * 2 + h * 3) / 5);
2794
pDst[5].a = static_cast<uint8_t>((l + h * 4) / 5);
2795
pDst[6].a = 0;
2796
pDst[7].a = 255;
2797
return 6;
2798
}
2799
2800
static uint32_t get_block_values8(color32* pDst, uint32_t l, uint32_t h)
2801
{
2802
pDst[0].a = static_cast<uint8_t>(l);
2803
pDst[1].a = static_cast<uint8_t>(h);
2804
pDst[2].a = static_cast<uint8_t>((l * 6 + h) / 7);
2805
pDst[3].a = static_cast<uint8_t>((l * 5 + h * 2) / 7);
2806
pDst[4].a = static_cast<uint8_t>((l * 4 + h * 3) / 7);
2807
pDst[5].a = static_cast<uint8_t>((l * 3 + h * 4) / 7);
2808
pDst[6].a = static_cast<uint8_t>((l * 2 + h * 5) / 7);
2809
pDst[7].a = static_cast<uint8_t>((l + h * 6) / 7);
2810
return 8;
2811
}
2812
2813
static uint32_t get_block_values(color32* pDst, uint32_t l, uint32_t h)
2814
{
2815
if (l > h)
2816
return get_block_values8(pDst, l, h);
2817
else
2818
return get_block_values6(pDst, l, h);
2819
}
2820
};
2821
2822
// Converts one ETC1S block (an endpoint plus a selector set) into a DXT5A block.
// Only the red component of the ETC1S colors is used.
//   pDst_block - receives the two endpoints and all 16 selectors.
//   pEndpoints - supplies the 5-bit base color (m_color5) and intensity table (m_inten5).
//   pSelector  - supplies the per-texel selectors and their low/high/unique-count summary.
static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t lo_sel = pSelector->m_lo_selector;
	const uint32_t hi_sel = pSelector->m_hi_selector;

	const color32& base = pEndpoints->m_color5;
	const uint32_t inten = pEndpoints->m_inten5;

	// Case 1: every texel uses the same selector, so the block is one solid value.
	if (lo_sel == hi_sel)
	{
		uint32_t r;
		decoder_etc_block::get_block_color5_r(base, inten, lo_sel, r);

		pDst_block->set_low_alpha(r);
		pDst_block->set_high_alpha(r);

		for (uint32_t i = 0; i < dxt5a_block::cTotalSelectorBytes; i++)
			pDst_block->m_selectors[i] = 0;

		return;
	}

	// Case 2: exactly two distinct selectors; use their two colors directly as
	// the endpoints and emit selector 0 (low endpoint) or 1 (high endpoint).
	if (pSelector->m_num_unique_selectors == 2)
	{
		color32 colors[4];
		decoder_etc_block::get_block_colors5(colors, base, inten);

		const uint32_t lo_r = colors[lo_sel].r;
		const uint32_t hi_r = colors[hi_sel].r;

		pDst_block->set_low_alpha(lo_r);
		pDst_block->set_high_alpha(hi_r);

		// TODO: Optimize this
		for (uint32_t i = 0; i < 16; i++)
		{
			const uint32_t x = i & 3;
			const uint32_t y = i >> 2;

			const uint32_t src_sel = pSelector->get_selector(x, y);
			pDst_block->set_selector(x, y, (src_sel == hi_sel) ? 1 : 0);
		}

		return;
	}

	// Case 3: general block; find the precomputed selector range matching
	// (lo_sel, hi_sel), falling back to range 0 when none matches.
	uint32_t range_index = 0;
	while (range_index < NUM_DXT5A_SELECTOR_RANGES)
	{
		if ((lo_sel == s_dxt5a_selector_ranges[range_index].m_low) && (hi_sel == s_dxt5a_selector_ranges[range_index].m_high))
			break;
		range_index++;
	}
	if (range_index == NUM_DXT5A_SELECTOR_RANGES)
		range_index = 0;

	const etc1_g_to_dxt5a_conversion& entry = g_etc1_g_to_dxt5a[base.r + inten * 32][range_index];

	pDst_block->set_low_alpha(entry.m_lo);
	pDst_block->set_high_alpha(entry.m_hi);

	// m_trans packs one 3-bit DXT5A selector per ETC1S selector value.
	const uint32_t trans = entry.m_trans;

	// TODO: Optimize this (like ETC1->BC1)
	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t x = i & 3;
		const uint32_t y = i >> 2;

		const uint32_t src_sel = pSelector->get_selector(x, y);
		const uint32_t dst_sel = (trans >> (src_sel * 3)) & 7;

		pDst_block->set_selector(x, y, dst_sel);
	}
}
2895
#endif //BASISD_SUPPORT_DXT5A
2896
2897
// PVRTC
2898
2899
#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC
2900
// Bit-spreading table: entry i has bit b of i moved to bit 2*b (b = 0..7), so
// the 8 input bits occupy the even bit positions of the 16-bit result.
// NOTE(review): presumably used to interleave block x/y coordinates into
// PVRTC's swizzled block order - confirm at the call sites.
static const uint16_t g_pvrtc_swizzle_table[256] =
{
	0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
	0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
	0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
	0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
	0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
	0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
	0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
	0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
	0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
	0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
	0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
	0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
	0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
	0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
	0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
	0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
};
2911
2912
// Note we can't use simple calculations to convert PVRTC1 encoded endpoint components to/from 8-bits, due to hardware approximations.
2913
// 8-bit value of each 5-bit PVRTC1 endpoint component level.
// Entry i equals (i << 3) | (i >> 2).
static const uint8_t g_pvrtc_5[32] =
{
	0, 8, 16, 24, 33, 41, 49, 57,
	66, 74, 82, 90, 99, 107, 115, 123,
	132, 140, 148, 156, 165, 173, 181, 189,
	198, 206, 214, 222, 231, 239, 247, 255
};
2914
// 8-bit value of each 4-bit PVRTC1 endpoint component level. Entry i is the
// 4-bit level widened to 5 bits ((i << 1) | (i >> 3)) and then to 8 bits.
static const uint8_t g_pvrtc_4[16] =
{
	0, 16, 33, 49, 66, 82, 99, 115,
	140, 156, 173, 189, 206, 222, 239, 255
};
2915
// 8-bit value of each 3-bit PVRTC1 endpoint component level. Entry i is the
// 3-bit level widened to 5 bits ((i << 2) | (i >> 1)) and then to 8 bits.
static const uint8_t g_pvrtc_3[8] =
{
	0, 33, 74, 107,
	148, 181, 222, 255
};
2916
// 8-bit value of each PVRTC1 alpha level. Entries 0..7 are i * 34; the extra
// entry 8 is 255.
// NOTE(review): entry 8 presumably stands for a fully opaque endpoint - confirm at the call sites.
static const uint8_t g_pvrtc_alpha[9] =
{
	0, 34, 68, 102, 136, 170, 204, 238,
	255
};
2917
2918
// Rounds an 8-bit value down to a 5-bit level: entry v is the largest index i
// for which g_pvrtc_5[i] <= v.
static const uint8_t g_pvrtc_5_floor[256] =
{
	0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,
	3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,
	7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
	11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,
	15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,
	19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,
	23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,
	27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31
};
2929
2930
// Rounds an 8-bit value up to a 5-bit level: entry v is the smallest index i
// for which g_pvrtc_5[i] >= v.
static const uint8_t g_pvrtc_5_ceil[256] =
{
	0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,
	4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,
	8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,
	12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,
	16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,
	20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,
	24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,
	28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31
};
2941
2942
// Rounds an 8-bit value down to a 4-bit level: entry v is the largest index i
// for which g_pvrtc_4[i] <= v.
static const uint8_t g_pvrtc_4_floor[256] =
{
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
	5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,
	7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,
	9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,
	11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,
	13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15
};
2953
2954
// Rounds an 8-bit value up to a 4-bit level: entry v is the smallest index i
// for which g_pvrtc_4[i] >= v.
static const uint8_t g_pvrtc_4_ceil[256] =
{
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,
	6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,
	8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,
	10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,
	12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,
	14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
};
2965
2966
// Rounds an 8-bit value down to a 3-bit level: entry v is the largest index i
// for which g_pvrtc_3[i] <= v.
static const uint8_t g_pvrtc_3_floor[256] =
{
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
	5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,
	6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7
};
2977
2978
// Rounds an 8-bit value up to a 3-bit level: entry v is the smallest index i
// for which g_pvrtc_3[i] >= v.
static const uint8_t g_pvrtc_3_ceil[256] =
{
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
	5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,
	6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
2989
2990
// Lookup table indexed by an 8-bit alpha value; entries are 0-8 and non-decreasing.
// Values 0-7 are 3-bit translucent alpha levels; 8 (reached only at index 255) is the marker
// pvrtc4_block::set_endpoint_floor tests for to select the opaque endpoint encoding.
static const uint8_t g_pvrtc_alpha_floor[256] =
{
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
	5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
	6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8
};
3001
3002
// Lookup table indexed by an 8-bit alpha value; entries are 0-8 and non-decreasing.
// Values 0-7 are 3-bit translucent alpha levels; 8 is the marker
// pvrtc4_block::set_endpoint_ceil tests for to select the opaque endpoint encoding.
static const uint8_t g_pvrtc_alpha_ceil[256] =
{
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
	4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
	5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
	6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
};
3013
3014
struct pvrtc4_block
3015
{
3016
uint32_t m_modulation;
3017
uint32_t m_endpoints;
3018
3019
pvrtc4_block() : m_modulation(0), m_endpoints(0) { }
3020
3021
inline bool operator== (const pvrtc4_block& rhs) const
3022
{
3023
return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints);
3024
}
3025
3026
inline void clear()
3027
{
3028
m_modulation = 0;
3029
m_endpoints = 0;
3030
}
3031
3032
inline bool get_block_uses_transparent_modulation() const
3033
{
3034
return (m_endpoints & 1) != 0;
3035
}
3036
3037
inline void set_block_uses_transparent_modulation(bool m)
3038
{
3039
m_endpoints = (m_endpoints & ~1U) | static_cast<uint32_t>(m);
3040
}
3041
3042
inline bool is_endpoint_opaque(uint32_t endpoint_index) const
3043
{
3044
static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
3045
return (m_endpoints & s_bitmasks[basisu::open_range_check(endpoint_index, 2U)]) != 0;
3046
}
3047
3048
inline void set_endpoint_opaque(uint32_t endpoint_index, bool opaque)
3049
{
3050
assert(endpoint_index < 2);
3051
static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
3052
if (opaque)
3053
m_endpoints |= s_bitmasks[endpoint_index];
3054
else
3055
m_endpoints &= ~s_bitmasks[endpoint_index];
3056
}
3057
3058
inline color32 get_endpoint_5554(uint32_t endpoint_index) const
3059
{
3060
assert(endpoint_index < 2);
3061
static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
3062
uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
3063
3064
uint32_t r, g, b, a;
3065
if (packed & 0x8000)
3066
{
3067
// opaque 554 or 555
3068
r = (packed >> 10) & 31;
3069
g = (packed >> 5) & 31;
3070
b = packed & 31;
3071
3072
if (!endpoint_index)
3073
b |= (b >> 4);
3074
3075
a = 0xF;
3076
}
3077
else
3078
{
3079
// translucent 4433 or 4443
3080
r = (packed >> 7) & 0x1E;
3081
g = (packed >> 3) & 0x1E;
3082
b = (packed & 0xF) << 1;
3083
3084
r |= (r >> 4);
3085
g |= (g >> 4);
3086
3087
if (!endpoint_index)
3088
b |= (b >> 3);
3089
else
3090
b |= (b >> 4);
3091
3092
a = (packed >> 11) & 0xE;
3093
}
3094
3095
assert((r < 32) && (g < 32) && (b < 32) && (a < 16));
3096
3097
return color32(r, g, b, a);
3098
}
3099
3100
inline color32 get_endpoint_8888(uint32_t endpoint_index) const
3101
{
3102
assert(endpoint_index < 2);
3103
static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
3104
uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
3105
3106
uint32_t r, g, b, a;
3107
if (packed & 0x8000)
3108
{
3109
// opaque 554 or 555
3110
// 1RRRRRGGGGGBBBBM
3111
// 1RRRRRGGGGGBBBBB
3112
r = (packed >> 10) & 31;
3113
g = (packed >> 5) & 31;
3114
b = packed & 31;
3115
3116
r = g_pvrtc_5[r];
3117
g = g_pvrtc_5[g];
3118
3119
if (!endpoint_index)
3120
b = g_pvrtc_4[b >> 1];
3121
else
3122
b = g_pvrtc_5[b];
3123
3124
a = 255;
3125
}
3126
else
3127
{
3128
// translucent 4433 or 4443
3129
// 0AAA RRRR GGGG BBBM
3130
// 0AAA RRRR GGGG BBBB
3131
r = (packed >> 8) & 0xF;
3132
g = (packed >> 4) & 0xF;
3133
b = packed & 0xF;
3134
a = (packed >> 12) & 7;
3135
3136
r = g_pvrtc_4[r];
3137
g = g_pvrtc_4[g];
3138
3139
if (!endpoint_index)
3140
b = g_pvrtc_3[b >> 1];
3141
else
3142
b = g_pvrtc_4[b];
3143
3144
a = g_pvrtc_alpha[a];
3145
}
3146
3147
return color32(r, g, b, a);
3148
}
3149
3150
inline uint32_t get_endpoint_l8(uint32_t endpoint_index) const
3151
{
3152
color32 c(get_endpoint_8888(endpoint_index));
3153
return c.r + c.g + c.b + c.a;
3154
}
3155
3156
inline uint32_t get_opaque_endpoint_l0() const
3157
{
3158
uint32_t packed = m_endpoints & 0xFFFE;
3159
3160
uint32_t r, g, b;
3161
assert(packed & 0x8000);
3162
3163
// opaque 554 or 555
3164
r = (packed >> 10) & 31;
3165
g = (packed >> 5) & 31;
3166
b = packed & 31;
3167
b |= (b >> 4);
3168
3169
return r + g + b;
3170
}
3171
3172
inline uint32_t get_opaque_endpoint_l1() const
3173
{
3174
uint32_t packed = m_endpoints >> 16;
3175
3176
uint32_t r, g, b;
3177
assert(packed & 0x8000);
3178
3179
// opaque 554 or 555
3180
r = (packed >> 10) & 31;
3181
g = (packed >> 5) & 31;
3182
b = packed & 31;
3183
3184
return r + g + b;
3185
}
3186
3187
static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint)
3188
{
3189
static const uint32_t s_comp_prec[4][4] =
3190
{
3191
// R0 G0 B0 A0 R1 G1 B1 A1
3192
{ 4, 4, 3, 3 },{ 4, 4, 4, 3 }, // transparent endpoint
3193
3194
{ 5, 5, 4, 0 },{ 5, 5, 5, 0 } // opaque endpoint
3195
};
3196
return s_comp_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][basisu::open_range_check(c, 4U)];
3197
}
3198
3199
static color32 get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint)
3200
{
3201
static const color32 s_color_prec[4] =
3202
{
3203
color32(4, 4, 3, 3), color32(4, 4, 4, 3), // transparent endpoint
3204
color32(5, 5, 4, 0), color32(5, 5, 5, 0) // opaque endpoint
3205
};
3206
return s_color_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)];
3207
}
3208
3209
inline void set_opaque_endpoint_floor(uint32_t endpoint_index, const color32& c)
3210
{
3211
assert(endpoint_index < 2);
3212
const uint32_t m = m_endpoints & 1;
3213
3214
uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3215
3216
if (!endpoint_index)
3217
b = g_pvrtc_4_floor[b] << 1;
3218
else
3219
b = g_pvrtc_5_floor[b];
3220
3221
// rgba=555 here
3222
assert((r < 32) && (g < 32) && (b < 32));
3223
3224
// 1RRRRRGGGGGBBBBM
3225
// 1RRRRRGGGGGBBBBB
3226
3227
// opaque 554 or 555
3228
uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3229
if (!endpoint_index)
3230
packed = (packed & ~1) | m;
3231
3232
assert(packed <= 0xFFFF);
3233
3234
if (endpoint_index)
3235
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3236
else
3237
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3238
}
3239
3240
inline void set_opaque_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3241
{
3242
assert(endpoint_index < 2);
3243
const uint32_t m = m_endpoints & 1;
3244
3245
uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3246
3247
if (!endpoint_index)
3248
b = g_pvrtc_4_ceil[b] << 1;
3249
else
3250
b = g_pvrtc_5_ceil[b];
3251
3252
// rgba=555 here
3253
assert((r < 32) && (g < 32) && (b < 32));
3254
3255
// 1RRRRRGGGGGBBBBM
3256
// 1RRRRRGGGGGBBBBB
3257
3258
// opaque 554 or 555
3259
uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3260
if (!endpoint_index)
3261
packed |= m;
3262
3263
assert(packed <= 0xFFFF);
3264
3265
if (endpoint_index)
3266
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3267
else
3268
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3269
}
3270
3271
// opaque endpoints: 554 or 555
3272
// transparent endpoints: 3443 or 3444
3273
inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint)
3274
{
3275
assert(endpoint_index < 2);
3276
const uint32_t m = m_endpoints & 1;
3277
uint32_t r = c[0], g = c[1], b = c[2], a = c[3];
3278
3279
uint32_t packed;
3280
3281
if (opaque_endpoint)
3282
{
3283
if (!endpoint_index)
3284
{
3285
// 554
3286
// 1RRRRRGGGGGBBBBM
3287
assert((r < 32) && (g < 32) && (b < 16));
3288
packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m;
3289
}
3290
else
3291
{
3292
// 555
3293
// 1RRRRRGGGGGBBBBB
3294
assert((r < 32) && (g < 32) && (b < 32));
3295
packed = 0x8000 | (r << 10) | (g << 5) | b;
3296
}
3297
}
3298
else
3299
{
3300
if (!endpoint_index)
3301
{
3302
// 3443
3303
// 0AAA RRRR GGGG BBBM
3304
assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
3305
packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m;
3306
}
3307
else
3308
{
3309
// 3444
3310
// 0AAA RRRR GGGG BBBB
3311
assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
3312
packed = (a << 12) | (r << 8) | (g << 4) | b;
3313
}
3314
}
3315
3316
assert(packed <= 0xFFFF);
3317
3318
if (endpoint_index)
3319
m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3320
else
3321
m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3322
}
3323
3324
inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c)
3325
{
3326
assert(endpoint_index < 2);
3327
3328
int a = g_pvrtc_alpha_floor[c.a];
3329
if (a == 8)
3330
{
3331
// 554 or 555
3332
uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3333
3334
if (!endpoint_index)
3335
b = g_pvrtc_4_floor[b];
3336
else
3337
b = g_pvrtc_5_floor[b];
3338
3339
set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3340
}
3341
else
3342
{
3343
// 4433 or 4443
3344
uint32_t r = g_pvrtc_4_floor[c[0]], g = g_pvrtc_4_floor[c[1]], b = c[2];
3345
3346
if (!endpoint_index)
3347
b = g_pvrtc_3_floor[b];
3348
else
3349
b = g_pvrtc_4_floor[b];
3350
3351
set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3352
}
3353
}
3354
3355
inline void set_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3356
{
3357
assert(endpoint_index < 2);
3358
3359
int a = g_pvrtc_alpha_ceil[c.a];
3360
if (a == 8)
3361
{
3362
// 554 or 555
3363
uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3364
3365
if (!endpoint_index)
3366
b = g_pvrtc_4_ceil[b];
3367
else
3368
b = g_pvrtc_5_ceil[b];
3369
3370
set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3371
}
3372
else
3373
{
3374
// 4433 or 4443
3375
uint32_t r = g_pvrtc_4_ceil[c[0]], g = g_pvrtc_4_ceil[c[1]], b = c[2];
3376
3377
if (!endpoint_index)
3378
b = g_pvrtc_3_ceil[b];
3379
else
3380
b = g_pvrtc_4_ceil[b];
3381
3382
set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3383
}
3384
}
3385
3386
inline uint32_t get_modulation(uint32_t x, uint32_t y) const
3387
{
3388
assert((x < 4) && (y < 4));
3389
return (m_modulation >> ((y * 4 + x) * 2)) & 3;
3390
}
3391
3392
// Scaled by 8
3393
inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const
3394
{
3395
static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 },{ 0, 4, 4, 8 } };
3396
return s_block_scales[block_uses_transparent_modulation];
3397
}
3398
3399
// Scaled by 8
3400
inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const
3401
{
3402
return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)];
3403
}
3404
3405
inline void set_modulation(uint32_t x, uint32_t y, uint32_t s)
3406
{
3407
assert((x < 4) && (y < 4) && (s < 4));
3408
uint32_t n = (y * 4 + x) * 2;
3409
m_modulation = (m_modulation & (~(3 << n))) | (s << n);
3410
assert(get_modulation(x, y) == s);
3411
}
3412
3413
// Assumes modulation was initialized to 0
3414
inline void set_modulation_fast(uint32_t x, uint32_t y, uint32_t s)
3415
{
3416
assert((x < 4) && (y < 4) && (s < 4));
3417
uint32_t n = (y * 4 + x) * 2;
3418
m_modulation |= (s << n);
3419
assert(get_modulation(x, y) == s);
3420
}
3421
};
3422
3423
#if 0
3424
static const uint8_t g_pvrtc_bilinear_weights[16][4] =
3425
{
3426
{ 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 },
3427
{ 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 },
3428
{ 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 },
3429
{ 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 },
3430
};
3431
#endif
3432
3433
struct pvrtc1_temp_block
3434
{
3435
decoder_etc_block m_etc1_block;
3436
uint32_t m_pvrtc_endpoints;
3437
};
3438
3439
// Returns R + G + B (each 5 bits, so 0..93) of endpoint 0 taken from the low 16 bits of a
// packed PVRTC endpoint word. The endpoint must be opaque (bit 15 set).
// Bit 0 is the block's mode flag rather than colour data, so blue keeps only bits 1-4 and
// its top bit is replicated into bit 0 to form a 5-bit value.
static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints)
{
	uint32_t packed = endpoints;

	uint32_t r, g, b;
	assert(packed & 0x8000);

	r = (packed >> 10) & 31;
	g = (packed >> 5) & 31;
	b = packed & 30;
	b |= (b >> 4);

	return r + g + b;
}
3453
3454
// Returns R + G + B (each 5 bits, so 0..93) of endpoint 1 taken from the high 16 bits of a
// packed PVRTC endpoint word. The endpoint must be opaque (bit 31 of the word set).
static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints)
{
	uint32_t packed = endpoints >> 16;

	uint32_t r, g, b;
	assert(packed & 0x8000);

	r = (packed >> 10) & 31;
	g = (packed >> 5) & 31;
	b = packed & 31;

	return r + g + b;
}
3467
3468
// Unpacks endpoint 0 (low 16 bits) or endpoint 1 (high 16 bits) of a packed PVRTC endpoint
// word to 8 bits per channel, using the g_pvrtc_5 / g_pvrtc_4 / g_pvrtc_3 / g_pvrtc_alpha
// expansion tables. Opaque endpoints (bit 15 of the half set) always return alpha 255.
static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index)
{
	assert(endpoint_index < 2);
	// Endpoint 0 shares its lowest bit with the block's mode flag, so that bit is masked off.
	static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
	uint32_t packed = (endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];

	uint32_t r, g, b, a;
	if (packed & 0x8000)
	{
		// opaque 554 or 555
		// 1RRRRRGGGGGBBBBM
		// 1RRRRRGGGGGBBBBB
		r = (packed >> 10) & 31;
		g = (packed >> 5) & 31;
		b = packed & 31;

		r = g_pvrtc_5[r];
		g = g_pvrtc_5[g];

		if (!endpoint_index)
			b = g_pvrtc_4[b >> 1];
		else
			b = g_pvrtc_5[b];

		a = 255;
	}
	else
	{
		// translucent 4433 or 4443
		// 0AAA RRRR GGGG BBBM
		// 0AAA RRRR GGGG BBBB
		r = (packed >> 8) & 0xF;
		g = (packed >> 4) & 0xF;
		b = packed & 0xF;
		a = (packed >> 12) & 7;

		r = g_pvrtc_4[r];
		g = g_pvrtc_4[g];

		if (!endpoint_index)
			b = g_pvrtc_3[b >> 1];
		else
			b = g_pvrtc_4[b];

		a = g_pvrtc_alpha[a];
	}

	return color32(r, g, b, a);
}
3517
3518
static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index)
3519
{
3520
color32 c(get_endpoint_8888(endpoints, endpoint_index));
3521
return c.r + c.g + c.b + c.a;
3522
}
3523
#endif
3524
3525
#if BASISD_SUPPORT_PVRTC1
3526
// TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture.
3527
static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
3528
{
3529
const uint32_t x_mask = num_blocks_x - 1;
3530
const uint32_t y_mask = num_blocks_y - 1;
3531
const uint32_t x_bits = basisu::total_bits(x_mask);
3532
const uint32_t y_bits = basisu::total_bits(y_mask);
3533
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3534
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3535
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3536
3537
uint32_t block_index = 0;
3538
3539
// really 3x3
3540
int e0[4][4], e1[4][4];
3541
3542
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3543
{
3544
const uint32_t* pE_rows[3];
3545
3546
for (int ey = 0; ey < 3; ey++)
3547
{
3548
int by = y + ey - 1;
3549
3550
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3551
3552
pE_rows[ey] = pE;
3553
3554
for (int ex = 0; ex < 3; ex++)
3555
{
3556
int bx = 0 + ex - 1;
3557
3558
const uint32_t e = pE[bx & x_mask];
3559
3560
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
3561
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
3562
}
3563
}
3564
3565
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3566
3567
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3568
{
3569
const decoder_etc_block& src_block = pETC_Blocks[block_index];
3570
3571
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3572
3573
uint32_t swizzled = x_swizzle | y_swizzle;
3574
if (num_blocks_x != num_blocks_y)
3575
{
3576
swizzled &= swizzle_mask;
3577
3578
if (num_blocks_x > num_blocks_y)
3579
swizzled |= ((x >> min_bits) << (min_bits * 2));
3580
else
3581
swizzled |= ((y >> min_bits) << (min_bits * 2));
3582
}
3583
3584
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3585
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3586
3587
uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3588
uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3589
uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3590
3591
const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3592
int by = (base_r + base_g + base_b) * 16;
3593
int block_colors_y_x16[4];
3594
block_colors_y_x16[0] = by + pInten_table48[2];
3595
block_colors_y_x16[1] = by + pInten_table48[3];
3596
block_colors_y_x16[2] = by + pInten_table48[1];
3597
block_colors_y_x16[3] = by + pInten_table48[0];
3598
3599
{
3600
const uint32_t ex = 2;
3601
int bx = x + ex - 1;
3602
bx &= x_mask;
3603
3604
#define DO_ROW(ey) \
3605
{ \
3606
const uint32_t e = pE_rows[ey][bx]; \
3607
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
3608
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
3609
}
3610
3611
DO_ROW(0);
3612
DO_ROW(1);
3613
DO_ROW(2);
3614
#undef DO_ROW
3615
}
3616
3617
uint32_t mod = 0;
3618
3619
uint32_t lookup_x[4];
3620
3621
#define DO_LOOKUP(lx) { \
3622
const uint32_t byte_ofs = 7 - (((lx) * 4) >> 3); \
3623
const uint32_t lsb_bits = src_block.m_bytes[byte_ofs] >> (((lx) & 1) * 4); \
3624
const uint32_t msb_bits = src_block.m_bytes[byte_ofs - 2] >> (((lx) & 1) * 4); \
3625
lookup_x[lx] = (lsb_bits & 0xF) | ((msb_bits & 0xF) << 4); }
3626
3627
DO_LOOKUP(0);
3628
DO_LOOKUP(1);
3629
DO_LOOKUP(2);
3630
DO_LOOKUP(3);
3631
#undef DO_LOOKUP
3632
3633
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3634
{ \
3635
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3636
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3637
int cl = block_colors_y_x16[g_etc1_x_selector_unpack[ly][lookup_x[lx]]]; \
3638
int dl = cb_l - ca_l; \
3639
int vl = cl - ca_l; \
3640
int p = vl * 16; \
3641
if (ca_l > cb_l) { p = -p; dl = -dl; } \
3642
uint32_t m = 0; \
3643
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3644
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3645
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3646
mod |= m; \
3647
}
3648
3649
{
3650
const uint32_t ex = 0, ey = 0;
3651
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3652
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3653
DO_PIX(0, 0, 4, 4, 4, 4);
3654
DO_PIX(1, 0, 2, 6, 2, 6);
3655
DO_PIX(0, 1, 2, 2, 6, 6);
3656
DO_PIX(1, 1, 1, 3, 3, 9);
3657
}
3658
3659
{
3660
const uint32_t ex = 1, ey = 0;
3661
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3662
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3663
DO_PIX(2, 0, 8, 0, 8, 0);
3664
DO_PIX(3, 0, 6, 2, 6, 2);
3665
DO_PIX(2, 1, 4, 0, 12, 0);
3666
DO_PIX(3, 1, 3, 1, 9, 3);
3667
}
3668
3669
{
3670
const uint32_t ex = 0, ey = 1;
3671
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3672
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3673
DO_PIX(0, 2, 8, 8, 0, 0);
3674
DO_PIX(1, 2, 4, 12, 0, 0);
3675
DO_PIX(0, 3, 6, 6, 2, 2);
3676
DO_PIX(1, 3, 3, 9, 1, 3);
3677
}
3678
3679
{
3680
const uint32_t ex = 1, ey = 1;
3681
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3682
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3683
DO_PIX(2, 2, 16, 0, 0, 0);
3684
DO_PIX(3, 2, 12, 4, 0, 0);
3685
DO_PIX(2, 3, 12, 0, 4, 0);
3686
DO_PIX(3, 3, 9, 3, 3, 1);
3687
}
3688
#undef DO_PIX
3689
3690
pDst_block->m_modulation = mod;
3691
3692
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3693
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3694
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3695
3696
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3697
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3698
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3699
3700
} // x
3701
} // y
3702
}
3703
3704
static void fixup_pvrtc1_4_modulation_rgba(
3705
const decoder_etc_block* pETC_Blocks,
3706
const uint32_t* pPVRTC_endpoints,
3707
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks,
3708
const endpoint* pEndpoints, const selector* pSelectors)
3709
{
3710
const uint32_t x_mask = num_blocks_x - 1;
3711
const uint32_t y_mask = num_blocks_y - 1;
3712
const uint32_t x_bits = basisu::total_bits(x_mask);
3713
const uint32_t y_bits = basisu::total_bits(y_mask);
3714
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3715
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3716
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3717
3718
uint32_t block_index = 0;
3719
3720
// really 3x3
3721
int e0[4][4], e1[4][4];
3722
3723
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3724
{
3725
const uint32_t* pE_rows[3];
3726
3727
for (int ey = 0; ey < 3; ey++)
3728
{
3729
int by = y + ey - 1;
3730
3731
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3732
3733
pE_rows[ey] = pE;
3734
3735
for (int ex = 0; ex < 3; ex++)
3736
{
3737
int bx = 0 + ex - 1;
3738
3739
const uint32_t e = pE[bx & x_mask];
3740
3741
e0[ex][ey] = get_endpoint_l8(e, 0);
3742
e1[ex][ey] = get_endpoint_l8(e, 1);
3743
}
3744
}
3745
3746
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3747
3748
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3749
{
3750
const decoder_etc_block& src_block = pETC_Blocks[block_index];
3751
3752
const uint16_t* pSrc_alpha_block = reinterpret_cast<const uint16_t*>(static_cast<const uint32_t*>(pAlpha_blocks) + x + (y * num_blocks_x));
3753
const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]];
3754
const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]];
3755
3756
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3757
3758
uint32_t swizzled = x_swizzle | y_swizzle;
3759
if (num_blocks_x != num_blocks_y)
3760
{
3761
swizzled &= swizzle_mask;
3762
3763
if (num_blocks_x > num_blocks_y)
3764
swizzled |= ((x >> min_bits) << (min_bits * 2));
3765
else
3766
swizzled |= ((y >> min_bits) << (min_bits * 2));
3767
}
3768
3769
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3770
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3771
3772
uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3773
uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3774
uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3775
3776
const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3777
int by = (base_r + base_g + base_b) * 16;
3778
int block_colors_y_x16[4];
3779
block_colors_y_x16[0] = basisu::clamp<int>(by + pInten_table48[0], 0, 48 * 255);
3780
block_colors_y_x16[1] = basisu::clamp<int>(by + pInten_table48[1], 0, 48 * 255);
3781
block_colors_y_x16[2] = basisu::clamp<int>(by + pInten_table48[2], 0, 48 * 255);
3782
block_colors_y_x16[3] = basisu::clamp<int>(by + pInten_table48[3], 0, 48 * 255);
3783
3784
uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16;
3785
const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5];
3786
int alpha_block_colors_x16[4];
3787
alpha_block_colors_x16[0] = basisu::clamp<int>(alpha_base_g + pInten_table16[0], 0, 16 * 255);
3788
alpha_block_colors_x16[1] = basisu::clamp<int>(alpha_base_g + pInten_table16[1], 0, 16 * 255);
3789
alpha_block_colors_x16[2] = basisu::clamp<int>(alpha_base_g + pInten_table16[2], 0, 16 * 255);
3790
alpha_block_colors_x16[3] = basisu::clamp<int>(alpha_base_g + pInten_table16[3], 0, 16 * 255);
3791
3792
// clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16)
3793
3794
{
3795
const uint32_t ex = 2;
3796
int bx = x + ex - 1;
3797
bx &= x_mask;
3798
3799
#define DO_ROW(ey) \
3800
{ \
3801
const uint32_t e = pE_rows[ey][bx]; \
3802
e0[ex][ey] = get_endpoint_l8(e, 0); \
3803
e1[ex][ey] = get_endpoint_l8(e, 1); \
3804
}
3805
3806
DO_ROW(0);
3807
DO_ROW(1);
3808
DO_ROW(2);
3809
#undef DO_ROW
3810
}
3811
3812
uint32_t mod = 0;
3813
3814
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3815
{ \
3816
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3817
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3818
int cl = block_colors_y_x16[(src_block.m_bytes[4 + ly] >> (lx * 2)) & 3] + alpha_block_colors_x16[(pAlpha_selectors->m_selectors[ly] >> (lx * 2)) & 3]; \
3819
int dl = cb_l - ca_l; \
3820
int vl = cl - ca_l; \
3821
int p = vl * 16; \
3822
if (ca_l > cb_l) { p = -p; dl = -dl; } \
3823
uint32_t m = 0; \
3824
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3825
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3826
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3827
mod |= m; \
3828
}
3829
3830
{
3831
const uint32_t ex = 0, ey = 0;
3832
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3833
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3834
DO_PIX(0, 0, 4, 4, 4, 4);
3835
DO_PIX(1, 0, 2, 6, 2, 6);
3836
DO_PIX(0, 1, 2, 2, 6, 6);
3837
DO_PIX(1, 1, 1, 3, 3, 9);
3838
}
3839
3840
{
3841
const uint32_t ex = 1, ey = 0;
3842
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3843
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3844
DO_PIX(2, 0, 8, 0, 8, 0);
3845
DO_PIX(3, 0, 6, 2, 6, 2);
3846
DO_PIX(2, 1, 4, 0, 12, 0);
3847
DO_PIX(3, 1, 3, 1, 9, 3);
3848
}
3849
3850
{
3851
const uint32_t ex = 0, ey = 1;
3852
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3853
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3854
DO_PIX(0, 2, 8, 8, 0, 0);
3855
DO_PIX(1, 2, 4, 12, 0, 0);
3856
DO_PIX(0, 3, 6, 6, 2, 2);
3857
DO_PIX(1, 3, 3, 9, 1, 3);
3858
}
3859
3860
{
3861
const uint32_t ex = 1, ey = 1;
3862
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3863
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3864
DO_PIX(2, 2, 16, 0, 0, 0);
3865
DO_PIX(3, 2, 12, 4, 0, 0);
3866
DO_PIX(2, 3, 12, 0, 4, 0);
3867
DO_PIX(3, 3, 9, 3, 3, 1);
3868
}
3869
#undef DO_PIX
3870
3871
pDst_block->m_modulation = mod;
3872
3873
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3874
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3875
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3876
3877
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3878
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3879
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3880
3881
} // x
3882
} // y
3883
}
3884
#endif // BASISD_SUPPORT_PVRTC1
3885
3886
#if BASISD_SUPPORT_BC7_MODE5
3887
static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] =
3888
{
3889
{ 0, 3 },
3890
{ 1, 3 },
3891
{ 0, 2 },
3892
{ 1, 2 },
3893
{ 2, 3 },
3894
{ 0, 1 },
3895
};
3896
3897
const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]);
3898
3899
static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4];
3900
3901
// Candidate remappings from an ETC1 selector (0-3, the column) to a BC7 mode 5 2-bit colour
// index (0-3, the value). The row number is the "m" index used by g_etc1_to_bc7_m5_color.
const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10;
static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] =
{
	{ 0, 0, 1, 1 },
	{ 0, 0, 1, 2 },
	{ 0, 0, 1, 3 },
	{ 0, 0, 2, 3 },
	{ 0, 1, 1, 1 },
	{ 0, 1, 2, 2 },
	{ 0, 1, 2, 3 },
	{ 0, 2, 3, 3 },
	{ 1, 2, 2, 2 },
	{ 1, 2, 3, 3 },
};
3915
3916
// One precomputed ETC1 -> BC7 mode 5 colour solution: the best low/high 7-bit BC7 endpoints
// for one (intensity table, base colour, selector range, selector mapping) combination and
// the resulting error, saturated to 16 bits.
struct etc1_to_bc7_m5_solution
{
	uint8_t m_lo;
	uint8_t m_hi;
	uint16_t m_err;
};
3922
3923
// Precomputed colour solutions, one per (intensity table 0-7, 5-bit base colour 0-31,
// selector range, selector mapping). The data comes from the generated .inc file, which
// create_etc1_to_bc7_m5_color_conversion_table() writes in that nesting order.
static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_bc7_m5_color.inc"
};
3926
3927
static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] =
3928
{
3929
{ 0, 3 },
3930
{ 1, 3 },
3931
{ 0, 2 },
3932
{ 1, 2 },
3933
{ 2, 3 },
3934
{ 0, 1 }
3935
};
3936
3937
const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5a_selector_ranges) / sizeof(g_etc1_to_bc7_m5a_selector_ranges[0]);
3938
3939
static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4];
3940
3941
// One precomputed ETC1 (single channel) -> BC7 mode 5 alpha conversion entry.
struct etc1_g_to_bc7_m5a_conversion
{
	uint8_t m_lo, m_hi;	// low / high alpha endpoints
	uint8_t m_trans;	// NOTE(review): presumably a packed selector translation; confirm against the table generator
};
3946
3947
// Precomputed alpha conversions: 8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES entries,
// with the data supplied by the generated .inc file.
static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] =
{
#include "basisu_transcoder_tables_bc7_m5_alpha.inc"
};
3951
3952
static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs)
3953
{
3954
assert(num_bits < 32);
3955
assert(val < (1ULL << num_bits));
3956
3957
uint32_t mask = static_cast<uint32_t>((1ULL << num_bits) - 1);
3958
3959
while (num_bits)
3960
{
3961
const uint32_t n = basisu::minimum<uint32_t>(8 - (cur_ofs & 7), num_bits);
3962
3963
pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7));
3964
pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7));
3965
3966
val >>= n;
3967
mask >>= n;
3968
3969
num_bits -= n;
3970
cur_ofs += n;
3971
}
3972
3973
return cur_ofs;
3974
}
3975
3976
// Bitfield view of a 128-bit BC7 mode 5 block, split into two 64-bit halves. Each half can
// be accessed field-by-field (m_lo / m_hi) or as a raw word (m_lo_bits / m_hi_bits).
// The field widths of each half sum to exactly 64 bits. The mapping of fields onto the raw
// words relies on the compiler allocating bitfields from the least significant bit upward.
struct bc7_mode_5
{
	union
	{
		struct
		{
			uint64_t m_mode : 6;
			uint64_t m_rot : 2;

			// 7-bit colour endpoints
			uint64_t m_r0 : 7;
			uint64_t m_r1 : 7;
			uint64_t m_g0 : 7;
			uint64_t m_g1 : 7;
			uint64_t m_b0 : 7;
			uint64_t m_b1 : 7;
			// 8-bit alpha endpoints; a1 straddles the two halves (low 6 bits here)
			uint64_t m_a0 : 8;
			uint64_t m_a1_0 : 6;

		} m_lo;

		uint64_t m_lo_bits;
	};

	union
	{
		struct
		{
			// High 2 bits of alpha endpoint 1
			uint64_t m_a1_1 : 2;

			// bit 2
			// Colour selectors m_c<x><y>; the first one is stored with only 1 bit
			uint64_t m_c00 : 1;
			uint64_t m_c10 : 2;
			uint64_t m_c20 : 2;
			uint64_t m_c30 : 2;

			uint64_t m_c01 : 2;
			uint64_t m_c11 : 2;
			uint64_t m_c21 : 2;
			uint64_t m_c31 : 2;

			uint64_t m_c02 : 2;
			uint64_t m_c12 : 2;
			uint64_t m_c22 : 2;
			uint64_t m_c32 : 2;

			uint64_t m_c03 : 2;
			uint64_t m_c13 : 2;
			uint64_t m_c23 : 2;
			uint64_t m_c33 : 2;

			// bit 33
			// Alpha selectors m_a<x><y>; the first one is stored with only 1 bit
			uint64_t m_a00 : 1;
			uint64_t m_a10 : 2;
			uint64_t m_a20 : 2;
			uint64_t m_a30 : 2;

			uint64_t m_a01 : 2;
			uint64_t m_a11 : 2;
			uint64_t m_a21 : 2;
			uint64_t m_a31 : 2;

			uint64_t m_a02 : 2;
			uint64_t m_a12 : 2;
			uint64_t m_a22 : 2;
			uint64_t m_a32 : 2;

			uint64_t m_a03 : 2;
			uint64_t m_a13 : 2;
			uint64_t m_a23 : 2;
			uint64_t m_a33 : 2;

		} m_hi;

		uint64_t m_hi_bits;
	};
};
4052
4053
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
4054
static void create_etc1_to_bc7_m5_color_conversion_table()
4055
{
4056
FILE* pFile = nullptr;
4057
fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w");
4058
4059
uint32_t n = 0;
4060
4061
for (int inten = 0; inten < 8; inten++)
4062
{
4063
for (uint32_t g = 0; g < 32; g++)
4064
{
4065
color32 block_colors[4];
4066
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4067
4068
for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++)
4069
{
4070
const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low;
4071
const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high;
4072
4073
for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++)
4074
{
4075
uint32_t best_lo = 0;
4076
uint32_t best_hi = 0;
4077
uint64_t best_err = UINT64_MAX;
4078
4079
for (uint32_t hi = 0; hi <= 127; hi++)
4080
{
4081
for (uint32_t lo = 0; lo <= 127; lo++)
4082
{
4083
uint32_t colors[4];
4084
4085
colors[0] = (lo << 1) | (lo >> 6);
4086
colors[3] = (hi << 1) | (hi >> 6);
4087
4088
colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
4089
colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
4090
4091
uint64_t total_err = 0;
4092
4093
for (uint32_t s = low_selector; s <= high_selector; s++)
4094
{
4095
int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]];
4096
4097
int err_scale = 1;
4098
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4099
// the low/high selectors which are clamping to either 0 or 255.
4100
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4101
err_scale = 5;
4102
4103
total_err += (err * err) * err_scale;
4104
}
4105
4106
if (total_err < best_err)
4107
{
4108
best_err = total_err;
4109
best_lo = lo;
4110
best_hi = hi;
4111
}
4112
}
4113
}
4114
4115
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
4116
4117
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
4118
n++;
4119
if ((n & 31) == 31)
4120
fprintf(pFile, "\n");
4121
} // m
4122
} // sr
4123
} // g
4124
} // inten
4125
4126
fclose(pFile);
4127
}
4128
4129
static void create_etc1_to_bc7_m5_alpha_conversion_table()
4130
{
4131
FILE* pFile = nullptr;
4132
fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w");
4133
4134
uint32_t n = 0;
4135
4136
for (int inten = 0; inten < 8; inten++)
4137
{
4138
for (uint32_t g = 0; g < 32; g++)
4139
{
4140
color32 block_colors[4];
4141
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4142
4143
for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++)
4144
{
4145
const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low;
4146
const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high;
4147
4148
uint32_t best_lo = 0;
4149
uint32_t best_hi = 0;
4150
uint64_t best_err = UINT64_MAX;
4151
uint32_t best_output_selectors = 0;
4152
4153
for (uint32_t hi = 0; hi <= 255; hi++)
4154
{
4155
for (uint32_t lo = 0; lo <= 255; lo++)
4156
{
4157
uint32_t colors[4];
4158
4159
colors[0] = lo;
4160
colors[3] = hi;
4161
4162
colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
4163
colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
4164
4165
uint64_t total_err = 0;
4166
uint32_t output_selectors = 0;
4167
4168
for (uint32_t s = low_selector; s <= high_selector; s++)
4169
{
4170
int best_mapping_err = INT_MAX;
4171
int best_k = 0;
4172
for (int k = 0; k < 4; k++)
4173
{
4174
int mapping_err = block_colors[s].g - colors[k];
4175
mapping_err *= mapping_err;
4176
4177
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4178
// the low/high selectors which are clamping to either 0 or 255.
4179
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4180
mapping_err *= 5;
4181
4182
if (mapping_err < best_mapping_err)
4183
{
4184
best_mapping_err = mapping_err;
4185
best_k = k;
4186
}
4187
} // k
4188
4189
total_err += best_mapping_err;
4190
output_selectors |= (best_k << (s * 2));
4191
} // s
4192
4193
if (total_err < best_err)
4194
{
4195
best_err = total_err;
4196
best_lo = lo;
4197
best_hi = hi;
4198
best_output_selectors = output_selectors;
4199
}
4200
4201
} // lo
4202
} // hi
4203
4204
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors);
4205
n++;
4206
if ((n & 31) == 31)
4207
fprintf(pFile, "\n");
4208
4209
} // sr
4210
} // g
4211
} // inten
4212
4213
fclose(pFile);
4214
}
4215
#endif // BASISD_WRITE_NEW_BC7_MODE5_TABLES
4216
4217
// One entry of a table mapping an 8-bit component value to a pair of 7-bit BC7 mode 5 endpoints.
// The member order matters: tables of this type are aggregate-initialized as {hi, lo}.
struct bc7_m5_match_entry
{
	uint8_t m_hi;	// 7-bit high endpoint
	uint8_t m_lo;	// 7-bit low endpoint
};
4222
4223
// Precomputed lookup table indexed by an 8-bit component value (0-255). Each entry is {m_hi, m_lo}:
// the pair of 7-bit BC7 mode 5 endpoints whose selector-1 interpolation
// ((lo8 * (64 - 21) + hi8 * 21 + 32) >> 6) comes closest to the index.
// The values match the output format of the disabled (#if 0) generator in transcoder_init_bc7_mode5().
// Used to encode solid-color blocks with every selector set to 1.
static bc7_m5_match_entry g_bc7_m5_equals_1[256] =
4224
{
4225
{0,0},{1,0},{3,0},{4,0},{6,0},{7,0},{9,0},{10,0},{12,0},{13,0},{15,0},{16,0},{18,0},{20,0},{21,0},{23,0},
4226
{24,0},{26,0},{27,0},{29,0},{30,0},{32,0},{33,0},{35,0},{36,0},{38,0},{39,0},{41,0},{42,0},{44,0},{45,0},{47,0},
4227
{48,0},{50,0},{52,0},{53,0},{55,0},{56,0},{58,0},{59,0},{61,0},{62,0},{64,0},{65,0},{66,0},{68,0},{69,0},{71,0},
4228
{72,0},{74,0},{75,0},{77,0},{78,0},{80,0},{82,0},{83,0},{85,0},{86,0},{88,0},{89,0},{91,0},{92,0},{94,0},{95,0},
4229
{97,0},{98,0},{100,0},{101,0},{103,0},{104,0},{106,0},{107,0},{109,0},{110,0},{112,0},{114,0},{115,0},{117,0},{118,0},{120,0},
4230
{121,0},{123,0},{124,0},{126,0},{127,0},{127,1},{126,2},{126,3},{127,3},{127,4},{126,5},{126,6},{127,6},{127,7},{126,8},{126,9},
4231
{127,9},{127,10},{126,11},{126,12},{127,12},{127,13},{126,14},{125,15},{127,15},{126,16},{126,17},{127,17},{127,18},{126,19},{126,20},{127,20},
4232
{127,21},{126,22},{126,23},{127,23},{127,24},{126,25},{126,26},{127,26},{127,27},{126,28},{126,29},{127,29},{127,30},{126,31},{126,32},{127,32},
4233
{127,33},{126,34},{126,35},{127,35},{127,36},{126,37},{126,38},{127,38},{127,39},{126,40},{126,41},{127,41},{127,42},{126,43},{126,44},{127,44},
4234
{127,45},{126,46},{125,47},{127,47},{126,48},{126,49},{127,49},{127,50},{126,51},{126,52},{127,52},{127,53},{126,54},{126,55},{127,55},{127,56},
4235
{126,57},{126,58},{127,58},{127,59},{126,60},{126,61},{127,61},{127,62},{126,63},{125,64},{126,64},{126,65},{127,65},{127,66},{126,67},{126,68},
4236
{127,68},{127,69},{126,70},{126,71},{127,71},{127,72},{126,73},{126,74},{127,74},{127,75},{126,76},{125,77},{127,77},{126,78},{126,79},{127,79},
4237
{127,80},{126,81},{126,82},{127,82},{127,83},{126,84},{126,85},{127,85},{127,86},{126,87},{126,88},{127,88},{127,89},{126,90},{126,91},{127,91},
4238
{127,92},{126,93},{126,94},{127,94},{127,95},{126,96},{126,97},{127,97},{127,98},{126,99},{126,100},{127,100},{127,101},{126,102},{126,103},{127,103},
4239
{127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115},
4240
{126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127}
4241
};
4242
4243
static void transcoder_init_bc7_mode5()
4244
{
4245
#if 0
4246
// This is a little too much work to do at init time, so precompute it.
4247
for (int i = 0; i < 256; i++)
4248
{
4249
int lowest_e = 256;
4250
for (int lo = 0; lo < 128; lo++)
4251
{
4252
for (int hi = 0; hi < 128; hi++)
4253
{
4254
const int lo_e = (lo << 1) | (lo >> 6);
4255
const int hi_e = (hi << 1) | (hi >> 6);
4256
4257
// Selector 1
4258
int v = (lo_e * (64 - 21) + hi_e * 21 + 32) >> 6;
4259
int e = abs(v - i);
4260
4261
if (e < lowest_e)
4262
{
4263
g_bc7_m5_equals_1[i].m_hi = static_cast<uint8_t>(hi);
4264
g_bc7_m5_equals_1[i].m_lo = static_cast<uint8_t>(lo);
4265
4266
lowest_e = e;
4267
}
4268
4269
} // hi
4270
4271
} // lo
4272
4273
printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo);
4274
if ((i & 15) == 15) printf("\n");
4275
}
4276
#endif
4277
4278
for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; i++)
4279
{
4280
uint32_t l = g_etc1_to_bc7_m5_selector_ranges[i].m_low;
4281
uint32_t h = g_etc1_to_bc7_m5_selector_ranges[i].m_high;
4282
g_etc1_to_bc7_m5_selector_range_index[l][h] = i;
4283
}
4284
4285
for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; i++)
4286
{
4287
uint32_t l = g_etc1_to_bc7_m5a_selector_ranges[i].m_low;
4288
uint32_t h = g_etc1_to_bc7_m5a_selector_ranges[i].m_high;
4289
g_etc1_to_bc7_m5a_selector_range_index[l][h] = i;
4290
}
4291
}
4292
4293
static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4294
{
4295
bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4296
4297
// First ensure the block is cleared to all 0's
4298
static_cast<uint64_t*>(pDst)[0] = 0;
4299
static_cast<uint64_t*>(pDst)[1] = 0;
4300
4301
// Set alpha to 255
4302
pDst_block->m_lo.m_mode = 1 << 5;
4303
pDst_block->m_lo.m_a0 = 255;
4304
pDst_block->m_lo.m_a1_0 = 63;
4305
pDst_block->m_hi.m_a1_1 = 3;
4306
4307
const uint32_t low_selector = pSelector->m_lo_selector;
4308
const uint32_t high_selector = pSelector->m_hi_selector;
4309
4310
const uint32_t base_color_r = pEndpoints->m_color5.r;
4311
const uint32_t base_color_g = pEndpoints->m_color5.g;
4312
const uint32_t base_color_b = pEndpoints->m_color5.b;
4313
const uint32_t inten_table = pEndpoints->m_inten5;
4314
4315
if (pSelector->m_num_unique_selectors == 1)
4316
{
4317
// Solid color block - use precomputed tables and set selectors to 1.
4318
uint32_t r, g, b;
4319
decoder_etc_block::get_block_color5(pEndpoints->m_color5, inten_table, low_selector, r, g, b);
4320
4321
pDst_block->m_lo.m_r0 = g_bc7_m5_equals_1[r].m_lo;
4322
pDst_block->m_lo.m_g0 = g_bc7_m5_equals_1[g].m_lo;
4323
pDst_block->m_lo.m_b0 = g_bc7_m5_equals_1[b].m_lo;
4324
4325
pDst_block->m_lo.m_r1 = g_bc7_m5_equals_1[r].m_hi;
4326
pDst_block->m_lo.m_g1 = g_bc7_m5_equals_1[g].m_hi;
4327
pDst_block->m_lo.m_b1 = g_bc7_m5_equals_1[b].m_hi;
4328
4329
set_block_bits((uint8_t*)pDst, 0x2aaaaaab, 31, 66);
4330
return;
4331
}
4332
else if (pSelector->m_num_unique_selectors == 2)
4333
{
4334
// Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4335
color32 block_colors[4];
4336
4337
decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table);
4338
4339
const uint32_t r0 = block_colors[low_selector].r;
4340
const uint32_t g0 = block_colors[low_selector].g;
4341
const uint32_t b0 = block_colors[low_selector].b;
4342
4343
const uint32_t r1 = block_colors[high_selector].r;
4344
const uint32_t g1 = block_colors[high_selector].g;
4345
const uint32_t b1 = block_colors[high_selector].b;
4346
4347
pDst_block->m_lo.m_r0 = r0 >> 1;
4348
pDst_block->m_lo.m_g0 = g0 >> 1;
4349
pDst_block->m_lo.m_b0 = b0 >> 1;
4350
4351
pDst_block->m_lo.m_r1 = r1 >> 1;
4352
pDst_block->m_lo.m_g1 = g1 >> 1;
4353
pDst_block->m_lo.m_b1 = b1 >> 1;
4354
4355
uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4356
4357
for (uint32_t y = 0; y < 4; y++)
4358
{
4359
for (uint32_t x = 0; x < 4; x++)
4360
{
4361
uint32_t s = pSelector->get_selector(x, y);
4362
uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4363
4364
uint32_t num_bits = 2;
4365
4366
if ((x | y) == 0)
4367
{
4368
if (os & 2)
4369
{
4370
pDst_block->m_lo.m_r0 = r1 >> 1;
4371
pDst_block->m_lo.m_g0 = g1 >> 1;
4372
pDst_block->m_lo.m_b0 = b1 >> 1;
4373
4374
pDst_block->m_lo.m_r1 = r0 >> 1;
4375
pDst_block->m_lo.m_g1 = g0 >> 1;
4376
pDst_block->m_lo.m_b1 = b0 >> 1;
4377
4378
output_low_selector = 3;
4379
os = 0;
4380
}
4381
4382
num_bits = 1;
4383
}
4384
4385
output_bits |= (os << output_bit_offset);
4386
output_bit_offset += num_bits;
4387
}
4388
}
4389
4390
set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4391
return;
4392
}
4393
4394
const uint32_t selector_range_table = g_etc1_to_bc7_m5_selector_range_index[low_selector][high_selector];
4395
4396
//[32][8][RANGES][MAPPING]
4397
const etc1_to_bc7_m5_solution* pTable_r = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_r) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4398
const etc1_to_bc7_m5_solution* pTable_g = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_g) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4399
const etc1_to_bc7_m5_solution* pTable_b = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_b) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4400
4401
uint32_t best_err = UINT_MAX;
4402
uint32_t best_mapping = 0;
4403
4404
assert(NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS == 10);
4405
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
4406
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
4407
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
4408
#undef DO_ITER
4409
4410
const uint8_t* pSelectors_xlat = &g_etc1_to_bc7_m5_selector_mappings[best_mapping][0];
4411
4412
uint32_t s_inv = 0;
4413
if (pSelectors_xlat[pSelector->get_selector(0, 0)] & 2)
4414
{
4415
pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_hi;
4416
pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_hi;
4417
pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_hi;
4418
4419
pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo;
4420
pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo;
4421
pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo;
4422
4423
s_inv = 3;
4424
}
4425
else
4426
{
4427
pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_lo;
4428
pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_lo;
4429
pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_lo;
4430
4431
pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_hi;
4432
pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_hi;
4433
pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_hi;
4434
}
4435
4436
uint32_t output_bits = 0, output_bit_ofs = 0;
4437
4438
for (uint32_t y = 0; y < 4; y++)
4439
{
4440
for (uint32_t x = 0; x < 4; x++)
4441
{
4442
const uint32_t s = pSelector->get_selector(x, y);
4443
4444
const uint32_t os = pSelectors_xlat[s] ^ s_inv;
4445
4446
output_bits |= (os << output_bit_ofs);
4447
4448
output_bit_ofs += (((x | y) == 0) ? 1 : 2);
4449
}
4450
}
4451
4452
set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4453
}
4454
4455
static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4456
{
4457
bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4458
4459
const uint32_t low_selector = pSelector->m_lo_selector;
4460
const uint32_t high_selector = pSelector->m_hi_selector;
4461
4462
const uint32_t base_color_r = pEndpoints->m_color5.r;
4463
const uint32_t inten_table = pEndpoints->m_inten5;
4464
4465
if (pSelector->m_num_unique_selectors == 1)
4466
{
4467
uint32_t r;
4468
decoder_etc_block::get_block_color5_r(pEndpoints->m_color5, inten_table, low_selector, r);
4469
4470
pDst_block->m_lo.m_a0 = r;
4471
pDst_block->m_lo.m_a1_0 = r & 63;
4472
pDst_block->m_hi.m_a1_1 = r >> 6;
4473
4474
return;
4475
}
4476
else if (pSelector->m_num_unique_selectors == 2)
4477
{
4478
// Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4479
int block_colors[4];
4480
4481
decoder_etc_block::get_block_colors5_g(block_colors, pEndpoints->m_color5, inten_table);
4482
4483
pDst_block->m_lo.m_a0 = block_colors[low_selector];
4484
pDst_block->m_lo.m_a1_0 = block_colors[high_selector] & 63;
4485
pDst_block->m_hi.m_a1_1 = block_colors[high_selector] >> 6;
4486
4487
uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4488
4489
for (uint32_t y = 0; y < 4; y++)
4490
{
4491
for (uint32_t x = 0; x < 4; x++)
4492
{
4493
const uint32_t s = pSelector->get_selector(x, y);
4494
uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4495
4496
uint32_t num_bits = 2;
4497
4498
if ((x | y) == 0)
4499
{
4500
if (os & 2)
4501
{
4502
pDst_block->m_lo.m_a0 = block_colors[high_selector];
4503
pDst_block->m_lo.m_a1_0 = block_colors[low_selector] & 63;
4504
pDst_block->m_hi.m_a1_1 = block_colors[low_selector] >> 6;
4505
4506
output_low_selector = 3;
4507
os = 0;
4508
}
4509
4510
num_bits = 1;
4511
}
4512
4513
output_bits |= (os << output_bit_offset);
4514
output_bit_offset += num_bits;
4515
}
4516
}
4517
4518
set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4519
return;
4520
}
4521
4522
const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector];
4523
4524
const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table];
4525
4526
pDst_block->m_lo.m_a0 = pTable->m_lo;
4527
pDst_block->m_lo.m_a1_0 = pTable->m_hi & 63;
4528
pDst_block->m_hi.m_a1_1 = pTable->m_hi >> 6;
4529
4530
uint32_t output_bit_offset = 0, output_bits = 0, selector_trans = pTable->m_trans;
4531
4532
for (uint32_t y = 0; y < 4; y++)
4533
{
4534
for (uint32_t x = 0; x < 4; x++)
4535
{
4536
const uint32_t s = pSelector->get_selector(x, y);
4537
uint32_t os = (selector_trans >> (s * 2)) & 3;
4538
4539
uint32_t num_bits = 2;
4540
4541
if ((x | y) == 0)
4542
{
4543
if (os & 2)
4544
{
4545
pDst_block->m_lo.m_a0 = pTable->m_hi;
4546
pDst_block->m_lo.m_a1_0 = pTable->m_lo & 63;
4547
pDst_block->m_hi.m_a1_1 = pTable->m_lo >> 6;
4548
4549
selector_trans ^= 0xFF;
4550
os ^= 3;
4551
}
4552
4553
num_bits = 1;
4554
}
4555
4556
output_bits |= (os << output_bit_offset);
4557
output_bit_offset += num_bits;
4558
}
4559
}
4560
4561
set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4562
}
4563
4564
// Converts an RGB triple to (Y, Co, Cg):
//  Y  =  0.25*R + 0.5*G + 0.25*B
//  Co =  0.5*R          - 0.5*B
//  Cg = -0.25*R + 0.5*G - 0.25*B
static inline vec3F rgb_to_ycocg(const vec3F& rgb)
{
	const vec3F y_weights(0.25f, 0.5f, 0.25f);
	const vec3F co_weights(0.5f, 0.0f, -0.5f);
	const vec3F cg_weights(-0.25f, 0.5f, -0.25f);

	return vec3F(rgb.dot(y_weights), rgb.dot(co_weights), rgb.dot(cg_weights));
}
4568
4569
// Converts an RGB triple to just its chroma pair (Co, Cg):
//  Co =  0.5*R          - 0.5*B
//  Cg = -0.25*R + 0.5*G - 0.25*B
static inline vec2F rgb_to_cocg(const vec3F& rgb)
{
	const vec3F co_weights(0.5f, 0.0f, -0.5f);
	const vec3F cg_weights(-0.25f, 0.5f, -0.25f);

	return vec2F(rgb.dot(co_weights), rgb.dot(cg_weights));
}
4573
4574
// Converts a (Y, Co, Cg) triple back to RGB:
//  R = Y + Co - Cg
//  G = Y      + Cg
//  B = Y - Co - Cg
static inline vec3F ycocg_to_rgb(const vec3F& ycocg)
{
	const vec3F r_weights(1.0f, 1.0f, -1.0f);
	const vec3F g_weights(1.0f, 0.0f, 1.0f);
	const vec3F b_weights(1.0f, -1.0f, -1.0f);

	return vec3F(ycocg.dot(r_weights), ycocg.dot(g_weights), ycocg.dot(b_weights));
}
4578
4579
// Returns the r, g and b components of a color32 as a float vector (alpha is dropped).
static inline vec3F color32_to_vec3F(const color32& c)
{
	const float red = c.r;
	const float green = c.g;
	const float blue = c.b;

	return vec3F(red, green, blue);
}
4583
4584
// Expands an endpoint's 5-bit base color to 8 bits per component (by replicating the top 3 bits into
// the low bits) and returns it converted to (Y, Co, Cg).
static inline vec3F color5_to_ycocg(const endpoint& e)
{
	int rgb8[3];
	for (int c = 0; c < 3; c++)
		rgb8[c] = (e.m_color5[c] << 3) | (e.m_color5[c] >> 2);

	return rgb_to_ycocg(vec3F((float)rgb8[0], (float)rgb8[1], (float)rgb8[2]));
}
4591
4592
// Expands an endpoint's 5-bit base color to 8 bits per component (by replicating the top 3 bits into
// the low bits) and returns only its chroma pair (Co, Cg).
static inline vec2F color5_to_cocg(const endpoint& e)
{
	int rgb8[3];
	for (int c = 0; c < 3; c++)
		rgb8[c] = (e.m_color5[c] << 3) | (e.m_color5[c] >> 2);

	return rgb_to_cocg(vec3F((float)rgb8[0], (float)rgb8[1], (float)rgb8[2]));
}
4599
4600
// Expands a 7-bit BC7 endpoint value (0-127) to 8 bits by shifting it up one bit and copying
// its most significant bit into the vacated low bit.
static inline uint32_t bc7_7_to_8(uint32_t v)
{
	assert(v < 128);

	const uint32_t shifted = v << 1;
	const uint32_t replicated_top_bit = v >> 6;

	return shifted | replicated_top_bit;
}
4605
4606
static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
4607
{
4608
assert(w < 4);
4609
return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6;
4610
}
4611
4612
// Returns the (Co, Cg) chroma of the endpoint used by block (bx, by). The endpoint index is fetched with
// at_clamped(), so - going by that name - out-of-range block coordinates are clamped rather than rejected.
static inline vec2F get_endpoint_cocg_clamped(int bx, int by, const basisu::vector2D<uint16_t>& decoded_endpoints, const endpoint* pEndpoints)
{
	const endpoint& block_endpoint = pEndpoints[decoded_endpoints.at_clamped(bx, by)];
	return color5_to_cocg(block_endpoint);
}
4617
4618
// Post-process pass over already transcoded BC7 mode 5 blocks that smooths chroma across block edges.
//  decoded_endpoints - per-block ETC1S endpoint indices, addressed as (block x, block y).
//  pDst_blocks       - the BC7 mode 5 output blocks; selected blocks are re-encoded in place.
//  num_blocks_x/y    - dimensions of the block grid to process.
//  output_row_pitch_in_blocks_or_pixels - used here as the row pitch, in blocks, of pDst_blocks.
//  pEndpoints        - ETC1S endpoint palette indexed by the values in decoded_endpoints.
// A block is processed only if at least one of its (up to 8) in-bounds neighbors has an endpoint chroma
// (Co or Cg) differing by more than CHROMA_THRESH, and the luma variance of the block's decoded pixels
// is at least Y_VAR_SKIP_THRESH. For such blocks, each pixel keeps its decoded luma while its chroma is
// bilinearly interpolated between the endpoint chroma of the surrounding blocks, and the result is
// re-encoded with bc7_mode_5_encoder::encode_bc7_mode_5_block().
static void chroma_filter_bc7_mode5(const basisu::vector2D<uint16_t>& decoded_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t output_row_pitch_in_blocks_or_pixels, const endpoint *pEndpoints)
{
	const bool hq_bc7_mode_5_encoder_mode = false;

	const int CHROMA_THRESH = 10;

	// Don't bother filtering blocks whose luma variance is below this.
	const float Y_VAR_SKIP_THRESH = 3.0f;

	const float INV_NUM_PIXELS = 1.0f / 16.0f;

	const int grid_width = (int)num_blocks_x;
	const int grid_height = (int)num_blocks_y;

	uint32_t total_filtered_blocks = 0;
	BASISU_NOTE_UNUSED(total_filtered_blocks);

	for (int by = 0; by < grid_height; by++)
	{
		for (int bx = 0; bx < grid_width; bx++)
		{
			const vec2F center_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(bx, by)]));

			// Step 1: look for any in-bounds neighbor with noticeably different chroma.
			bool chroma_edge_found = false;

			for (int dy = -1; (dy <= 1) && !chroma_edge_found; dy++)
			{
				const int ny = by + dy;
				if ((ny < 0) || (ny >= grid_height))
					continue;

				for (int dx = -1; dx <= 1; dx++)
				{
					if ((dx == 0) && (dy == 0))
						continue;

					const int nx = bx + dx;
					if ((nx < 0) || (nx >= grid_width))
						continue;

					const vec2F nearby_cocg(color5_to_cocg(pEndpoints[decoded_endpoints(nx, ny)]));

					const float delta_co = fabsf(nearby_cocg[0] - center_cocg[0]);
					const float delta_cg = fabsf(nearby_cocg[1] - center_cocg[1]);

					if ((delta_co > CHROMA_THRESH) || (delta_cg > CHROMA_THRESH))
					{
						chroma_edge_found = true;
						break;
					}
				} // dx
			} // dy

			if (!chroma_edge_found)
				continue;

			total_filtered_blocks++;

			bc7_mode_5* pDst_block = reinterpret_cast<bc7_mode_5*>(static_cast<uint8_t*>(pDst_blocks) + (bx + by * output_row_pitch_in_blocks_or_pixels) * sizeof(bc7_mode_5));

			//memset(pDst_block, 0x80, 16);

			// Step 2: decode the block's four possible luma values from its current endpoints.
			const int lr = bc7_7_to_8(pDst_block->m_lo.m_r0);
			const int lg = bc7_7_to_8(pDst_block->m_lo.m_g0);
			const int lb = bc7_7_to_8(pDst_block->m_lo.m_b0);

			const int hr = bc7_7_to_8(pDst_block->m_lo.m_r1);
			const int hg = bc7_7_to_8(pDst_block->m_lo.m_g1);
			const int hb = bc7_7_to_8(pDst_block->m_lo.m_b1);

			float y_vals[4];
			for (uint32_t w = 0; w < 4; w++)
			{
				const int cr = bc7_interp2(lr, hr, w);
				const int cg = bc7_interp2(lg, hg, w);
				const int cb = bc7_interp2(lb, hb, w);
				y_vals[w] = (float)cr * .25f + (float)cg * .5f + (float)cb * .25f;
			}

			// Step 3: unpack the color selectors (they start at bit 2 of the high word; the first is
			// 1 bit, the rest 2 bits) into per-pixel luma, accumulating sums for the variance.
			uint64_t sel_bits = pDst_block->m_hi_bits >> 2;

			float block_y_vals[16]; // [y][x]
			float y_sum = 0.0f, y_sum_sq = 0.0f;

			for (uint32_t i = 0; i < 16; i++)
			{
				const bool is_first = (i == 0);

				const uint32_t sel = sel_bits & (is_first ? 1 : 3);
				sel_bits >>= (is_first ? 1 : 2);

				const float y = y_vals[sel];
				block_y_vals[i] = y;
				y_sum += y;
				y_sum_sq += y * y;
			}

			const float y_var = (y_sum_sq * INV_NUM_PIXELS) - basisu::squaref(y_sum * INV_NUM_PIXELS);

			// Don't bother if the block is too smooth.
			if (y_var < Y_VAR_SKIP_THRESH)
				continue;

			// Step 4: rebuild every pixel from its original luma and bilinearly interpolated chroma.
			color32 block_to_pack[16];

			for (int bpy = 0; bpy < 4; bpy++)
			{
				// Rows 0-1 interpolate between the block above and this one, rows 2-3 between this one and the block below.
				const int uby = by + ((bpy - 2) >> 2);
				const float fy = ((float)((bpy + 2) & 3) + .5f) * (1.0f / 4.0f);

				for (int bpx = 0; bpx < 4; bpx++)
				{
					// Same scheme horizontally: columns 0-1 use the block to the left, columns 2-3 the block to the right.
					const int ubx = bx + ((bpx - 2) >> 2);
					const float fx = ((float)((bpx + 2) & 3) + .5f) * (1.0f / 4.0f);

					const vec2F a(get_endpoint_cocg_clamped(ubx, uby, decoded_endpoints, pEndpoints));
					const vec2F b(get_endpoint_cocg_clamped(ubx + 1, uby, decoded_endpoints, pEndpoints));
					const vec2F c(get_endpoint_cocg_clamped(ubx, uby + 1, decoded_endpoints, pEndpoints));
					const vec2F d(get_endpoint_cocg_clamped(ubx + 1, uby + 1, decoded_endpoints, pEndpoints));

					assert((fx >= 0) && (fx <= 1.0f) && (fy >= 0) && (fy <= 1.0f));

					// TODO: Could merge this into 4 muls on each corner by weights
					const vec2F ab = vec2F::lerp(a, b, fx);
					const vec2F cd = vec2F::lerp(c, d, fx);
					const vec2F f = vec2F::lerp(ab, cd, fy);

					const int pixel_index = bpx + bpy * 4;

					const vec3F final_ycocg(block_y_vals[pixel_index], f[0], f[1]);

					vec3F final_conv(ycocg_to_rgb(final_ycocg));
					final_conv.clamp(0.0f, 255.0f);

					// Round to nearest and store as opaque.
					block_to_pack[pixel_index].set_noclamp_rgba((int)(.5f + final_conv[0]), (int)(.5f + final_conv[1]), (int)(.5f + final_conv[2]), 255);

				} // bpx
			} // bpy

			bc7_mode_5_encoder::encode_bc7_mode_5_block(pDst_block, block_to_pack, hq_bc7_mode_5_encoder_mode);

		} // bx
	} // by

	//basisu::fmt_printf("Chroma thresh: {}, Total blocks to filter: {} out of {} {}\n", CHROMA_THRESH, total_filtered_blocks, num_blocks_x * num_blocks_y, (float)total_filtered_blocks * 100.0f / (num_blocks_x * num_blocks_y));
}
4756
#endif // BASISD_SUPPORT_BC7_MODE5
4757
4758
#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC
4759
// The 6 selector bytes (16 selectors x 3 bits = 48 bits) of an EAC block in which every selector is 4:
// the bit pattern 100 repeated sixteen times, i.e. 0x92 0x49 0x24 twice.
static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
4760
#endif
4761
4762
#if BASISD_SUPPORT_ETC2_EAC_A8
4763
// Transcodes one ETC1S block into an ETC2 EAC A8 (alpha) block.
//  pDst_block - destination EAC block; base, table, multiplier and selectors are all written.
//  pEndpoints - ETC1S endpoint (5-bit base color + intensity table index) of the block.
//  pSelector  - ETC1S selectors of the block, including its lowest and highest used selector.
// Blocks using a single selector become constant-alpha blocks. All others use the precomputed
// s_etc1_g_to_etc2_a8 table, indexed by (base red + intensity table * 32) and the selector range.
static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t lo_sel = pSelector->m_lo_selector;
	const uint32_t hi_sel = pSelector->m_hi_selector;

	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten = pEndpoints->m_inten5;

	if (lo_sel == hi_sel)
	{
		// Constant alpha block: the base is the decoded red value, with table 13, multiplier 1
		// and every selector set to 4.
		uint32_t r;
		decoder_etc_block::get_block_color5_r(base_color, inten, lo_sel, r);

		pDst_block->m_base = r;
		pDst_block->m_table = 13;
		pDst_block->m_multiplier = 1;

		memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));

		return;
	}

	// Find the table column matching this (low, high) selector pair; column 0 is used if none matches.
	uint32_t range_index = 0;
	for (uint32_t i = 0; i < NUM_ETC2_EAC_SELECTOR_RANGES; i++)
	{
		if ((lo_sel == s_etc2_eac_selector_ranges[i].m_low) && (hi_sel == s_etc2_eac_selector_ranges[i].m_high))
		{
			range_index = i;
			break;
		}
	}

	const etc1_g_to_eac_conversion& entry = s_etc1_g_to_etc2_a8[base_color.r + inten * 32][range_index];

	// m_table_mul packs the EAC table index in its high nibble and the multiplier in its low nibble.
	pDst_block->m_base = entry.m_base;
	pDst_block->m_table = entry.m_table_mul >> 4;
	pDst_block->m_multiplier = entry.m_table_mul & 15;

	// Translate each ETC1S selector through m_trans (3 bits per ETC1S selector). Each 3-bit result is
	// placed at bit 45 - (y + x * 4) * 3, so pixel (0, 0) ends up in the topmost 3 of the 48 bits.
	uint64_t packed_selectors = 0;

	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			const uint32_t src_sel = pSelector->get_selector(x, y);
			const uint32_t dst_sel = (entry.m_trans >> (src_sel * 3)) & 7;

			const uint32_t dst_bit_ofs = 45 - (y + x * 4) * 3;
			packed_selectors |= (static_cast<uint64_t>(dst_sel) << dst_bit_ofs);
		}
	}

	pDst_block->set_selector_bits(packed_selectors);
}
4818
#endif // BASISD_SUPPORT_ETC2_EAC_A8
4819
4820
#if BASISD_SUPPORT_ETC2_EAC_RG11
4821
// ETC1S -> ETC2 EAC R11 conversion table, consumed by convert_etc1s_to_etc2_eac_r11().
// The first index is (base_color.r + inten_table * 32); the second index is the position of the block's
// used [low, high] selector range within s_etc2_eac_selector_ranges.
// Each entry supplies the EAC base value (m_base), the EAC table index and multiplier packed as
// (table << 4) | multiplier (m_table_mul), and a translation word (m_trans) holding one 3-bit EAC
// selector per ETC1 selector.
// NOTE(review): the initializers appear to be in { m_base, m_table_mul, m_trans } order - confirm against
// the etc1_g_to_eac_conversion declaration.
static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
4822
{
4823
{{0,1,3328},{0,1,3328},{0,16,457},{0,16,456}},
4824
{{0,226,3936},{0,226,3936},{0,17,424},{8,0,472}},
4825
{{6,178,4012},{6,178,4008},{0,146,501},{16,0,472}},
4826
{{14,178,4012},{14,178,4008},{8,146,501},{24,0,472}},
4827
{{23,178,4012},{23,178,4008},{17,146,501},{33,0,472}},
4828
{{31,178,4012},{31,178,4008},{25,146,501},{41,0,472}},
4829
{{39,178,4012},{39,178,4008},{33,146,501},{49,0,472}},
4830
{{47,178,4012},{47,178,4008},{41,146,501},{27,228,496}},
4831
{{56,178,4012},{56,178,4008},{50,146,501},{36,228,496}},
4832
{{64,178,4012},{64,178,4008},{58,146,501},{44,228,496}},
4833
{{72,178,4012},{72,178,4008},{66,146,501},{52,228,496}},
4834
{{80,178,4012},{80,178,4008},{74,146,501},{60,228,496}},
4835
{{89,178,4012},{89,178,4008},{83,146,501},{69,228,496}},
4836
{{97,178,4012},{97,178,4008},{91,146,501},{77,228,496}},
4837
{{105,178,4012},{105,178,4008},{99,146,501},{85,228,496}},
4838
{{113,178,4012},{113,178,4008},{107,146,501},{93,228,496}},
4839
{{122,178,4012},{122,178,4008},{116,146,501},{102,228,496}},
4840
{{130,178,4012},{130,178,4008},{124,146,501},{110,228,496}},
4841
{{138,178,4012},{138,178,4008},{132,146,501},{118,228,496}},
4842
{{146,178,4012},{146,178,4008},{140,146,501},{126,228,496}},
4843
{{155,178,4012},{155,178,4008},{149,146,501},{135,228,496}},
4844
{{163,178,4012},{163,178,4008},{157,146,501},{143,228,496}},
4845
{{171,178,4012},{171,178,4008},{165,146,501},{151,228,496}},
4846
{{179,178,4012},{179,178,4008},{173,146,501},{159,228,496}},
4847
{{188,178,4012},{188,178,4008},{182,146,501},{168,228,496}},
4848
{{196,178,4012},{196,178,4008},{190,146,501},{176,228,496}},
4849
{{204,178,4012},{204,178,4008},{198,146,501},{184,228,496}},
4850
{{212,178,4012},{212,178,4008},{206,146,501},{192,228,496}},
4851
{{221,178,4012},{221,178,4008},{215,146,501},{201,228,496}},
4852
{{229,178,4012},{229,178,4008},{223,146,501},{209,228,496}},
4853
{{235,66,4012},{221,100,4008},{231,146,501},{217,228,496}},
4854
{{211,102,4085},{254,32,4040},{211,102,501},{254,32,456}},
4855
{{0,2,3328},{0,2,3328},{0,1,320},{0,1,320}},
4856
{{7,162,3905},{7,162,3904},{0,17,480},{0,17,480}},
4857
{{15,162,3906},{15,162,3904},{1,117,352},{1,117,352}},
4858
{{23,162,3906},{23,162,3904},{5,34,500},{4,53,424}},
4859
{{32,162,3906},{32,162,3904},{14,34,500},{3,69,424}},
4860
{{40,162,3906},{40,162,3904},{22,34,500},{1,133,496}},
4861
{{48,162,3906},{48,162,3904},{30,34,500},{4,85,496}},
4862
{{56,162,3906},{56,162,3904},{38,34,500},{12,85,496}},
4863
{{65,162,3906},{65,162,3904},{47,34,500},{1,106,424}},
4864
{{73,162,3906},{73,162,3904},{55,34,500},{9,106,424}},
4865
{{81,162,3906},{81,162,3904},{63,34,500},{7,234,496}},
4866
{{89,162,3906},{89,162,3904},{71,34,500},{15,234,496}},
4867
{{98,162,3906},{98,162,3904},{80,34,500},{24,234,496}},
4868
{{106,162,3906},{106,162,3904},{88,34,500},{32,234,496}},
4869
{{114,162,3906},{114,162,3904},{96,34,500},{40,234,496}},
4870
{{122,162,3906},{122,162,3904},{104,34,500},{48,234,496}},
4871
{{131,162,3906},{131,162,3904},{113,34,500},{57,234,496}},
4872
{{139,162,3906},{139,162,3904},{121,34,500},{65,234,496}},
4873
{{147,162,3906},{147,162,3904},{129,34,500},{73,234,496}},
4874
{{155,162,3906},{155,162,3904},{137,34,500},{81,234,496}},
4875
{{164,162,3906},{164,162,3904},{146,34,500},{90,234,496}},
4876
{{172,162,3906},{172,162,3904},{154,34,500},{98,234,496}},
4877
{{180,162,3906},{180,162,3904},{162,34,500},{106,234,496}},
4878
{{188,162,3906},{188,162,3904},{170,34,500},{114,234,496}},
4879
{{197,162,3906},{197,162,3904},{179,34,500},{123,234,496}},
4880
{{205,162,3906},{205,162,3904},{187,34,500},{131,234,496}},
4881
{{213,162,3906},{213,162,3904},{195,34,500},{139,234,496}},
4882
{{221,162,3906},{221,162,3904},{203,34,500},{147,234,496}},
4883
{{230,162,3906},{230,162,3904},{212,34,500},{156,234,496}},
4884
{{238,162,3906},{174,106,4008},{220,34,500},{164,234,496}},
4885
{{240,178,4001},{182,106,4008},{228,34,500},{172,234,496}},
4886
{{166,108,4085},{115,31,4080},{166,108,501},{115,31,496}},
4887
{{1,68,3328},{1,68,3328},{0,1,384},{0,1,384}},
4888
{{1,51,3968},{1,51,3968},{0,2,384},{0,2,384}},
4889
{{21,18,3851},{21,18,3848},{1,50,488},{1,50,488}},
4890
{{26,195,3851},{29,18,3848},{0,67,488},{0,67,488}},
4891
{{35,195,3851},{38,18,3848},{12,115,488},{0,3,496}},
4892
{{43,195,3851},{46,18,3848},{20,115,488},{2,6,424}},
4893
{{51,195,3851},{54,18,3848},{36,66,482},{4,22,424}},
4894
{{59,195,3851},{62,18,3848},{44,66,482},{3,73,424}},
4895
{{68,195,3851},{71,18,3848},{53,66,482},{3,22,496}},
4896
{{76,195,3851},{79,18,3848},{61,66,482},{2,137,496}},
4897
{{84,195,3851},{87,18,3848},{69,66,482},{1,89,496}},
4898
{{92,195,3851},{95,18,3848},{77,66,482},{9,89,496}},
4899
{{101,195,3851},{104,18,3848},{86,66,482},{18,89,496}},
4900
{{109,195,3851},{112,18,3848},{94,66,482},{26,89,496}},
4901
{{117,195,3851},{120,18,3848},{102,66,482},{34,89,496}},
4902
{{125,195,3851},{128,18,3848},{110,66,482},{42,89,496}},
4903
{{134,195,3851},{137,18,3848},{119,66,482},{51,89,496}},
4904
{{141,195,3907},{145,18,3848},{127,66,482},{59,89,496}},
4905
{{149,195,3907},{153,18,3848},{135,66,482},{67,89,496}},
4906
{{157,195,3907},{161,18,3848},{143,66,482},{75,89,496}},
4907
{{166,195,3907},{170,18,3848},{152,66,482},{84,89,496}},
4908
{{174,195,3907},{178,18,3848},{160,66,482},{92,89,496}},
4909
{{182,195,3907},{186,18,3848},{168,66,482},{100,89,496}},
4910
{{190,195,3907},{194,18,3848},{176,66,482},{108,89,496}},
4911
{{199,195,3907},{203,18,3848},{185,66,482},{117,89,496}},
4912
{{207,195,3907},{211,18,3848},{193,66,482},{125,89,496}},
4913
{{215,195,3907},{219,18,3848},{201,66,482},{133,89,496}},
4914
{{223,195,3907},{227,18,3848},{209,66,482},{141,89,496}},
4915
{{232,195,3907},{168,89,4008},{218,66,482},{150,89,496}},
4916
{{236,18,3907},{176,89,4008},{226,66,482},{158,89,496}},
4917
{{158,90,4085},{103,31,4080},{158,90,501},{103,31,496}},
4918
{{166,90,4085},{111,31,4080},{166,90,501},{111,31,496}},
4919
{{0,70,3328},{0,70,3328},{0,17,448},{0,17,448}},
4920
{{0,117,3904},{0,117,3904},{0,35,384},{0,35,384}},
4921
{{13,165,3905},{13,165,3904},{2,211,480},{2,211,480}},
4922
{{21,165,3906},{21,165,3904},{1,51,488},{1,51,488}},
4923
{{30,165,3906},{30,165,3904},{7,61,352},{7,61,352}},
4924
{{38,165,3906},{38,165,3904},{2,125,352},{2,125,352}},
4925
{{46,165,3906},{46,165,3904},{1,37,500},{10,125,352}},
4926
{{54,165,3906},{54,165,3904},{9,37,500},{5,61,424}},
4927
{{63,165,3906},{63,165,3904},{18,37,500},{1,189,424}},
4928
{{71,165,3906},{71,165,3904},{26,37,500},{9,189,424}},
4929
{{79,165,3906},{79,165,3904},{34,37,500},{4,77,424}},
4930
{{87,165,3906},{87,165,3904},{42,37,500},{12,77,424}},
4931
{{96,165,3906},{96,165,3904},{51,37,500},{8,93,424}},
4932
{{104,165,3906},{104,165,3904},{59,37,500},{3,141,496}},
4933
{{112,165,3906},{112,165,3904},{68,37,500},{11,141,496}},
4934
{{120,165,3906},{120,165,3904},{76,37,500},{6,93,496}},
4935
{{129,165,3906},{129,165,3904},{85,37,500},{15,93,496}},
4936
{{70,254,4012},{137,165,3904},{93,37,500},{23,93,496}},
4937
{{145,165,3906},{145,165,3904},{101,37,500},{31,93,496}},
4938
{{86,254,4012},{153,165,3904},{109,37,500},{39,93,496}},
4939
{{163,165,3906},{162,165,3904},{118,37,500},{48,93,496}},
4940
{{171,165,3906},{170,165,3904},{126,37,500},{56,93,496}},
4941
{{179,165,3906},{178,165,3904},{134,37,500},{64,93,496}},
4942
{{187,165,3906},{187,165,3904},{142,37,500},{72,93,496}},
4943
{{196,165,3906},{196,165,3904},{151,37,500},{81,93,496}},
4944
{{204,165,3906},{204,165,3904},{159,37,500},{89,93,496}},
4945
{{212,165,3906},{136,77,4008},{167,37,500},{97,93,496}},
4946
{{220,165,3906},{131,93,4008},{175,37,500},{105,93,496}},
4947
{{214,181,4001},{140,93,4008},{184,37,500},{114,93,496}},
4948
{{222,181,4001},{148,93,4008},{192,37,500},{122,93,496}},
4949
{{115,95,4085},{99,31,4080},{115,95,501},{99,31,496}},
4950
{{123,95,4085},{107,31,4080},{123,95,501},{107,31,496}},
4951
{{0,102,3840},{0,102,3840},{0,18,384},{0,18,384}},
4952
{{5,167,3904},{5,167,3904},{0,13,256},{0,13,256}},
4953
{{4,54,3968},{4,54,3968},{1,67,448},{1,67,448}},
4954
{{30,198,3850},{30,198,3848},{0,3,480},{0,3,480}},
4955
{{39,198,3850},{39,198,3848},{3,52,488},{3,52,488}},
4956
{{47,198,3851},{47,198,3848},{3,4,488},{3,4,488}},
4957
{{55,198,3851},{55,198,3848},{1,70,488},{1,70,488}},
4958
{{53,167,3906},{63,198,3848},{3,22,488},{3,22,488}},
4959
{{62,167,3906},{72,198,3848},{24,118,488},{0,6,496}},
4960
{{70,167,3906},{80,198,3848},{32,118,488},{2,89,488}},
4961
{{78,167,3906},{88,198,3848},{40,118,488},{1,73,496}},
4962
{{86,167,3906},{96,198,3848},{48,118,488},{0,28,424}},
4963
{{95,167,3906},{105,198,3848},{57,118,488},{9,28,424}},
4964
{{103,167,3906},{113,198,3848},{65,118,488},{5,108,496}},
4965
{{111,167,3906},{121,198,3848},{73,118,488},{13,108,496}},
4966
{{119,167,3906},{129,198,3848},{81,118,488},{21,108,496}},
4967
{{128,167,3906},{138,198,3848},{90,118,488},{6,28,496}},
4968
{{136,167,3906},{146,198,3848},{98,118,488},{14,28,496}},
4969
{{145,167,3906},{154,198,3848},{106,118,488},{22,28,496}},
4970
{{153,167,3906},{162,198,3848},{114,118,488},{30,28,496}},
4971
{{162,167,3906},{171,198,3848},{123,118,488},{39,28,496}},
4972
{{170,167,3906},{179,198,3848},{131,118,488},{47,28,496}},
4973
{{178,167,3906},{187,198,3848},{139,118,488},{55,28,496}},
4974
{{186,167,3906},{195,198,3848},{147,118,488},{63,28,496}},
4975
{{194,167,3906},{120,12,4008},{156,118,488},{72,28,496}},
4976
{{206,198,3907},{116,28,4008},{164,118,488},{80,28,496}},
4977
{{214,198,3907},{124,28,4008},{172,118,488},{88,28,496}},
4978
{{222,198,3395},{132,28,4008},{180,118,488},{96,28,496}},
4979
{{207,134,4001},{141,28,4008},{189,118,488},{105,28,496}},
4980
{{95,30,4085},{86,31,4080},{95,30,501},{86,31,496}},
4981
{{103,30,4085},{94,31,4080},{103,30,501},{94,31,496}},
4982
{{111,30,4085},{102,31,4080},{111,30,501},{102,31,496}},
4983
{{0,104,3840},{0,104,3840},{0,18,448},{0,18,448}},
4984
{{4,39,3904},{4,39,3904},{0,4,384},{0,4,384}},
4985
{{0,56,3968},{0,56,3968},{0,84,448},{0,84,448}},
4986
{{6,110,3328},{6,110,3328},{0,20,448},{0,20,448}},
4987
{{41,200,3850},{41,200,3848},{1,4,480},{1,4,480}},
4988
{{49,200,3850},{49,200,3848},{1,8,416},{1,8,416}},
4989
{{57,200,3851},{57,200,3848},{1,38,488},{1,38,488}},
4990
{{65,200,3851},{65,200,3848},{1,120,488},{1,120,488}},
4991
{{74,200,3851},{74,200,3848},{2,72,488},{2,72,488}},
4992
{{68,6,3907},{82,200,3848},{2,24,488},{2,24,488}},
4993
{{77,6,3907},{90,200,3848},{26,120,488},{10,24,488}},
4994
{{97,63,3330},{98,200,3848},{34,120,488},{2,8,496}},
4995
{{106,63,3330},{107,200,3848},{43,120,488},{3,92,488}},
4996
{{114,63,3330},{115,200,3848},{51,120,488},{11,92,488}},
4997
{{122,63,3330},{123,200,3848},{59,120,488},{7,76,496}},
4998
{{130,63,3330},{131,200,3848},{67,120,488},{15,76,496}},
4999
{{139,63,3330},{140,200,3848},{76,120,488},{24,76,496}},
5000
{{147,63,3330},{148,200,3848},{84,120,488},{32,76,496}},
5001
{{155,63,3330},{156,200,3848},{92,120,488},{40,76,496}},
5002
{{164,63,3330},{164,200,3848},{100,120,488},{48,76,496}},
5003
{{173,63,3330},{173,200,3848},{109,120,488},{57,76,496}},
5004
{{184,6,3851},{181,200,3848},{117,120,488},{65,76,496}},
5005
{{192,6,3851},{133,28,3936},{125,120,488},{73,76,496}},
5006
{{189,200,3907},{141,28,3936},{133,120,488},{81,76,496}},
5007
{{198,200,3907},{138,108,4000},{142,120,488},{90,76,496}},
5008
{{206,200,3907},{146,108,4000},{150,120,488},{98,76,496}},
5009
{{214,200,3395},{154,108,4000},{158,120,488},{106,76,496}},
5010
{{190,136,4001},{162,108,4000},{166,120,488},{114,76,496}},
5011
{{123,30,4076},{87,15,4080},{123,30,492},{87,15,496}},
5012
{{117,110,4084},{80,31,4080},{117,110,500},{80,31,496}},
5013
{{125,110,4084},{88,31,4080},{125,110,500},{88,31,496}},
5014
{{133,110,4084},{96,31,4080},{133,110,500},{96,31,496}},
5015
{{9,56,3904},{9,56,3904},{0,67,448},{0,67,448}},
5016
{{1,8,3904},{1,8,3904},{1,84,448},{1,84,448}},
5017
{{1,124,3904},{1,124,3904},{0,39,384},{0,39,384}},
5018
{{9,124,3904},{9,124,3904},{1,4,448},{1,4,448}},
5019
{{6,76,3904},{6,76,3904},{0,70,448},{0,70,448}},
5020
{{62,6,3859},{62,6,3856},{2,38,480},{2,38,480}},
5021
{{70,6,3859},{70,6,3856},{5,43,416},{5,43,416}},
5022
{{78,6,3859},{78,6,3856},{2,11,416},{2,11,416}},
5023
{{87,6,3859},{87,6,3856},{0,171,488},{0,171,488}},
5024
{{67,8,3906},{95,6,3856},{8,171,488},{8,171,488}},
5025
{{75,8,3907},{103,6,3856},{5,123,488},{5,123,488}},
5026
{{83,8,3907},{111,6,3856},{2,75,488},{2,75,488}},
5027
{{92,8,3907},{120,6,3856},{0,27,488},{0,27,488}},
5028
{{100,8,3907},{128,6,3856},{8,27,488},{8,27,488}},
5029
{{120,106,3843},{136,6,3856},{99,6,387},{16,27,488}},
5030
{{128,106,3843},{144,6,3856},{107,6,387},{2,11,496}},
5031
{{137,106,3843},{153,6,3856},{117,6,387},{11,11,496}},
5032
{{145,106,3843},{161,6,3856},{125,6,387},{19,11,496}},
5033
{{163,8,3851},{137,43,3904},{133,6,387},{27,11,496}},
5034
{{171,8,3851},{145,43,3904},{141,6,387},{35,11,496}},
5035
{{180,8,3851},{110,11,4000},{150,6,387},{44,11,496}},
5036
{{188,8,3851},{118,11,4000},{158,6,387},{52,11,496}},
5037
{{172,72,3907},{126,11,4000},{166,6,387},{60,11,496}},
5038
{{174,6,3971},{134,11,4000},{174,6,387},{68,11,496}},
5039
{{183,6,3971},{143,11,4000},{183,6,387},{77,11,496}},
5040
{{191,6,3971},{151,11,4000},{191,6,387},{85,11,496}},
5041
{{199,6,3971},{159,11,4000},{199,6,387},{93,11,496}},
5042
{{92,12,4084},{69,15,4080},{92,12,500},{69,15,496}},
5043
{{101,12,4084},{78,15,4080},{101,12,500},{78,15,496}},
5044
{{110,12,4084},{86,15,4080},{110,12,500},{86,15,496}},
5045
{{118,12,4084},{79,31,4080},{118,12,500},{79,31,496}},
5046
{{126,12,4084},{87,31,4080},{126,12,500},{87,31,496}},
5047
{{71,8,3602},{71,8,3600},{2,21,384},{2,21,384}},
5048
{{79,8,3611},{79,8,3608},{0,69,448},{0,69,448}},
5049
{{87,8,3611},{87,8,3608},{0,23,384},{0,23,384}},
5050
{{95,8,3611},{95,8,3608},{1,5,448},{1,5,448}},
5051
{{104,8,3611},{104,8,3608},{0,88,448},{0,88,448}},
5052
{{112,8,3611},{112,8,3608},{0,72,448},{0,72,448}},
5053
{{120,8,3611},{121,8,3608},{36,21,458},{36,21,456}},
5054
{{133,47,3091},{129,8,3608},{44,21,458},{44,21,456}},
5055
{{142,47,3091},{138,8,3608},{53,21,459},{53,21,456}},
5056
{{98,12,3850},{98,12,3848},{61,21,459},{61,21,456}},
5057
{{106,12,3850},{106,12,3848},{10,92,480},{69,21,456}},
5058
{{114,12,3851},{114,12,3848},{18,92,480},{77,21,456}},
5059
{{123,12,3851},{123,12,3848},{3,44,488},{86,21,456}},
5060
{{95,12,3906},{95,12,3904},{11,44,488},{94,21,456}},
5061
{{103,12,3906},{103,12,3904},{19,44,488},{102,21,456}},
5062
{{111,12,3907},{111,12,3904},{27,44,489},{110,21,456}},
5063
{{120,12,3907},{120,12,3904},{36,44,489},{119,21,456}},
5064
{{128,12,3907},{128,12,3904},{44,44,489},{127,21,456}},
5065
{{136,12,3907},{136,12,3904},{52,44,489},{135,21,456}},
5066
{{144,12,3907},{144,12,3904},{60,44,490},{144,21,456}},
5067
{{153,12,3907},{153,12,3904},{69,44,490},{153,21,456}},
5068
{{161,12,3395},{149,188,3968},{77,44,490},{161,21,456}},
5069
{{169,12,3395},{199,21,3928},{85,44,490},{169,21,456}},
5070
{{113,95,4001},{202,69,3992},{125,8,483},{177,21,456}},
5071
{{122,95,4001},{201,21,3984},{134,8,483},{186,21,456}},
5072
{{143,8,4067},{209,21,3984},{142,8,483},{194,21,456}},
5073
{{151,8,4067},{47,15,4080},{151,8,483},{47,15,496}},
5074
{{159,8,4067},{55,15,4080},{159,8,483},{55,15,496}},
5075
{{168,8,4067},{64,15,4080},{168,8,483},{64,15,496}},
5076
{{160,40,4075},{72,15,4080},{160,40,491},{72,15,496}},
5077
{{168,40,4075},{80,15,4080},{168,40,491},{80,15,496}},
5078
{{144,8,4082},{88,15,4080},{144,8,498},{88,15,496}},
5079
};
5080
5081
// Converts the red channel of one ETC1S block (endpoint + selector pair) into an ETC2 EAC R11 block.
//   pDst_block - receives the EAC base value, table index, multiplier and the packed 3-bit selectors.
//   pEndpoints - ETC1S endpoint; only m_color5.r and m_inten5 are read.
//   pSelector  - ETC1S selectors plus the lowest/highest selector value actually used by the block.
static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
{
	const uint32_t lo_sel = pSelector->m_lo_selector;
	const uint32_t hi_sel = pSelector->m_hi_selector;

	const color32& base5 = pEndpoints->m_color5;
	const uint32_t inten5 = pEndpoints->m_inten5;

	if (lo_sel == hi_sel)
	{
		// Only one selector value is used, so every texel decodes to the same red value.
		uint32_t r;
		decoder_etc_block::get_block_color5_r(base5, inten5, lo_sel, r);

		// Constant red block:
		// select table 13, use selector 4 (0), set multiplier to 1 and base color r.
		pDst_block->m_base = r;
		pDst_block->m_table = 13;
		pDst_block->m_multiplier = 1;

		// Sixteen 3-bit selectors, all equal to 4.
		static const uint8_t s_etc2_eac_r11_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
		memcpy(pDst_block->m_selectors, s_etc2_eac_r11_sel4, sizeof(s_etc2_eac_r11_sel4));

		return;
	}

	// Find the table column matching the used selector range; fall back to column 0 if there is none.
	uint32_t range_index = 0;
	for (uint32_t i = 0; i < NUM_ETC2_EAC_SELECTOR_RANGES; i++)
	{
		if ((lo_sel == s_etc2_eac_selector_ranges[i].m_low) && (hi_sel == s_etc2_eac_selector_ranges[i].m_high))
		{
			range_index = i;
			break;
		}
	}

	const etc1_g_to_eac_conversion& conv = s_etc1_g_to_etc2_r11[base5.r + inten5 * 32][range_index];

	pDst_block->m_base = conv.m_base;
	pDst_block->m_table = conv.m_table_mul >> 4;
	pDst_block->m_multiplier = conv.m_table_mul & 15;

	// Translate every ETC1 selector to its EAC selector. Texel (x, y) lands at bit 45 - (y + x * 4) * 3,
	// so texel (0, 0) occupies the three most significant bits of the 48-bit selector field.
	uint64_t packed_selectors = 0;
	for (uint32_t x = 0; x < 4; x++)
	{
		for (uint32_t y = 0; y < 4; y++)
		{
			const uint32_t etc1_sel = pSelector->get_selector(x, y);
			const uint32_t eac_sel = (conv.m_trans >> (etc1_sel * 3)) & 7;

			const uint32_t shift = 45 - (y + x * 4) * 3;
			packed_selectors |= (static_cast<uint64_t>(eac_sel) << shift);
		}
	}

	pDst_block->set_selector_bits(packed_selectors);
}
5137
#endif // BASISD_SUPPORT_ETC2_EAC_RG11
5138
5139
// ASTC
5140
// One precomputed ETC1S -> ASTC endpoint solution, as stored in the generated
// basisu_transcoder_tables_astc*.inc tables (each table row is written as {lo, hi, err}).
struct etc1_to_astc_solution
{
	uint8_t m_lo;   // best low endpoint value
	uint8_t m_hi;   // best high endpoint value
	uint16_t m_err; // error of that endpoint pair (the table generators clamp it to 0xFFFF)
};
5146
5147
#if BASISD_SUPPORT_ASTC
5148
static dxt_selector_range g_etc1_to_astc_selector_ranges[] =
5149
{
5150
{ 0, 3 },
5151
5152
{ 1, 3 },
5153
{ 0, 2 },
5154
5155
{ 1, 2 },
5156
5157
{ 2, 3 },
5158
{ 0, 1 },
5159
};
5160
5161
const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_RANGES = sizeof(g_etc1_to_astc_selector_ranges) / sizeof(g_etc1_to_astc_selector_ranges[0]);
5162
5163
static uint32_t g_etc1_to_astc_selector_range_index[4][4];
5164
5165
// Number of candidate ETC1 selector -> ASTC weight mappings.
const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS = 10;

// Each row maps the four ETC1 selectors (0-3) to a 2-bit ASTC weight index (0-3).
static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] =
{
	{ 0, 0, 1, 1 }, // 0
	{ 0, 0, 1, 2 }, // 1
	{ 0, 0, 1, 3 }, // 2
	{ 0, 0, 2, 3 }, // 3
	{ 0, 1, 1, 1 }, // 4
	{ 0, 1, 2, 2 }, // 5
	{ 0, 1, 2, 3 }, // 6 (identity)
	{ 0, 2, 3, 3 }, // 7
	{ 1, 2, 2, 2 }, // 8
	{ 1, 2, 3, 3 }, // 9
};
5179
5180
// Precomputed ETC1S -> ASTC endpoint solutions for endpoint values in [0,47].
// The data is the file written by create_etc1_to_astc_conversion_table_0_47(), which emits entries with
// intensity table outermost, then 5-bit base value, then selector range, then selector mapping innermost.
static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] =
{
#include "basisu_transcoder_tables_astc.inc"
};

// The best selector mapping to use given a base+inten table and used selector range for converting grayscale data.
static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];

#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
// Same layout as g_etc1_to_astc, but for endpoint values in [0,255]
// (the file written by create_etc1_to_astc_conversion_table_0_255()).
static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] =
{
#include "basisu_transcoder_tables_astc_0_255.inc"
};

// Best grayscale selector mapping per base/inten/selector range for the [0,255] endpoint table.
static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
#endif

// Value table indexed by a [0,47] endpoint index; the [0,47] table generator reads it to obtain the
// 8-bit value each endpoint index stands for. It is filled in elsewhere at runtime.
static uint32_t g_ise_to_unquant[48];
5195
5196
#if BASISD_WRITE_NEW_ASTC_TABLES
5197
static void create_etc1_to_astc_conversion_table_0_47()
5198
{
5199
FILE* pFile = nullptr;
5200
fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w");
5201
5202
uint32_t n = 0;
5203
5204
for (int inten = 0; inten < 8; inten++)
5205
{
5206
for (uint32_t g = 0; g < 32; g++)
5207
{
5208
color32 block_colors[4];
5209
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
5210
5211
for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
5212
{
5213
const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
5214
const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
5215
5216
uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5217
uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5218
uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5219
uint64_t highest_best_err = 0;
5220
5221
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5222
{
5223
uint32_t best_lo = 0;
5224
uint32_t best_hi = 0;
5225
uint64_t best_err = UINT64_MAX;
5226
5227
for (uint32_t hi = 0; hi <= 47; hi++)
5228
{
5229
for (uint32_t lo = 0; lo <= 47; lo++)
5230
{
5231
uint32_t colors[4];
5232
5233
for (uint32_t s = 0; s < 4; s++)
5234
{
5235
uint32_t s_scaled = s | (s << 2) | (s << 4);
5236
if (s_scaled > 32)
5237
s_scaled++;
5238
5239
uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8);
5240
uint32_t c1 = g_ise_to_unquant[hi] | (g_ise_to_unquant[hi] << 8);
5241
colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
5242
}
5243
5244
uint64_t total_err = 0;
5245
5246
for (uint32_t s = low_selector; s <= high_selector; s++)
5247
{
5248
int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
5249
5250
int err_scale = 1;
5251
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
5252
// the low/high selectors which are clamping to either 0 or 255.
5253
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
5254
err_scale = 8;
5255
5256
total_err += (err * err) * err_scale;
5257
}
5258
5259
if (total_err < best_err)
5260
{
5261
best_err = total_err;
5262
best_lo = lo;
5263
best_hi = hi;
5264
}
5265
}
5266
}
5267
5268
mapping_best_low[m] = best_lo;
5269
mapping_best_high[m] = best_hi;
5270
mapping_best_err[m] = best_err;
5271
highest_best_err = basisu::maximum(highest_best_err, best_err);
5272
5273
} // m
5274
5275
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5276
{
5277
uint64_t err = mapping_best_err[m];
5278
5279
err = basisu::minimum<uint64_t>(err, 0xFFFF);
5280
5281
fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
5282
5283
n++;
5284
if ((n & 31) == 31)
5285
fprintf(pFile, "\n");
5286
} // m
5287
5288
} // sr
5289
} // g
5290
} // inten
5291
5292
fclose(pFile);
5293
}
5294
5295
static void create_etc1_to_astc_conversion_table_0_255()
5296
{
5297
FILE* pFile = nullptr;
5298
fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w");
5299
5300
uint32_t n = 0;
5301
5302
for (int inten = 0; inten < 8; inten++)
5303
{
5304
for (uint32_t g = 0; g < 32; g++)
5305
{
5306
color32 block_colors[4];
5307
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
5308
5309
for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
5310
{
5311
const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
5312
const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
5313
5314
uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5315
uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5316
uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5317
uint64_t highest_best_err = 0;
5318
5319
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5320
{
5321
uint32_t best_lo = 0;
5322
uint32_t best_hi = 0;
5323
uint64_t best_err = UINT64_MAX;
5324
5325
for (uint32_t hi = 0; hi <= 255; hi++)
5326
{
5327
for (uint32_t lo = 0; lo <= 255; lo++)
5328
{
5329
uint32_t colors[4];
5330
5331
for (uint32_t s = 0; s < 4; s++)
5332
{
5333
uint32_t s_scaled = s | (s << 2) | (s << 4);
5334
if (s_scaled > 32)
5335
s_scaled++;
5336
5337
uint32_t c0 = lo | (lo << 8);
5338
uint32_t c1 = hi | (hi << 8);
5339
colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
5340
}
5341
5342
uint64_t total_err = 0;
5343
5344
for (uint32_t s = low_selector; s <= high_selector; s++)
5345
{
5346
int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
5347
5348
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
5349
// the low/high selectors which are clamping to either 0 or 255.
5350
int err_scale = 1;
5351
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
5352
err_scale = 8;
5353
5354
total_err += (err * err) * err_scale;
5355
}
5356
5357
if (total_err < best_err)
5358
{
5359
best_err = total_err;
5360
best_lo = lo;
5361
best_hi = hi;
5362
}
5363
}
5364
}
5365
5366
mapping_best_low[m] = best_lo;
5367
mapping_best_high[m] = best_hi;
5368
mapping_best_err[m] = best_err;
5369
highest_best_err = basisu::maximum(highest_best_err, best_err);
5370
} // m
5371
5372
for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5373
{
5374
uint64_t err = mapping_best_err[m];
5375
5376
err = basisu::minimum<uint64_t>(err, 0xFFFF);
5377
5378
fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
5379
5380
n++;
5381
if ((n & 31) == 31)
5382
fprintf(pFile, "\n");
5383
} // m
5384
5385
} // sr
5386
} // g
5387
} // inten
5388
5389
fclose(pFile);
5390
}
5391
#endif
5392
5393
#endif
5394
5395
#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
5396
// Table encodes 5 trits to 8 output bits. 3^5 entries.
5397
// Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
5398
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
5399
43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
5400
131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
5401
208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
5402
225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
5403
191, 223, 124, 125, 126 };
5404
5405
// Extracts the bit field [low,high] (both inclusive, bit 0 = least significant) from bits and returns
// it right-aligned.
// The mask is built in unsigned arithmetic and the full 32-bit field is handled explicitly: shifting
// a 32-bit value by 32 is undefined, and (1 << 31) - 1 overflows a signed int.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
	const uint32_t num_bits = static_cast<uint32_t>(high - low + 1);
	const uint32_t mask = (num_bits >= 32) ? 0xFFFFFFFFu : ((1u << num_bits) - 1u);
	return (bits >> low) & mask;
}
5410
5411
// ORs the low total_bits bits of value into the output buffer starting at bit position bit_pos, then
// advances bit_pos by total_bits. The buffer is addressed byte by byte (bit 0 of byte 0 is position 0),
// so the result does not depend on host endianness.
// The destination bits are only ORed, never cleared, so they must already be zero.
// Each chunk is masked to the number of bits being written, so any bits of value at or above total_bits
// are ignored instead of leaking into neighbouring fields of the same byte.
static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits)
{
	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

	while (total_bits)
	{
		// Never write past the end of the current byte in one step.
		const uint32_t bit_ofs = static_cast<uint32_t>(bit_pos & 7);
		const uint32_t bits_left_in_byte = 8 - bit_ofs;
		const uint32_t bits_to_write = (total_bits < bits_left_in_byte) ? total_bits : bits_left_in_byte;

		// bits_to_write is at most 8, so the shift below cannot overflow.
		const uint32_t chunk = value & ((1u << bits_to_write) - 1u);
		pBytes[bit_pos >> 3] |= static_cast<uint8_t>(chunk << bit_ofs);

		bit_pos += bits_to_write;
		total_bits -= bits_to_write;
		value >>= bits_to_write;
	}
}
5427
5428
// Encodes 5 values to output, usable for any range that uses trits and bits
5429
static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
5430
{
5431
// First extract the trits and the bits from the 5 input values
5432
int trits = 0, bits[5];
5433
const uint32_t bit_mask = (1 << n) - 1;
5434
for (int i = 0; i < 5; i++)
5435
{
5436
static const int s_muls[5] = { 1, 3, 9, 27, 81 };
5437
5438
const int t = pValues[i] >> n;
5439
5440
trits += t * s_muls[i];
5441
bits[i] = pValues[i] & bit_mask;
5442
}
5443
5444
// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
5445
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
5446
5447
assert(trits < 243);
5448
const int T = g_astc_trit_encode[trits];
5449
5450
// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
5451
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
5452
5453
astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
5454
(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
5455
}
5456
#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
5457
5458
#if BASISD_SUPPORT_ASTC
5459
// Unpacked inputs for the single-format ASTC block packers below.
struct astc_block_params
{
	// Sized as 2 groups of 5 values, but only a max of 8 are used (RRGGBBAA00).
	uint8_t m_endpoints[10];

	// Up to 32 weight values.
	uint8_t m_weights[32];
};
5465
5466
// Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2).
5467
// We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity.
5468
// Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color.
5469
// 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47.
5470
// Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec:
5471
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5472
// 32 total weights, stored as 16 CA CA, each ranging from 0-3.
5473
static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock)
5474
{
5475
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5476
5477
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5478
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5479
pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5480
pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5481
5482
pOutput[2] = 0;
5483
pOutput[3] = 0;
5484
5485
// Pack 8 endpoints (each ranging between [0,47]) using BISE starting at bit 17
5486
int bit_pos = 17;
5487
astc_encode_trits(pOutput, pBlock->m_endpoints, bit_pos, 4);
5488
astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4);
5489
5490
// Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5491
5492
for (uint32_t i = 0; i < 32; i++)
5493
{
5494
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5495
const uint32_t ofs = 126 - (i * 2);
5496
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5497
}
5498
}
5499
5500
// CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights
5501
// This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient.
5502
static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock)
5503
{
5504
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5505
5506
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5507
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5508
pBytes[0] = 0x41; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5509
pOutput[1] = 0;
5510
pBytes[8] = 0x00; pBytes[9] = 0x00; pBytes[10] = 0x00; pBytes[11] = 0xc0;
5511
pOutput[3] = 0;
5512
5513
// Pack 8 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5514
int bit_pos = 17;
5515
for (uint32_t i = 0; i < 8; i++)
5516
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5517
5518
// Pack 32 1-bit weights, which are stored from the top down into the block in opposite bit order.
5519
for (uint32_t i = 0; i < 32; i++)
5520
{
5521
const uint32_t ofs = 127 - i;
5522
pBytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
5523
}
5524
}
5525
5526
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5527
// Optional 8-bit endpoint packing functions.
5528
5529
// CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights
5530
static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5531
{
5532
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5533
5534
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5535
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5536
pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00;
5537
pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5538
5539
pOutput[2] = 0;
5540
pOutput[3] = 0;
5541
5542
// Pack 4 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5543
int bit_pos = 17;
5544
for (uint32_t i = 0; i < 4; i++)
5545
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5546
5547
// Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5548
for (uint32_t i = 0; i < 32; i++)
5549
{
5550
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5551
const uint32_t ofs = 126 - (i * 2);
5552
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5553
}
5554
}
5555
5556
// CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights
5557
static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5558
{
5559
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5560
5561
// Write constant block mode, color component selector, number of partitions, color endpoint mode
5562
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5563
pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00;
5564
5565
pOutput[1] = 0;
5566
pOutput[2] = 0;
5567
pOutput[3] = 0;
5568
5569
// Pack 6 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5570
int bit_pos = 17;
5571
for (uint32_t i = 0; i < 6; i++)
5572
astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5573
5574
// Pack 16 2-bit weights, which are stored from the top down into the block in opposite bit order.
5575
for (uint32_t i = 0; i < 16; i++)
5576
{
5577
static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5578
const uint32_t ofs = 126 - (i * 2);
5579
pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5580
}
5581
}
5582
#endif
5583
5584
// Indexed by an 8-bit [0,255] value: the quantized [0,47] endpoint that best matches it on its own.
static uint8_t g_astc_single_color_encoding_0[256];

// Indexed by an 8-bit [0,255] value: the quantized [0,47] low/high endpoint pair that best
// reproduces it when the selector (weight) is 1.
static struct
{
	uint8_t m_lo;
	uint8_t m_hi;
} g_astc_single_color_encoding_1[256];
5592
5593
static void transcoder_init_astc()
5594
{
5595
for (uint32_t base_color = 0; base_color < 32; base_color++)
5596
{
5597
for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5598
{
5599
for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5600
{
5601
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5602
5603
uint32_t best_mapping = 0;
5604
uint32_t best_err = UINT32_MAX;
5605
for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5606
{
5607
if (pTable_g[mapping_index].m_err < best_err)
5608
{
5609
best_err = pTable_g[mapping_index].m_err;
5610
best_mapping = mapping_index;
5611
}
5612
}
5613
5614
g_etc1_to_astc_best_grayscale_mapping[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5615
}
5616
}
5617
}
5618
5619
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5620
for (uint32_t base_color = 0; base_color < 32; base_color++)
5621
{
5622
for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5623
{
5624
for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5625
{
5626
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5627
5628
uint32_t best_mapping = 0;
5629
uint32_t best_err = UINT32_MAX;
5630
for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5631
{
5632
if (pTable_g[mapping_index].m_err < best_err)
5633
{
5634
best_err = pTable_g[mapping_index].m_err;
5635
best_mapping = mapping_index;
5636
}
5637
}
5638
5639
g_etc1_to_astc_best_grayscale_mapping_0_255[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5640
}
5641
}
5642
}
5643
#endif
5644
5645
for (uint32_t i = 0; i < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; i++)
5646
{
5647
uint32_t l = g_etc1_to_astc_selector_ranges[i].m_low;
5648
uint32_t h = g_etc1_to_astc_selector_ranges[i].m_high;
5649
g_etc1_to_astc_selector_range_index[l][h] = i;
5650
}
5651
5652
// Endpoint dequantization, see:
5653
// https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5654
for (uint32_t trit = 0; trit < 3; trit++)
5655
{
5656
for (uint32_t bit = 0; bit < 16; bit++)
5657
{
5658
const uint32_t A = (bit & 1) ? 511 : 0;
5659
const uint32_t B = (bit >> 1) | ((bit >> 1) << 6);
5660
const uint32_t C = 22;
5661
const uint32_t D = trit;
5662
5663
uint32_t unq = D * C + B;
5664
unq = unq ^ A;
5665
unq = (A & 0x80) | (unq >> 2);
5666
5667
g_ise_to_unquant[bit | (trit << 4)] = unq;
5668
}
5669
}
5670
5671
// Compute table used for optimal single color encoding.
5672
for (int i = 0; i < 256; i++)
5673
{
5674
int lowest_e = INT_MAX;
5675
5676
for (int lo = 0; lo < 48; lo++)
5677
{
5678
for (int hi = 0; hi < 48; hi++)
5679
{
5680
const int lo_v = g_ise_to_unquant[lo];
5681
const int hi_v = g_ise_to_unquant[hi];
5682
5683
int l = lo_v | (lo_v << 8);
5684
int h = hi_v | (hi_v << 8);
5685
5686
int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8;
5687
5688
int e = abs(v - i);
5689
5690
if (e < lowest_e)
5691
{
5692
g_astc_single_color_encoding_1[i].m_hi = static_cast<uint8_t>(hi);
5693
g_astc_single_color_encoding_1[i].m_lo = static_cast<uint8_t>(lo);
5694
5695
lowest_e = e;
5696
}
5697
5698
} // hi
5699
} // lo
5700
}
5701
5702
for (int i = 0; i < 256; i++)
5703
{
5704
int lowest_e = INT_MAX;
5705
5706
for (int lo = 0; lo < 48; lo++)
5707
{
5708
const int lo_v = g_ise_to_unquant[lo];
5709
5710
int e = abs(lo_v - i);
5711
5712
if (e < lowest_e)
5713
{
5714
g_astc_single_color_encoding_0[i] = static_cast<uint8_t>(lo);
5715
5716
lowest_e = e;
5717
}
5718
} // lo
5719
}
5720
}
5721
5722
// Converts opaque or color+alpha ETC1S block to ASTC 4x4.
5723
// This function tries to use the best ASTC mode given the block's actual contents.
5724
static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector,
5725
bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook)
5726
{
5727
astc_block_params blk;
5728
5729
blk.m_endpoints[8] = 0;
5730
blk.m_endpoints[9] = 0;
5731
5732
int constant_alpha_val = 255;
5733
int num_unique_alpha_selectors = 1;
5734
5735
if (transcode_alpha)
5736
{
5737
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5738
5739
num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
5740
5741
if (num_unique_alpha_selectors == 1)
5742
{
5743
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5744
5745
const color32& alpha_base_color = alpha_endpoint.m_color5;
5746
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5747
5748
int alpha_block_colors[4];
5749
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5750
5751
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
5752
}
5753
}
5754
5755
const color32& base_color = pEndpoints->m_color5;
5756
const uint32_t inten_table = pEndpoints->m_inten5;
5757
5758
const uint32_t low_selector = pSelector->m_lo_selector;
5759
const uint32_t high_selector = pSelector->m_hi_selector;
5760
5761
// Handle solid color or BTC blocks, which can always be encoded from ETC1S to ASTC losslessly.
5762
if ((pSelector->m_num_unique_selectors == 1) && (num_unique_alpha_selectors == 1))
5763
{
5764
// Both color and alpha are constant, write a solid color block and exit.
5765
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks
5766
uint32_t r, g, b;
5767
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
5768
5769
uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
5770
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
5771
5772
pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
5773
5774
pOutput[1] = 0xffffffff;
5775
pOutput[2] = 0;
5776
pOutput[3] = 0;
5777
5778
int bit_pos = 64;
5779
astc_set_bits(pOutput, bit_pos, r | (r << 8), 16);
5780
astc_set_bits(pOutput, bit_pos, g | (g << 8), 16);
5781
astc_set_bits(pOutput, bit_pos, b | (b << 8), 16);
5782
astc_set_bits(pOutput, bit_pos, constant_alpha_val | (constant_alpha_val << 8), 16);
5783
5784
return;
5785
}
5786
else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2))
5787
{
5788
// Both color and alpha use <= 2 unique selectors each.
5789
// Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights).
5790
color32 block_colors[4];
5791
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
5792
5793
blk.m_endpoints[0] = block_colors[low_selector].r;
5794
blk.m_endpoints[2] = block_colors[low_selector].g;
5795
blk.m_endpoints[4] = block_colors[low_selector].b;
5796
5797
blk.m_endpoints[1] = block_colors[high_selector].r;
5798
blk.m_endpoints[3] = block_colors[high_selector].g;
5799
blk.m_endpoints[5] = block_colors[high_selector].b;
5800
5801
int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
5802
int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
5803
bool invert = false;
5804
if (s1 < s0)
5805
{
5806
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5807
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5808
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5809
invert = true;
5810
}
5811
5812
if (transcode_alpha)
5813
{
5814
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5815
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5816
5817
const color32& alpha_base_color = alpha_endpoint.m_color5;
5818
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5819
5820
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5821
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5822
5823
int alpha_block_colors[4];
5824
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5825
5826
blk.m_endpoints[6] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5827
blk.m_endpoints[7] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5828
5829
for (uint32_t y = 0; y < 4; y++)
5830
{
5831
for (uint32_t x = 0; x < 4; x++)
5832
{
5833
uint32_t s = alpha_selectors.get_selector(x, y);
5834
s = (s == alpha_high_selector) ? 1 : 0;
5835
5836
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(s);
5837
} // x
5838
} // y
5839
}
5840
else
5841
{
5842
blk.m_endpoints[6] = 255;
5843
blk.m_endpoints[7] = 255;
5844
5845
for (uint32_t i = 0; i < 16; i++)
5846
blk.m_weights[i * 2 + 1] = 0;
5847
}
5848
5849
for (uint32_t y = 0; y < 4; y++)
5850
{
5851
for (uint32_t x = 0; x < 4; x++)
5852
{
5853
uint32_t s = pSelector->get_selector(x, y);
5854
5855
s = (s == high_selector) ? 1 : 0;
5856
5857
if (invert)
5858
s = 1 - s;
5859
5860
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(s);
5861
} // x
5862
} // y
5863
5864
astc_pack_block_cem_12_weight_range0(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5865
5866
return;
5867
}
5868
5869
// Either alpha and/or color use > 2 unique selectors each, so we must do something more complex.
5870
5871
#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5872
// The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints.
5873
5874
// If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha.
5875
if ((base_color.r == base_color.g) && (base_color.r == base_color.b))
5876
{
5877
if (transcode_alpha)
5878
{
5879
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5880
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5881
5882
const color32& alpha_base_color = alpha_endpoint.m_color5;
5883
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5884
5885
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5886
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5887
5888
if (num_unique_alpha_selectors <= 2)
5889
{
5890
// Simple alpha block with only 1 or 2 unique values, so use BTC. This is lossless.
5891
int alpha_block_colors[4];
5892
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5893
5894
blk.m_endpoints[2] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5895
blk.m_endpoints[3] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5896
5897
for (uint32_t i = 0; i < 16; i++)
5898
{
5899
uint32_t s = alpha_selectors.get_selector(i & 3, i >> 2);
5900
blk.m_weights[i * 2 + 1] = (s == alpha_high_selector) ? 3 : 0;
5901
}
5902
}
5903
else
5904
{
5905
// Convert ETC1S alpha
5906
const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
5907
5908
//[32][8][RANGES][MAPPING]
5909
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5910
5911
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
5912
5913
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
5914
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
5915
5916
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5917
5918
for (uint32_t y = 0; y < 4; y++)
5919
{
5920
for (uint32_t x = 0; x < 4; x++)
5921
{
5922
uint32_t s = alpha_selectors.get_selector(x, y);
5923
uint32_t as = pSelectors_xlat[s];
5924
5925
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
5926
} // x
5927
} // y
5928
}
5929
}
5930
else
5931
{
5932
// No alpha slice - set output alpha to all 255's
5933
blk.m_endpoints[2] = 255;
5934
blk.m_endpoints[3] = 255;
5935
5936
for (uint32_t i = 0; i < 16; i++)
5937
blk.m_weights[i * 2 + 1] = 0;
5938
}
5939
5940
if (pSelector->m_num_unique_selectors <= 2)
5941
{
5942
// Simple color block with only 1 or 2 unique values, so use BTC. This is lossless.
5943
int block_colors[4];
5944
decoder_etc_block::get_block_colors5_g(block_colors, base_color, inten_table);
5945
5946
blk.m_endpoints[0] = static_cast<uint8_t>(block_colors[low_selector]);
5947
blk.m_endpoints[1] = static_cast<uint8_t>(block_colors[high_selector]);
5948
5949
for (uint32_t i = 0; i < 16; i++)
5950
{
5951
uint32_t s = pSelector->get_selector(i & 3, i >> 2);
5952
blk.m_weights[i * 2] = (s == high_selector) ? 3 : 0;
5953
}
5954
}
5955
else
5956
{
5957
// Convert ETC1S alpha
5958
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5959
5960
//[32][8][RANGES][MAPPING]
5961
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5962
5963
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table];
5964
5965
blk.m_endpoints[0] = pTable_g[best_mapping].m_lo;
5966
blk.m_endpoints[1] = pTable_g[best_mapping].m_hi;
5967
5968
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5969
5970
for (uint32_t y = 0; y < 4; y++)
5971
{
5972
for (uint32_t x = 0; x < 4; x++)
5973
{
5974
uint32_t s = pSelector->get_selector(x, y);
5975
uint32_t as = pSelectors_xlat[s];
5976
5977
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
5978
} // x
5979
} // y
5980
}
5981
5982
astc_pack_block_cem_4_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5983
return;
5984
}
5985
5986
// The block isn't grayscale and it uses > 2 unique selectors for opaque and/or alpha.
5987
// Check for fully opaque blocks, if so use 8-bit endpoints for slightly higher opaque quality (higher than BC1, but lower than BC7 mode 6 opaque).
5988
if ((num_unique_alpha_selectors == 1) && (constant_alpha_val == 255))
5989
{
5990
// Convert ETC1S color
5991
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5992
5993
//[32][8][RANGES][MAPPING]
5994
const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5995
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5996
const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5997
5998
uint32_t best_err = UINT_MAX;
5999
uint32_t best_mapping = 0;
6000
6001
assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
6002
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6003
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6004
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6005
#undef DO_ITER
6006
6007
blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
6008
blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
6009
6010
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
6011
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
6012
6013
blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
6014
blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
6015
6016
int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
6017
int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
6018
bool invert = false;
6019
6020
if (s1 < s0)
6021
{
6022
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6023
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6024
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6025
invert = true;
6026
}
6027
6028
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6029
6030
for (uint32_t y = 0; y < 4; y++)
6031
{
6032
for (uint32_t x = 0; x < 4; x++)
6033
{
6034
uint32_t s = pSelector->get_selector(x, y);
6035
uint32_t as = pSelectors_xlat[s];
6036
if (invert)
6037
as = 3 - as;
6038
6039
blk.m_weights[x + y * 4] = static_cast<uint8_t>(as);
6040
} // x
6041
} // y
6042
6043
// Now pack to ASTC
6044
astc_pack_block_cem_8_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
6045
return;
6046
}
6047
#endif //#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
6048
6049
// Nothing else worked, so fall back to CEM Mode 12 (LDR RGBA Direct), [0,47] endpoints, weight range 2 (2-bit weights), dual planes.
6050
// This mode can handle everything, but at slightly less quality than BC1.
6051
if (transcode_alpha)
6052
{
6053
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
6054
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
6055
6056
const color32& alpha_base_color = alpha_endpoint.m_color5;
6057
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
6058
6059
const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
6060
const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
6061
6062
if (alpha_low_selector == alpha_high_selector)
6063
{
6064
// Solid alpha block - use precomputed tables.
6065
int alpha_block_colors[4];
6066
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
6067
6068
const uint32_t g = alpha_block_colors[alpha_low_selector];
6069
6070
blk.m_endpoints[6] = g_astc_single_color_encoding_1[g].m_lo;
6071
blk.m_endpoints[7] = g_astc_single_color_encoding_1[g].m_hi;
6072
6073
for (uint32_t i = 0; i < 16; i++)
6074
blk.m_weights[i * 2 + 1] = 1;
6075
}
6076
else if ((alpha_inten_table >= 7) && (alpha_selectors.m_num_unique_selectors == 2) && (alpha_low_selector == 0) && (alpha_high_selector == 3))
6077
{
6078
// Handle outlier case where only the two outer colors are used with inten table 7.
6079
color32 alpha_block_colors[4];
6080
6081
decoder_etc_block::get_block_colors5(alpha_block_colors, alpha_base_color, alpha_inten_table);
6082
6083
const uint32_t g0 = alpha_block_colors[0].g;
6084
const uint32_t g1 = alpha_block_colors[3].g;
6085
6086
blk.m_endpoints[6] = g_astc_single_color_encoding_0[g0];
6087
blk.m_endpoints[7] = g_astc_single_color_encoding_0[g1];
6088
6089
for (uint32_t y = 0; y < 4; y++)
6090
{
6091
for (uint32_t x = 0; x < 4; x++)
6092
{
6093
uint32_t s = alpha_selectors.get_selector(x, y);
6094
uint32_t as = (s == alpha_high_selector) ? 3 : 0;
6095
6096
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
6097
} // x
6098
} // y
6099
}
6100
else
6101
{
6102
// Convert ETC1S alpha
6103
const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
6104
6105
//[32][8][RANGES][MAPPING]
6106
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6107
6108
const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
6109
6110
blk.m_endpoints[6] = pTable_g[best_mapping].m_lo;
6111
blk.m_endpoints[7] = pTable_g[best_mapping].m_hi;
6112
6113
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6114
6115
for (uint32_t y = 0; y < 4; y++)
6116
{
6117
for (uint32_t x = 0; x < 4; x++)
6118
{
6119
uint32_t s = alpha_selectors.get_selector(x, y);
6120
uint32_t as = pSelectors_xlat[s];
6121
6122
blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
6123
} // x
6124
} // y
6125
}
6126
}
6127
else
6128
{
6129
// No alpha slice - set output alpha to all 255's
6130
// 1 is 255 when dequantized
6131
blk.m_endpoints[6] = 1;
6132
blk.m_endpoints[7] = 1;
6133
6134
for (uint32_t i = 0; i < 16; i++)
6135
blk.m_weights[i * 2 + 1] = 0;
6136
}
6137
6138
if (low_selector == high_selector)
6139
{
6140
// Solid color block - use precomputed tables of optimal endpoints assuming selector weights are all 1.
6141
color32 block_colors[4];
6142
6143
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6144
6145
const uint32_t r = block_colors[low_selector].r;
6146
const uint32_t g = block_colors[low_selector].g;
6147
const uint32_t b = block_colors[low_selector].b;
6148
6149
blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo;
6150
blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi;
6151
6152
blk.m_endpoints[2] = g_astc_single_color_encoding_1[g].m_lo;
6153
blk.m_endpoints[3] = g_astc_single_color_encoding_1[g].m_hi;
6154
6155
blk.m_endpoints[4] = g_astc_single_color_encoding_1[b].m_lo;
6156
blk.m_endpoints[5] = g_astc_single_color_encoding_1[b].m_hi;
6157
6158
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6159
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6160
bool invert = false;
6161
6162
if (s1 < s0)
6163
{
6164
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6165
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6166
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6167
invert = true;
6168
}
6169
6170
for (uint32_t i = 0; i < 16; i++)
6171
blk.m_weights[i * 2] = invert ? 2 : 1;
6172
}
6173
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6174
{
6175
// Handle outlier case where only the two outer colors are used with inten table 7.
6176
color32 block_colors[4];
6177
6178
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6179
6180
const uint32_t r0 = block_colors[0].r;
6181
const uint32_t g0 = block_colors[0].g;
6182
const uint32_t b0 = block_colors[0].b;
6183
6184
const uint32_t r1 = block_colors[3].r;
6185
const uint32_t g1 = block_colors[3].g;
6186
const uint32_t b1 = block_colors[3].b;
6187
6188
blk.m_endpoints[0] = g_astc_single_color_encoding_0[r0];
6189
blk.m_endpoints[1] = g_astc_single_color_encoding_0[r1];
6190
6191
blk.m_endpoints[2] = g_astc_single_color_encoding_0[g0];
6192
blk.m_endpoints[3] = g_astc_single_color_encoding_0[g1];
6193
6194
blk.m_endpoints[4] = g_astc_single_color_encoding_0[b0];
6195
blk.m_endpoints[5] = g_astc_single_color_encoding_0[b1];
6196
6197
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6198
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6199
bool invert = false;
6200
6201
if (s1 < s0)
6202
{
6203
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6204
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6205
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6206
invert = true;
6207
}
6208
6209
for (uint32_t y = 0; y < 4; y++)
6210
{
6211
for (uint32_t x = 0; x < 4; x++)
6212
{
6213
uint32_t s = pSelector->get_selector(x, y);
6214
uint32_t as = (s == low_selector) ? 0 : 3;
6215
6216
if (invert)
6217
as = 3 - as;
6218
6219
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
6220
} // x
6221
} // y
6222
}
6223
else
6224
{
6225
// Convert ETC1S color
6226
const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
6227
6228
//[32][8][RANGES][MAPPING]
6229
const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6230
const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6231
const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
6232
6233
uint32_t best_err = UINT_MAX;
6234
uint32_t best_mapping = 0;
6235
6236
assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
6237
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6238
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6239
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6240
#undef DO_ITER
6241
6242
blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
6243
blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
6244
6245
blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
6246
blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
6247
6248
blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
6249
blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
6250
6251
int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
6252
int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
6253
bool invert = false;
6254
6255
if (s1 < s0)
6256
{
6257
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
6258
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
6259
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
6260
invert = true;
6261
}
6262
6263
const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
6264
6265
for (uint32_t y = 0; y < 4; y++)
6266
{
6267
for (uint32_t x = 0; x < 4; x++)
6268
{
6269
uint32_t s = pSelector->get_selector(x, y);
6270
uint32_t as = pSelectors_xlat[s];
6271
if (invert)
6272
as = 3 - as;
6273
6274
blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
6275
} // x
6276
} // y
6277
}
6278
6279
// Now pack to ASTC
6280
astc_pack_block_cem_12_weight_range2(reinterpret_cast<uint32_t *>(pDst_block), &blk);
6281
}
6282
#endif
6283
6284
#if BASISD_SUPPORT_ATC
6285
// One precomputed conversion solution. ATC and PVRTC2 both use tables of these.
struct etc1s_to_atc_solution
{
	uint8_t m_lo, m_hi; // presumably the low/high endpoint of the solution - confirm against the table generator
	uint16_t m_err;     // presumably the error of this solution (lower is better) - confirm
};
6292
6293
static dxt_selector_range g_etc1s_to_atc_selector_ranges[] =
6294
{
6295
{ 0, 3 },
6296
{ 1, 3 },
6297
{ 0, 2 },
6298
{ 1, 2 },
6299
{ 2, 3 },
6300
{ 0, 1 },
6301
};
6302
6303
const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]);
6304
6305
static uint32_t g_etc1s_to_atc_selector_range_index[4][4];
6306
6307
// Number of candidate selector mappings.
const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10;

// Each row maps the four source selector values (the column index) to an output selector.
static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] =
{
	{ 0, 0, 1, 1 }, // 0
	{ 0, 0, 1, 2 }, // 1
	{ 0, 0, 1, 3 }, // 2
	{ 0, 0, 2, 3 }, // 3
	{ 0, 1, 1, 1 }, // 4
	{ 0, 1, 2, 2 }, // 5
	{ 0, 1, 2, 3 }, // 6 - identity
	{ 0, 2, 3, 3 }, // 7
	{ 1, 2, 2, 2 }, // 8
	{ 1, 2, 3, 3 }, // 9
};

// Row of g_etc1s_to_atc_selector_mappings that leaves every selector unchanged.
const uint32_t ATC_IDENTITY_SELECTOR_MAPPING_INDEX = 6;
6322
6323
#if BASISD_SUPPORT_PVRTC2
// Precomputed PVRTC2 solutions: 32 * 8 groups, each holding one solution per (selector range, selector mapping).
static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] =
{
	#include "basisu_transcoder_tables_pvrtc2_45.inc"
};

// Compiled out.
#if 0
static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] =
{
	#include "basisu_transcoder_tables_pvrtc2_alpha_33.inc"
};
#endif

#endif
6335
6336
static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
6337
#include "basisu_transcoder_tables_atc_55.inc"
6338
};
6339
6340
static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
6341
#include "basisu_transcoder_tables_atc_56.inc"
6342
};
6343
6344
// One entry of a single-color match table: the quantized (low, high) endpoint pair chosen for an 8-bit value.
struct atc_match_entry
{
	uint8_t m_lo;
	uint8_t m_hi;
};

// Match tables indexed by the 8-bit target value; all are filled by transcoder_init_atc().
// "equals_1" tables: best endpoint pair when every texel uses selector 1.
static atc_match_entry g_pvrtc2_match45_equals_1[256], g_atc_match55_equals_1[256], g_atc_match56_equals_1[256]; // selector 1
// Best high endpoint alone (selector 3) for 4, 5 and 6 bit endpoints.
static atc_match_entry g_pvrtc2_match4[256], g_atc_match5[256], g_atc_match6[256];

// Expands a quantized endpoint value to 8 bits by bit replication.
// num_levels selects the source precision: 16 (4 bits), 32 (5 bits), anything else is treated as 6 bits.
static inline int atc_expand_endpoint_to_8_bits(int v, int num_levels)
{
	if (num_levels == 16)
	{
		// This is only for PVRTC2 - expand to 5 then 8
		v = (v << 1) | (v >> 3);
		return (v << 3) | (v >> 2);
	}

	if (num_levels == 32)
		return (v << 3) | (v >> 2);

	return (v << 2) | (v >> 4);
}

// Fills pTable[0..255] with the endpoint pair that best reproduces each 8-bit value.
//   pTable - 256 entry output table.
//   size0  - number of low endpoint levels to search (16, 32, or anything else for 6-bit expansion).
//   size1  - number of high endpoint levels to search (same convention).
//   sel    - 1: match the value produced by selector 1, (lo * 5 + hi * 3) / 8.
//            3: match the high endpoint itself.
// Pairs are visited lo-major, hi-minor and only a strictly smaller error replaces the current best,
// so ties resolve to the first pair visited.
static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel)
{
	assert((sel == 1) || (sel == 3));

	for (int i = 0; i < 256; i++)
	{
		int lowest_e = 256;

		// Once an exact match (error 0) has been recorded no later pair can replace it,
		// so both loops stop early. The resulting table is unchanged.
		for (int lo = 0; (lo < size0) && (lowest_e != 0); lo++)
		{
			const int lo_e = atc_expand_endpoint_to_8_bits(lo, size0);

			for (int hi = 0; (hi < size1) && (lowest_e != 0); hi++)
			{
				const int hi_e = atc_expand_endpoint_to_8_bits(hi, size1);

				// Selector 1 blends the endpoints; selector 3 is the high endpoint alone.
				const int e = (sel == 1) ? abs(((lo_e * 5 + hi_e * 3) / 8) - i) : abs(hi_e - i);

				if (e < lowest_e)
				{
					pTable[i].m_lo = static_cast<uint8_t>(lo);
					pTable[i].m_hi = static_cast<uint8_t>(hi);

					lowest_e = e;
				}
			} // hi
		} // lo
	} // i
}
6411
6412
static void transcoder_init_atc()
6413
{
6414
prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1);
6415
prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1);
6416
prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1);
6417
6418
prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3);
6419
prepare_atc_single_color_table(g_atc_match5, 1, 32, 3);
6420
prepare_atc_single_color_table(g_atc_match6, 1, 64, 3);
6421
6422
for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++)
6423
{
6424
uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low;
6425
uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high;
6426
g_etc1s_to_atc_selector_range_index[l][h] = i;
6427
}
6428
}
6429
6430
// In-memory image of one ATC color block: two 16-bit endpoint colors stored low byte first,
// followed by four selector bytes. convert_etc1s_to_atc() writes this struct directly over the
// destination block through a pointer cast, so its size must stay exactly 8 bytes.
struct atc_block
{
	uint8_t m_lo[2];
	uint8_t m_hi[2];
	uint8_t m_sels[4];

	// Stores the low color as 5:5:5 (r in bits 14..10, g in bits 9..5, b in bits 4..0); bit 15 is left 0.
	// Each component must be < 32.
	void set_low_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 32));
		const uint32_t x = (r << 10) | (g << 5) | b;
		m_lo[0] = static_cast<uint8_t>(x & 0xFF);
		m_lo[1] = static_cast<uint8_t>((x >> 8) & 0xFF);
	}

	// Stores the high color as 5:6:5 (r in bits 15..11, g in bits 10..5, b in bits 4..0).
	// r and b must be < 32, g must be < 64.
	void set_high_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 64) && (b < 32));
		const uint32_t x = (r << 11) | (g << 5) | b;
		m_hi[0] = static_cast<uint8_t>(x & 0xFF);
		m_hi[1] = static_cast<uint8_t>((x >> 8) & 0xFF);
	}
};

static_assert(sizeof(atc_block) == 8, "atc_block is written directly to the output and must be exactly 8 bytes");
6452
6453
static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
6454
{
6455
atc_block* pBlock = static_cast<atc_block*>(pDst);
6456
6457
const uint32_t low_selector = pSelector->m_lo_selector;
6458
const uint32_t high_selector = pSelector->m_hi_selector;
6459
6460
const color32& base_color = pEndpoints->m_color5;
6461
const uint32_t inten_table = pEndpoints->m_inten5;
6462
6463
if (low_selector == high_selector)
6464
{
6465
uint32_t r, g, b;
6466
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
6467
6468
pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo);
6469
pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi);
6470
6471
pBlock->m_sels[0] = 0x55;
6472
pBlock->m_sels[1] = 0x55;
6473
pBlock->m_sels[2] = 0x55;
6474
pBlock->m_sels[3] = 0x55;
6475
6476
return;
6477
}
6478
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6479
{
6480
color32 block_colors[4];
6481
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6482
6483
const uint32_t r0 = block_colors[0].r;
6484
const uint32_t g0 = block_colors[0].g;
6485
const uint32_t b0 = block_colors[0].b;
6486
6487
const uint32_t r1 = block_colors[3].r;
6488
const uint32_t g1 = block_colors[3].g;
6489
const uint32_t b1 = block_colors[3].b;
6490
6491
pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi);
6492
pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi);
6493
6494
pBlock->m_sels[0] = pSelector->m_selectors[0];
6495
pBlock->m_sels[1] = pSelector->m_selectors[1];
6496
pBlock->m_sels[2] = pSelector->m_selectors[2];
6497
pBlock->m_sels[3] = pSelector->m_selectors[3];
6498
6499
return;
6500
}
6501
6502
const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
6503
6504
//[32][8][RANGES][MAPPING]
6505
const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6506
const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6507
const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6508
6509
uint32_t best_err = UINT_MAX;
6510
uint32_t best_mapping = 0;
6511
6512
assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
6513
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6514
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6515
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6516
#undef DO_ITER
6517
6518
pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
6519
pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
6520
6521
if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
6522
{
6523
pBlock->m_sels[0] = pSelector->m_selectors[0];
6524
pBlock->m_sels[1] = pSelector->m_selectors[1];
6525
pBlock->m_sels[2] = pSelector->m_selectors[2];
6526
pBlock->m_sels[3] = pSelector->m_selectors[3];
6527
}
6528
else
6529
{
6530
const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
6531
6532
const uint32_t sel_bits0 = pSelector->m_selectors[0];
6533
const uint32_t sel_bits1 = pSelector->m_selectors[1];
6534
const uint32_t sel_bits2 = pSelector->m_selectors[2];
6535
const uint32_t sel_bits3 = pSelector->m_selectors[3];
6536
6537
uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0;
6538
6539
#define DO_X(x) { \
6540
const uint32_t x_shift = (x) * 2; \
6541
atc_sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
6542
atc_sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
6543
atc_sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
6544
atc_sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
6545
6546
DO_X(0);
6547
DO_X(1);
6548
DO_X(2);
6549
DO_X(3);
6550
#undef DO_X
6551
6552
pBlock->m_sels[0] = (uint8_t)atc_sels0;
6553
pBlock->m_sels[1] = (uint8_t)atc_sels1;
6554
pBlock->m_sels[2] = (uint8_t)atc_sels2;
6555
pBlock->m_sels[3] = (uint8_t)atc_sels3;
6556
}
6557
}
6558
6559
#if BASISD_WRITE_NEW_ATC_TABLES
6560
static void create_etc1s_to_atc_conversion_tables()
6561
{
6562
// ATC 55
6563
FILE* pFile = nullptr;
6564
fopen_s(&pFile, "basisu_transcoder_tables_atc_55.inc", "w");
6565
6566
uint32_t n = 0;
6567
6568
for (int inten = 0; inten < 8; inten++)
6569
{
6570
for (uint32_t g = 0; g < 32; g++)
6571
{
6572
color32 block_colors[4];
6573
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6574
6575
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6576
{
6577
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6578
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6579
6580
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6581
{
6582
uint32_t best_lo = 0;
6583
uint32_t best_hi = 0;
6584
uint64_t best_err = UINT64_MAX;
6585
6586
for (uint32_t hi = 0; hi <= 31; hi++)
6587
{
6588
for (uint32_t lo = 0; lo <= 31; lo++)
6589
{
6590
uint32_t colors[4];
6591
6592
colors[0] = (lo << 3) | (lo >> 2);
6593
colors[3] = (hi << 3) | (hi >> 2);
6594
6595
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6596
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6597
6598
uint64_t total_err = 0;
6599
6600
for (uint32_t s = low_selector; s <= high_selector; s++)
6601
{
6602
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6603
6604
int err_scale = 1;
6605
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6606
// the low/high selectors which are clamping to either 0 or 255.
6607
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6608
err_scale = 5;
6609
6610
total_err += (err * err) * err_scale;
6611
}
6612
6613
if (total_err < best_err)
6614
{
6615
best_err = total_err;
6616
best_lo = lo;
6617
best_hi = hi;
6618
}
6619
}
6620
}
6621
6622
//assert(best_err <= 0xFFFF);
6623
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6624
6625
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6626
n++;
6627
if ((n & 31) == 31)
6628
fprintf(pFile, "\n");
6629
} // m
6630
} // sr
6631
} // g
6632
} // inten
6633
6634
fclose(pFile);
6635
pFile = nullptr;
6636
6637
// ATC 56
6638
fopen_s(&pFile, "basisu_transcoder_tables_atc_56.inc", "w");
6639
6640
n = 0;
6641
6642
for (int inten = 0; inten < 8; inten++)
6643
{
6644
for (uint32_t g = 0; g < 32; g++)
6645
{
6646
color32 block_colors[4];
6647
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6648
6649
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6650
{
6651
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6652
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6653
6654
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6655
{
6656
uint32_t best_lo = 0;
6657
uint32_t best_hi = 0;
6658
uint64_t best_err = UINT64_MAX;
6659
6660
for (uint32_t hi = 0; hi <= 63; hi++)
6661
{
6662
for (uint32_t lo = 0; lo <= 31; lo++)
6663
{
6664
uint32_t colors[4];
6665
6666
colors[0] = (lo << 3) | (lo >> 2);
6667
colors[3] = (hi << 2) | (hi >> 4);
6668
6669
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6670
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6671
6672
uint64_t total_err = 0;
6673
6674
for (uint32_t s = low_selector; s <= high_selector; s++)
6675
{
6676
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6677
6678
int err_scale = 1;
6679
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6680
// the low/high selectors which are clamping to either 0 or 255.
6681
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6682
err_scale = 5;
6683
6684
total_err += (err * err) * err_scale;
6685
}
6686
6687
if (total_err < best_err)
6688
{
6689
best_err = total_err;
6690
best_lo = lo;
6691
best_hi = hi;
6692
}
6693
}
6694
}
6695
6696
//assert(best_err <= 0xFFFF);
6697
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6698
6699
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6700
n++;
6701
if ((n & 31) == 31)
6702
fprintf(pFile, "\n");
6703
} // m
6704
} // sr
6705
} // g
6706
} // inten
6707
6708
fclose(pFile);
6709
6710
// PVRTC2 45
6711
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w");
6712
6713
n = 0;
6714
6715
for (int inten = 0; inten < 8; inten++)
6716
{
6717
for (uint32_t g = 0; g < 32; g++)
6718
{
6719
color32 block_colors[4];
6720
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6721
6722
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6723
{
6724
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6725
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6726
6727
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6728
{
6729
uint32_t best_lo = 0;
6730
uint32_t best_hi = 0;
6731
uint64_t best_err = UINT64_MAX;
6732
6733
for (uint32_t hi = 0; hi <= 31; hi++)
6734
{
6735
for (uint32_t lo = 0; lo <= 15; lo++)
6736
{
6737
uint32_t colors[4];
6738
6739
colors[0] = (lo << 1) | (lo >> 3);
6740
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6741
6742
colors[3] = (hi << 3) | (hi >> 2);
6743
6744
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6745
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6746
6747
uint64_t total_err = 0;
6748
6749
for (uint32_t s = low_selector; s <= high_selector; s++)
6750
{
6751
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6752
6753
int err_scale = 1;
6754
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6755
// the low/high selectors which are clamping to either 0 or 255.
6756
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6757
err_scale = 5;
6758
6759
total_err += (err * err) * err_scale;
6760
}
6761
6762
if (total_err < best_err)
6763
{
6764
best_err = total_err;
6765
best_lo = lo;
6766
best_hi = hi;
6767
}
6768
}
6769
}
6770
6771
//assert(best_err <= 0xFFFF);
6772
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6773
6774
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6775
n++;
6776
if ((n & 31) == 31)
6777
fprintf(pFile, "\n");
6778
} // m
6779
} // sr
6780
} // g
6781
} // inten
6782
6783
fclose(pFile);
6784
6785
#if 0
6786
// PVRTC2 34
6787
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_34.inc", "w");
6788
6789
n = 0;
6790
6791
for (int inten = 0; inten < 8; inten++)
6792
{
6793
for (uint32_t g = 0; g < 32; g++)
6794
{
6795
color32 block_colors[4];
6796
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6797
6798
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6799
{
6800
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6801
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6802
6803
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6804
{
6805
uint32_t best_lo = 0;
6806
uint32_t best_hi = 0;
6807
uint64_t best_err = UINT64_MAX;
6808
6809
for (uint32_t hi = 0; hi <= 15; hi++)
6810
{
6811
for (uint32_t lo = 0; lo <= 7; lo++)
6812
{
6813
uint32_t colors[4];
6814
6815
colors[0] = (lo << 2) | (lo >> 1);
6816
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6817
6818
colors[3] = (hi << 1) | (hi >> 3);
6819
colors[3] = (colors[3] << 3) | (colors[3] >> 2);
6820
6821
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6822
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6823
6824
uint64_t total_err = 0;
6825
6826
for (uint32_t s = low_selector; s <= high_selector; s++)
6827
{
6828
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6829
6830
int err_scale = 1;
6831
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6832
// the low/high selectors which are clamping to either 0 or 255.
6833
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6834
err_scale = 5;
6835
6836
total_err += (err * err) * err_scale;
6837
}
6838
6839
if (total_err < best_err)
6840
{
6841
best_err = total_err;
6842
best_lo = lo;
6843
best_hi = hi;
6844
}
6845
}
6846
}
6847
6848
//assert(best_err <= 0xFFFF);
6849
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6850
6851
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6852
n++;
6853
if ((n & 31) == 31)
6854
fprintf(pFile, "\n");
6855
} // m
6856
} // sr
6857
} // g
6858
} // inten
6859
6860
fclose(pFile);
6861
#endif
6862
#if 0
6863
// PVRTC2 44
6864
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_44.inc", "w");
6865
6866
n = 0;
6867
6868
for (int inten = 0; inten < 8; inten++)
6869
{
6870
for (uint32_t g = 0; g < 32; g++)
6871
{
6872
color32 block_colors[4];
6873
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6874
6875
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6876
{
6877
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6878
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6879
6880
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6881
{
6882
uint32_t best_lo = 0;
6883
uint32_t best_hi = 0;
6884
uint64_t best_err = UINT64_MAX;
6885
6886
for (uint32_t hi = 0; hi <= 15; hi++)
6887
{
6888
for (uint32_t lo = 0; lo <= 15; lo++)
6889
{
6890
uint32_t colors[4];
6891
6892
colors[0] = (lo << 1) | (lo >> 3);
6893
colors[0] = (colors[0] << 3) | (colors[0] >> 2);
6894
6895
colors[3] = (hi << 1) | (hi >> 3);
6896
colors[3] = (colors[3] << 3) | (colors[3] >> 2);
6897
6898
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6899
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6900
6901
uint64_t total_err = 0;
6902
6903
for (uint32_t s = low_selector; s <= high_selector; s++)
6904
{
6905
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6906
6907
int err_scale = 1;
6908
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6909
// the low/high selectors which are clamping to either 0 or 255.
6910
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6911
err_scale = 5;
6912
6913
total_err += (err * err) * err_scale;
6914
}
6915
6916
if (total_err < best_err)
6917
{
6918
best_err = total_err;
6919
best_lo = lo;
6920
best_hi = hi;
6921
}
6922
}
6923
}
6924
6925
//assert(best_err <= 0xFFFF);
6926
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
6927
6928
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
6929
n++;
6930
if ((n & 31) == 31)
6931
fprintf(pFile, "\n");
6932
} // m
6933
} // sr
6934
} // g
6935
} // inten
6936
6937
fclose(pFile);
6938
#endif
6939
6940
// PVRTC2 alpha 33
6941
fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_alpha_33.inc", "w");
6942
6943
n = 0;
6944
6945
for (int inten = 0; inten < 8; inten++)
6946
{
6947
for (uint32_t g = 0; g < 32; g++)
6948
{
6949
color32 block_colors[4];
6950
decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
6951
6952
for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
6953
{
6954
const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
6955
const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;
6956
6957
for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
6958
{
6959
uint32_t best_lo = 0;
6960
uint32_t best_hi = 0;
6961
uint64_t best_err = UINT64_MAX;
6962
6963
for (uint32_t hi = 0; hi <= 7; hi++)
6964
{
6965
for (uint32_t lo = 0; lo <= 7; lo++)
6966
{
6967
uint32_t colors[4];
6968
6969
colors[0] = (lo << 1);
6970
colors[0] = (colors[0] << 4) | colors[0];
6971
6972
colors[3] = (hi << 1) | 1;
6973
colors[3] = (colors[3] << 4) | colors[3];
6974
6975
colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
6976
colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;
6977
6978
uint64_t total_err = 0;
6979
6980
for (uint32_t s = low_selector; s <= high_selector; s++)
6981
{
6982
int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];
6983
6984
int err_scale = 1;
6985
// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
6986
// the low/high selectors which are clamping to either 0 or 255.
6987
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
6988
err_scale = 5;
6989
6990
total_err += (err * err) * err_scale;
6991
}
6992
6993
if (total_err < best_err)
6994
{
6995
best_err = total_err;
6996
best_lo = lo;
6997
best_hi = hi;
6998
}
6999
}
7000
}
7001
7002
//assert(best_err <= 0xFFFF);
7003
best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
7004
7005
fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
7006
n++;
7007
if ((n & 31) == 31)
7008
fprintf(pFile, "\n");
7009
} // m
7010
} // sr
7011
} // g
7012
} // inten
7013
7014
fclose(pFile);
7015
}
7016
#endif // BASISD_WRITE_NEW_ATC_TABLES
7017
7018
#endif // BASISD_SUPPORT_ATC
7019
7020
#if BASISD_SUPPORT_PVRTC2
7021
// In-memory image of one PVRTC2 block: four modulation bytes followed by a 32-bit color word.
// The color word can be viewed as opaque-mode bitfields, transparent-mode bitfields, or raw bits.
// The converters in this file write this struct directly over the destination block through a
// pointer cast, so its size must stay exactly 8 bytes.
struct pvrtc2_block
{
	uint8_t m_modulation[4];

	union
	{
		union
		{
			// Opaque mode: RGB colora=554 and colorb=555
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 4;
				uint32_t m_green_a : 5;
				uint32_t m_red_a : 5;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 5;
				uint32_t m_green_b : 5;
				uint32_t m_red_b : 5;
				uint32_t m_opaque_flag : 1;

			} m_opaque_color_data;

			// Transparent mode: RGBA colora=4433 and colorb=4443
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 3;
				uint32_t m_green_a : 4;
				uint32_t m_red_a : 4;
				uint32_t m_alpha_a : 3;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 4;
				uint32_t m_green_b : 4;
				uint32_t m_red_b : 4;
				uint32_t m_alpha_b : 3;
				uint32_t m_opaque_flag : 1;

			} m_trans_color_data;
		};

		uint32_t m_color_data_bits;
	};

	// 554
	// Sets opaque-mode color A. r and g must be < 32, b must be < 16.
	void set_low_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 16));
		m_opaque_color_data.m_red_a = r;
		m_opaque_color_data.m_green_a = g;
		m_opaque_color_data.m_blue_a = b;
	}

	// 555
	// Sets opaque-mode color B. All components must be < 32.
	void set_high_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 32));
		m_opaque_color_data.m_red_b = r;
		m_opaque_color_data.m_green_b = g;
		m_opaque_color_data.m_blue_b = b;
	}

	// 4433
	// Sets transparent-mode color A. r and g must be < 16, b and a must be < 8.
	void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
		m_trans_color_data.m_red_a = r;
		m_trans_color_data.m_green_a = g;
		m_trans_color_data.m_blue_a = b;
		m_trans_color_data.m_alpha_a = a;
	}

	// 4443
	// Sets transparent-mode color B. r, g and b must be < 16, a must be < 8.
	void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
		m_trans_color_data.m_red_b = r;
		m_trans_color_data.m_green_b = g;
		m_trans_color_data.m_blue_b = b;
		m_trans_color_data.m_alpha_b = a;
	}
};

static_assert(sizeof(pvrtc2_block) == 8, "pvrtc2_block is written directly to the output and must be exactly 8 bytes");
7103
7104
// PVRTC2 transparent/alpha match tables, each holding 256 (low, high) pairs.
// NOTE(review): presumably indexed by an 8-bit value and filled by the PVRTC2 init code, which is
// not visible in this part of the file - confirm against transcoder init.

// NOTE(review): "34" presumably means a 3-bit low / 4-bit high endpoint - confirm.
static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_trans_match34[256];

// NOTE(review): "44" presumably means 4-bit low / 4-bit high endpoints - confirm.
static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_trans_match44[256];

// NOTE(review): "33" presumably means 3-bit low / 3-bit high alpha endpoints - confirm.
static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33[256];

// NOTE(review): the "_0" / "_3" suffixes presumably name the selector being matched - confirm.
static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33_0[256];

static struct
{
	uint8_t m_l;
	uint8_t m_h;
} g_pvrtc2_alpha_match33_3[256];
7128
7129
// PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity.
7130
static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
7131
{
7132
pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
7133
7134
pBlock->m_opaque_color_data.m_hard_flag = 1;
7135
pBlock->m_opaque_color_data.m_mod_flag = 0;
7136
pBlock->m_opaque_color_data.m_opaque_flag = 1;
7137
7138
const uint32_t low_selector = pSelector->m_lo_selector;
7139
const uint32_t high_selector = pSelector->m_hi_selector;
7140
7141
const color32& base_color = pEndpoints->m_color5;
7142
const uint32_t inten_table = pEndpoints->m_inten5;
7143
7144
if (low_selector == high_selector)
7145
{
7146
uint32_t r, g, b;
7147
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
7148
7149
pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo);
7150
pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi);
7151
7152
pBlock->m_modulation[0] = 0x55;
7153
pBlock->m_modulation[1] = 0x55;
7154
pBlock->m_modulation[2] = 0x55;
7155
pBlock->m_modulation[3] = 0x55;
7156
7157
return;
7158
}
7159
else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
7160
{
7161
color32 block_colors[4];
7162
decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
7163
7164
const uint32_t r0 = block_colors[0].r;
7165
const uint32_t g0 = block_colors[0].g;
7166
const uint32_t b0 = block_colors[0].b;
7167
7168
const uint32_t r1 = block_colors[3].r;
7169
const uint32_t g1 = block_colors[3].g;
7170
const uint32_t b1 = block_colors[3].b;
7171
7172
pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi);
7173
pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi);
7174
7175
pBlock->m_modulation[0] = pSelector->m_selectors[0];
7176
pBlock->m_modulation[1] = pSelector->m_selectors[1];
7177
pBlock->m_modulation[2] = pSelector->m_selectors[2];
7178
pBlock->m_modulation[3] = pSelector->m_selectors[3];
7179
7180
return;
7181
}
7182
7183
const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
7184
7185
//[32][8][RANGES][MAPPING]
7186
const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7187
const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7188
const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
7189
7190
uint32_t best_err = UINT_MAX;
7191
uint32_t best_mapping = 0;
7192
7193
assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
7194
#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
7195
DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
7196
DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
7197
#undef DO_ITER
7198
7199
pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
7200
pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
7201
7202
if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
7203
{
7204
pBlock->m_modulation[0] = pSelector->m_selectors[0];
7205
pBlock->m_modulation[1] = pSelector->m_selectors[1];
7206
pBlock->m_modulation[2] = pSelector->m_selectors[2];
7207
pBlock->m_modulation[3] = pSelector->m_selectors[3];
7208
}
7209
else
7210
{
7211
// TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does.
7212
const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
7213
7214
const uint32_t sel_bits0 = pSelector->m_selectors[0];
7215
const uint32_t sel_bits1 = pSelector->m_selectors[1];
7216
const uint32_t sel_bits2 = pSelector->m_selectors[2];
7217
const uint32_t sel_bits3 = pSelector->m_selectors[3];
7218
7219
uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0;
7220
7221
#define DO_X(x) { \
7222
const uint32_t x_shift = (x) * 2; \
7223
sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
7224
sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
7225
sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
7226
sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
7227
7228
DO_X(0);
7229
DO_X(1);
7230
DO_X(2);
7231
DO_X(3);
7232
#undef DO_X
7233
7234
pBlock->m_modulation[0] = (uint8_t)sels0;
7235
pBlock->m_modulation[1] = (uint8_t)sels1;
7236
pBlock->m_modulation[2] = (uint8_t)sels2;
7237
pBlock->m_modulation[3] = (uint8_t)sels3;
7238
}
7239
}
7240
7241
static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; }
7242
static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; }
7243
static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; }
7244
static inline vec4F vec4F_saturate(const vec4F* pV) { vec4F res; res.c[0] = saturate(pV->c[0]); res.c[1] = saturate(pV->c[1]); res.c[2] = saturate(pV->c[2]); res.c[3] = saturate(pV->c[3]); return res; }
7245
static inline vec4F vec4F_from_color(const color32* pC) { vec4F res; vec4F_set(&res, pC->c[0], pC->c[1], pC->c[2], pC->c[3]); return res; }
7246
static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] + pRHS->c[0], pLHS->c[1] + pRHS->c[1], pLHS->c[2] + pRHS->c[2], pLHS->c[3] + pRHS->c[3]); return res; }
7247
static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] - pRHS->c[0], pLHS->c[1] - pRHS->c[1], pLHS->c[2] - pRHS->c[2], pLHS->c[3] - pRHS->c[3]); return res; }
7248
static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS) { return pLHS->c[0] * pRHS->c[0] + pLHS->c[1] * pRHS->c[1] + pLHS->c[2] * pRHS->c[2] + pLHS->c[3] * pRHS->c[3]; }
7249
static inline vec4F vec4F_mul(const vec4F* pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->c[0] * s, pLHS->c[1] * s, pLHS->c[2] * s, pLHS->c[3] * s); return res; }
7250
static inline vec4F* vec4F_normalize_in_place(vec4F* pV) { float s = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->c[0] *= s; pV->c[1] *= s; pV->c[2] *= s; pV->c[3] *= s; } return pV; }
7251
7252
// Expands a color holding 5-bit R, G, B and 4-bit A components to 8 bits per channel by bit replication.
static color32 convert_rgba_5554_to_8888(const color32& col)
{
	// 5 -> 8 bits: the top 3 bits are repeated in the low bits.
	const int r8 = (col[0] << 3) | (col[0] >> 2);
	const int g8 = (col[1] << 3) | (col[1] >> 2);
	const int b8 = (col[2] << 3) | (col[2] >> 2);

	// 4 -> 8 bits: the nibble is duplicated.
	const int a8 = (col[3] << 4) | col[3];

	return color32(r8, g8, b8, a8);
}
7256
7257
// Returns x squared.
static inline int sq(int x)
{
	const int squared = x * x;
	return squared;
}
7258
7259
// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
7260
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
7261
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
7262
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
7263
{
7264
pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
7265
7266
const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]];
7267
const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]];
7268
7269
pBlock->m_opaque_color_data.m_hard_flag = 1;
7270
pBlock->m_opaque_color_data.m_mod_flag = 0;
7271
pBlock->m_opaque_color_data.m_opaque_flag = 0;
7272
7273
const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
7274
7275
const color32& alpha_base_color = alpha_endpoint.m_color5;
7276
const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
7277
7278
int constant_alpha_val = -1;
7279
7280
int alpha_block_colors[4];
7281
decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
7282
7283
if (num_unique_alpha_selectors == 1)
7284
{
7285
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
7286
}
7287
else
7288
{
7289
constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
7290
7291
for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++)
7292
{
7293
if (constant_alpha_val != alpha_block_colors[i])
7294
{
7295
constant_alpha_val = -1;
7296
break;
7297
}
7298
}
7299
}
7300
7301
if (constant_alpha_val >= 250)
7302
{
7303
// It's opaque enough, so don't bother trying to encode it as an alpha block.
7304
convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector);
7305
return;
7306
}
7307
7308
const color32& base_color = pEndpoints->m_color5;
7309
const uint32_t inten_table = pEndpoints->m_inten5;
7310
7311
const uint32_t low_selector = pSelector->m_lo_selector;
7312
const uint32_t high_selector = pSelector->m_hi_selector;
7313
7314
const int num_unique_color_selectors = pSelector->m_num_unique_selectors;
7315
7316
// We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes.
7317
// Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values.
7318
const int br = (base_color.r << 3) | (base_color.r >> 2);
7319
const int bg = (base_color.g << 3) | (base_color.g >> 2);
7320
const int bb = (base_color.b << 3) | (base_color.b >> 2);
7321
7322
color32 block_cols[4];
7323
for (uint32_t i = 0; i < 4; i++)
7324
{
7325
const int ci = g_etc1_inten_tables[inten_table][i];
7326
block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]);
7327
}
7328
7329
bool solid_color_block = true;
7330
if (num_unique_color_selectors > 1)
7331
{
7332
for (uint32_t i = low_selector + 1; i <= high_selector; i++)
7333
{
7334
if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b))
7335
{
7336
solid_color_block = false;
7337
break;
7338
}
7339
}
7340
}
7341
7342
if ((solid_color_block) && (constant_alpha_val >= 0))
7343
{
7344
// Constant color/alpha block.
7345
// This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one.
7346
uint32_t r, g, b;
7347
decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
7348
7349
// Mod 0
7350
uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255;
7351
uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l;
7352
7353
uint32_t cr0 = (lr0 << 1) | (lr0 >> 3);
7354
uint32_t cg0 = (lg0 << 1) | (lg0 >> 3);
7355
uint32_t cb0 = (lb0 << 2) | (lb0 >> 1);
7356
uint32_t ca0 = (la0 << 1);
7357
7358
cr0 = (cr0 << 3) | (cr0 >> 2);
7359
cg0 = (cg0 << 3) | (cg0 >> 2);
7360
cb0 = (cb0 << 3) | (cb0 >> 2);
7361
ca0 = (ca0 << 4) | ca0;
7362
7363
uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2;
7364
7365
// If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque.
7366
if ((err0 == 0) || (constant_alpha_val < 3))
7367
{
7368
pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
7369
pBlock->set_trans_high_color(0, 0, 0, 0);
7370
7371
pBlock->m_modulation[0] = 0;
7372
pBlock->m_modulation[1] = 0;
7373
pBlock->m_modulation[2] = 0;
7374
pBlock->m_modulation[3] = 0;
7375
return;
7376
}
7377
7378
// Mod 3
7379
uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255;
7380
uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l;
7381
7382
uint32_t cr3 = (lr3 << 1) | (lr3 >> 3);
7383
uint32_t cg3 = (lg3 << 1) | (lg3 >> 3);
7384
uint32_t cb3 = (lb3 << 1) | (lb3 >> 3);
7385
uint32_t ca3 = (la3 << 1) | 1;
7386
7387
cr3 = (cr3 << 3) | (cr3 >> 2);
7388
cg3 = (cg3 << 3) | (cg3 >> 2);
7389
cb3 = (cb3 << 3) | (cb3 >> 2);
7390
ca3 = (ca3 << 4) | ca3;
7391
7392
uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2;
7393
7394
// Mod 1
7395
uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l;
7396
uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h;
7397
uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h;
7398
7399
uint32_t clr1 = (lr1 << 1) | (lr1 >> 3);
7400
uint32_t clg1 = (lg1 << 1) | (lg1 >> 3);
7401
uint32_t clb1 = (lb1 << 2) | (lb1 >> 1);
7402
uint32_t cla1 = (la1 << 1);
7403
7404
clr1 = (clr1 << 3) | (clr1 >> 2);
7405
clg1 = (clg1 << 3) | (clg1 >> 2);
7406
clb1 = (clb1 << 3) | (clb1 >> 2);
7407
cla1 = (cla1 << 4) | cla1;
7408
7409
uint32_t chr1 = (hr1 << 1) | (hr1 >> 3);
7410
uint32_t chg1 = (hg1 << 1) | (hg1 >> 3);
7411
uint32_t chb1 = (hb1 << 1) | (hb1 >> 3);
7412
uint32_t cha1 = (ha1 << 1) | 1;
7413
7414
chr1 = (chr1 << 3) | (chr1 >> 2);
7415
chg1 = (chg1 << 3) | (chg1 >> 2);
7416
chb1 = (chb1 << 3) | (chb1 >> 2);
7417
cha1 = (cha1 << 4) | cha1;
7418
7419
uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8;
7420
uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8;
7421
uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8;
7422
uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8;
7423
7424
uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2;
7425
7426
if ((err1 < err0) && (err1 < err3))
7427
{
7428
pBlock->set_trans_low_color(lr1, lg1, lb1, la1);
7429
pBlock->set_trans_high_color(hr1, hg1, hb1, ha1);
7430
7431
pBlock->m_modulation[0] = 0x55;
7432
pBlock->m_modulation[1] = 0x55;
7433
pBlock->m_modulation[2] = 0x55;
7434
pBlock->m_modulation[3] = 0x55;
7435
}
7436
else if (err0 < err3)
7437
{
7438
pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
7439
pBlock->set_trans_high_color(0, 0, 0, 0);
7440
7441
pBlock->m_modulation[0] = 0;
7442
pBlock->m_modulation[1] = 0;
7443
pBlock->m_modulation[2] = 0;
7444
pBlock->m_modulation[3] = 0;
7445
}
7446
else
7447
{
7448
pBlock->set_trans_low_color(0, 0, 0, 0);
7449
pBlock->set_trans_high_color(lr3, lg3, lb3, la3);
7450
7451
pBlock->m_modulation[0] = 0xFF;
7452
pBlock->m_modulation[1] = 0xFF;
7453
pBlock->m_modulation[2] = 0xFF;
7454
pBlock->m_modulation[3] = 0xFF;
7455
}
7456
7457
return;
7458
}
7459
7460
// It's a complex block with non-solid color and/or alpha pixels.
7461
vec4F minColor, maxColor;
7462
7463
if (solid_color_block)
7464
{
7465
// It's a solid color block.
7466
uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a;
7467
uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a;
7468
7469
const float S = 1.0f / 255.0f;
7470
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S);
7471
vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S);
7472
}
7473
else if (constant_alpha_val >= 0)
7474
{
7475
// It's a solid alpha block.
7476
const float S = 1.0f / 255.0f;
7477
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S);
7478
vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S);
7479
}
7480
// See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis).
7481
// To keep quality up we need to use full 4D PCA in this case.
7482
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) ||
7483
(block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) ||
7484
(block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) ||
7485
(block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255))
7486
{
7487
// Find principle component of RGBA colors treated as 4D vectors.
7488
color32 pixels[16];
7489
7490
uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
7491
for (uint32_t i = 0; i < 16; i++)
7492
{
7493
color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]);
7494
uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a;
7495
7496
pixels[i].set(rgb.r, rgb.g, rgb.b, a);
7497
7498
sum_r += rgb.r;
7499
sum_g += rgb.g;
7500
sum_b += rgb.b;
7501
sum_a += a;
7502
}
7503
7504
vec4F meanColor;
7505
vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a);
7506
vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f);
7507
7508
meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f));
7509
vec4F_saturate_in_place(&meanColor);
7510
7511
vec4F axis;
7512
vec4F_set_scalar(&axis, 0.0f);
7513
// Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D.
7514
for (uint32_t i = 0; i < 16; i++)
7515
{
7516
vec4F color = vec4F_from_color(&pixels[i]);
7517
color = vec4F_sub(&color, &meanColorScaled);
7518
vec4F a = vec4F_mul(&color, color.c[0]);
7519
vec4F b = vec4F_mul(&color, color.c[1]);
7520
vec4F c = vec4F_mul(&color, color.c[2]);
7521
vec4F d = vec4F_mul(&color, color.c[3]);
7522
vec4F n = i ? axis : color;
7523
vec4F_normalize_in_place(&n);
7524
axis.c[0] += vec4F_dot(&a, &n);
7525
axis.c[1] += vec4F_dot(&b, &n);
7526
axis.c[2] += vec4F_dot(&c, &n);
7527
axis.c[3] += vec4F_dot(&d, &n);
7528
}
7529
7530
vec4F_normalize_in_place(&axis);
7531
7532
if (vec4F_dot(&axis, &axis) < .5f)
7533
vec4F_set_scalar(&axis, .5f);
7534
7535
float l = 1e+9f, h = -1e+9f;
7536
7537
for (uint32_t i = 0; i < 16; i++)
7538
{
7539
vec4F color = vec4F_from_color(&pixels[i]);
7540
7541
vec4F q = vec4F_sub(&color, &meanColorScaled);
7542
float d = vec4F_dot(&q, &axis);
7543
7544
l = basisu::minimum(l, d);
7545
h = basisu::maximum(h, d);
7546
}
7547
7548
l *= (1.0f / 255.0f);
7549
h *= (1.0f / 255.0f);
7550
7551
vec4F b0 = vec4F_mul(&axis, l);
7552
vec4F b1 = vec4F_mul(&axis, h);
7553
vec4F c0 = vec4F_add(&meanColor, &b0);
7554
vec4F c1 = vec4F_add(&meanColor, &b1);
7555
minColor = vec4F_saturate(&c0);
7556
maxColor = vec4F_saturate(&c1);
7557
if (minColor.c[3] > maxColor.c[3])
7558
{
7559
// VS 2019 release Code Generator issue
7560
//std::swap(minColor, maxColor);
7561
7562
float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3];
7563
minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
7564
minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
7565
maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d;
7566
}
7567
}
7568
else
7569
{
7570
// We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA.
7571
// We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L.
7572
uint32_t block_cols_l[4], block_cols_a[4];
7573
for (uint32_t i = 0; i < 4; i++)
7574
{
7575
block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b;
7576
block_cols_a[i] = block_cols[i].a * 3;
7577
}
7578
7579
int p0_min = INT_MAX, p0_max = INT_MIN;
7580
int p1_min = INT_MAX, p1_max = INT_MIN;
7581
for (uint32_t y = 0; y < 4; y++)
7582
{
7583
const uint32_t cs = pSelector->m_selectors[y];
7584
const uint32_t as = alpha_selectors.m_selectors[y];
7585
7586
{
7587
const int l = block_cols_l[cs & 3];
7588
const int a = block_cols_a[as & 3];
7589
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7590
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7591
}
7592
{
7593
const int l = block_cols_l[(cs >> 2) & 3];
7594
const int a = block_cols_a[(as >> 2) & 3];
7595
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7596
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7597
}
7598
{
7599
const int l = block_cols_l[(cs >> 4) & 3];
7600
const int a = block_cols_a[(as >> 4) & 3];
7601
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7602
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7603
}
7604
{
7605
const int l = block_cols_l[cs >> 6];
7606
const int a = block_cols_a[as >> 6];
7607
const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
7608
const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
7609
}
7610
}
7611
7612
int dist0 = p0_max - p0_min;
7613
int dist1 = p1_max - p1_min;
7614
7615
const float S = 1.0f / 255.0f;
7616
7617
vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, block_cols[alpha_selectors.m_lo_selector].a * S);
7618
vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S);
7619
7620
// See if the A component of the principle axis is flipped relative to L. If so, we need to flip either RGB or A bounds.
7621
if (dist1 > dist0)
7622
{
7623
std::swap(minColor.c[0], maxColor.c[0]);
7624
std::swap(minColor.c[1], maxColor.c[1]);
7625
std::swap(minColor.c[2], maxColor.c[2]);
7626
}
7627
}
7628
7629
// 4433 4443
7630
color32 trialMinColor, trialMaxColor;
7631
7632
trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f));
7633
trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f));
7634
7635
pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a);
7636
pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a);
7637
7638
color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1);
7639
color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1);
7640
7641
color32 color0(convert_rgba_5554_to_8888(color_a));
7642
color32 color3(convert_rgba_5554_to_8888(color_b));
7643
7644
const int lr = color0.r;
7645
const int lg = color0.g;
7646
const int lb = color0.b;
7647
const int la = color0.a;
7648
7649
const int axis_r = color3.r - lr;
7650
const int axis_g = color3.g - lg;
7651
const int axis_b = color3.b - lb;
7652
const int axis_a = color3.a - la;
7653
const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a);
7654
7655
const int thresh01 = (len_a * 3) / 16;
7656
const int thresh12 = len_a >> 1;
7657
const int thresh23 = (len_a * 13) / 16;
7658
7659
if ((axis_r | axis_g | axis_b) == 0)
7660
{
7661
int ca_sel[4];
7662
7663
for (uint32_t i = 0; i < 4; i++)
7664
{
7665
int ca = (block_cols[i].a - la) * axis_a;
7666
ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01);
7667
}
7668
7669
for (uint32_t y = 0; y < 4; y++)
7670
{
7671
const uint32_t a_sels = alpha_selectors.m_selectors[y];
7672
7673
uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6);
7674
7675
pBlock->m_modulation[y] = (uint8_t)sel;
7676
}
7677
}
7678
else
7679
{
7680
int cy[4], ca[4];
7681
7682
for (uint32_t i = 0; i < 4; i++)
7683
{
7684
cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b;
7685
ca[i] = (block_cols[i].a - la) * axis_a;
7686
}
7687
7688
for (uint32_t y = 0; y < 4; y++)
7689
{
7690
const uint32_t c_sels = pSelector->m_selectors[y];
7691
const uint32_t a_sels = alpha_selectors.m_selectors[y];
7692
7693
const int d0 = cy[c_sels & 3] + ca[a_sels & 3];
7694
const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3];
7695
const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3];
7696
const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6];
7697
7698
uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) |
7699
(((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) |
7700
(((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) |
7701
(((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6);
7702
7703
pBlock->m_modulation[y] = (uint8_t)sel;
7704
}
7705
}
7706
}
7707
7708
static void transcoder_init_pvrtc2()
7709
{
7710
for (uint32_t v = 0; v < 256; v++)
7711
{
7712
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7713
7714
for (uint32_t l = 0; l < 8; l++)
7715
{
7716
uint32_t le = (l << 1);
7717
le = (le << 4) | le;
7718
7719
for (uint32_t h = 0; h < 8; h++)
7720
{
7721
uint32_t he = (h << 1) | 1;
7722
he = (he << 4) | he;
7723
7724
uint32_t m = (le * 5 + he * 3) / 8;
7725
7726
int err = (int)labs((int)v - (int)m);
7727
if (err < lowest_err)
7728
{
7729
lowest_err = err;
7730
best_l = l;
7731
best_h = h;
7732
}
7733
}
7734
}
7735
7736
g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l;
7737
g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h;
7738
}
7739
7740
for (uint32_t v = 0; v < 256; v++)
7741
{
7742
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7743
7744
for (uint32_t l = 0; l < 8; l++)
7745
{
7746
uint32_t le = (l << 1);
7747
le = (le << 4) | le;
7748
7749
int err = (int)labs((int)v - (int)le);
7750
if (err < lowest_err)
7751
{
7752
lowest_err = err;
7753
best_l = l;
7754
best_h = l;
7755
}
7756
}
7757
7758
g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l;
7759
g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h;
7760
}
7761
7762
for (uint32_t v = 0; v < 256; v++)
7763
{
7764
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7765
7766
for (uint32_t h = 0; h < 8; h++)
7767
{
7768
uint32_t he = (h << 1) | 1;
7769
he = (he << 4) | he;
7770
7771
int err = (int)labs((int)v - (int)he);
7772
if (err < lowest_err)
7773
{
7774
lowest_err = err;
7775
best_l = h;
7776
best_h = h;
7777
}
7778
}
7779
7780
g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l;
7781
g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h;
7782
}
7783
7784
for (uint32_t v = 0; v < 256; v++)
7785
{
7786
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7787
7788
for (uint32_t l = 0; l < 8; l++)
7789
{
7790
uint32_t le = (l << 2) | (l >> 1);
7791
le = (le << 3) | (le >> 2);
7792
7793
for (uint32_t h = 0; h < 16; h++)
7794
{
7795
uint32_t he = (h << 1) | (h >> 3);
7796
he = (he << 3) | (he >> 2);
7797
7798
uint32_t m = (le * 5 + he * 3) / 8;
7799
7800
int err = (int)labs((int)v - (int)m);
7801
if (err < lowest_err)
7802
{
7803
lowest_err = err;
7804
best_l = l;
7805
best_h = h;
7806
}
7807
}
7808
}
7809
7810
g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l;
7811
g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h;
7812
}
7813
7814
for (uint32_t v = 0; v < 256; v++)
7815
{
7816
int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7817
7818
for (uint32_t l = 0; l < 16; l++)
7819
{
7820
uint32_t le = (l << 1) | (l >> 3);
7821
le = (le << 3) | (le >> 2);
7822
7823
for (uint32_t h = 0; h < 16; h++)
7824
{
7825
uint32_t he = (h << 1) | (h >> 3);
7826
he = (he << 3) | (he >> 2);
7827
7828
uint32_t m = (le * 5 + he * 3) / 8;
7829
7830
int err = (int)labs((int)v - (int)m);
7831
if (err < lowest_err)
7832
{
7833
lowest_err = err;
7834
best_l = l;
7835
best_h = h;
7836
}
7837
}
7838
}
7839
7840
g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l;
7841
g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h;
7842
}
7843
}
7844
#endif // BASISD_SUPPORT_PVRTC2
7845
7846
//------------------------------------------------------------------------------------------------
7847
7848
// BC7 mode 5 RGB encoder
7849
7850
#if BASISD_SUPPORT_BC7_MODE5
7851
namespace bc7_mode_5_encoder
7852
{
7853
static float g_mode5_rgba_midpoints[128];
7854
7855
void encode_bc7_mode5_init()
7856
{
7857
// Mode 5 endpoint midpoints
7858
for (uint32_t i = 0; i < 128; i++)
7859
{
7860
uint32_t vl = (i << 1);
7861
vl |= (vl >> 7);
7862
float lo = vl / 255.0f;
7863
7864
uint32_t vh = basisu::minimumi(127, i + 1) << 1;
7865
vh |= (vh >> 7);
7866
float hi = vh / 255.0f;
7867
7868
if (i == 127)
7869
g_mode5_rgba_midpoints[i] = 1e+15f;
7870
else
7871
g_mode5_rgba_midpoints[i] = (lo + hi) / 2.0f;
7872
}
7873
}
7874
7875
// Expands a 7-bit value (v < 128) to 8 bits by shifting it up one bit and copying its MSB into the new LSB,
// so 0 maps to 0 and 127 maps to 255.
static inline uint32_t from_7(uint32_t v)
{
	assert(v < 128);
	return (v << 1) | (v >> 6);
}
7880
7881
static inline int to_7(float c)
7882
{
7883
assert((c >= 0) && (c <= 1.0f));
7884
7885
int vl = (int)(c * 127.0f);
7886
vl += (c > g_mode5_rgba_midpoints[vl]);
7887
return clampi(vl, 0, 127);
7888
}
7889
7890
// Quantizes an 8-bit value (0..255) to a 7-bit level.
// The value is scaled to [0,1] and handed to the float overload, so both overloads share one rounding rule.
static inline int to_7(int c8)
{
	assert((c8 >= 0) && (c8 <= 255));

	// The float operand selects the to_7(float) overload; 255 * (1.0f / 255.0f) evaluates to exactly 1.0f.
	const float c = (float)c8 * (1.0f / 255.0f);
	return to_7(c);
}
7900
7901
// This is usable with ASTC as well, which uses the same 2-bit interpolation weights.
7902
static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
7903
{
7904
assert(w < 4);
7905
return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6;
7906
}
7907
7908
static void eval_weights(
7909
const color32 *pPixels, uint8_t* pWeights,
7910
int lr, int lg, int lb,
7911
int hr, int hg, int hb)
7912
{
7913
lr = from_7(lr); lg = from_7(lg); lb = from_7(lb);
7914
hr = from_7(hr); hg = from_7(hg); hb = from_7(hb);
7915
7916
int cr[4], cg[4], cb[4];
7917
for (uint32_t i = 0; i < 4; i++)
7918
{
7919
cr[i] = (uint8_t)bc7_interp2(lr, hr, i);
7920
cg[i] = (uint8_t)bc7_interp2(lg, hg, i);
7921
cb[i] = (uint8_t)bc7_interp2(lb, hb, i);
7922
}
7923
7924
#if 0
7925
for (uint32_t i = 0; i < 16; i++)
7926
{
7927
const int pr = pPixels[i].r, pg = pPixels[i].g, pb = pPixels[i].b;
7928
7929
uint32_t best_err = UINT32_MAX;
7930
uint32_t best_idx = 0;
7931
for (uint32_t j = 0; j < 4; j++)
7932
{
7933
uint32_t e = square(pr - cr[j]) + square(pg - cg[j]) + square(pb - cb[j]);
7934
if (e < best_err)
7935
{
7936
best_err = e;
7937
best_idx = j;
7938
}
7939
7940
pWeights[i] = (uint8_t)best_idx;
7941
}
7942
} // i
7943
#else
7944
int ar = cr[3] - cr[0], ag = cg[3] - cg[0], ab = cb[3] - cb[0];
7945
7946
int dots[4];
7947
for (uint32_t i = 0; i < 4; i++)
7948
dots[i] = (int)cr[i] * ar + (int)cg[i] * ag + (int)cb[i] * ab;
7949
7950
// seems very rare in LDR, so rare that it doesn't matter
7951
//assert(dots[0] <= dots[1]);
7952
//assert(dots[1] <= dots[2]);
7953
//assert(dots[2] <= dots[3]);
7954
7955
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
7956
7957
ar *= 2; ag *= 2; ab *= 2;
7958
7959
for (uint32_t i = 0; i < 16; i += 4)
7960
{
7961
const int d0 = pPixels[i + 0].r * ar + pPixels[i + 0].g * ag + pPixels[i + 0].b * ab;
7962
const int d1 = pPixels[i + 1].r * ar + pPixels[i + 1].g * ag + pPixels[i + 1].b * ab;
7963
const int d2 = pPixels[i + 2].r * ar + pPixels[i + 2].g * ag + pPixels[i + 2].b * ab;
7964
const int d3 = pPixels[i + 3].r * ar + pPixels[i + 3].g * ag + pPixels[i + 3].b * ab;
7965
7966
pWeights[i + 0] = (d0 > t0) + (d0 >= t1) + (d0 >= t2);
7967
pWeights[i + 1] = (d1 > t0) + (d1 >= t1) + (d1 >= t2);
7968
pWeights[i + 2] = (d2 > t0) + (d2 >= t1) + (d2 >= t2);
7969
pWeights[i + 3] = (d3 > t0) + (d3 >= t1) + (d3 >= t2);
7970
}
7971
#endif
7972
}
7973
7974
static void pack_bc7_mode5_rgb_block(
7975
bc7_mode_5* pDst_block,
7976
int lr, int lg, int lb, int hr, int hg, int hb,
7977
const uint8_t* pWeights)
7978
{
7979
assert((lr >= 0) && (lr <= 127));
7980
assert((lg >= 0) && (lg <= 127));
7981
assert((lb >= 0) && (lb <= 127));
7982
assert((hr >= 0) && (hr <= 127));
7983
assert((hg >= 0) && (hg <= 127));
7984
assert((hb >= 0) && (hb <= 127));
7985
7986
pDst_block->m_lo_bits = 0;
7987
7988
uint8_t weight_inv = 0;
7989
if (pWeights[0] & 2)
7990
{
7991
std::swap(lr, hr);
7992
std::swap(lg, hg);
7993
std::swap(lb, hb);
7994
weight_inv = 3;
7995
}
7996
assert((pWeights[0] ^ weight_inv) <= 1);
7997
7998
pDst_block->m_lo.m_mode = 32;
7999
pDst_block->m_lo.m_r0 = lr;
8000
pDst_block->m_lo.m_r1 = hr;
8001
pDst_block->m_lo.m_g0 = lg;
8002
pDst_block->m_lo.m_g1 = hg;
8003
pDst_block->m_lo.m_b0 = lb;
8004
pDst_block->m_lo.m_b1 = hb;
8005
8006
pDst_block->m_lo.m_a0 = 255;
8007
pDst_block->m_lo.m_a1_0 = 63;
8008
8009
uint64_t sel_bits = 3;
8010
uint32_t cur_ofs = 2;
8011
for (uint32_t i = 0; i < 16; i++)
8012
{
8013
assert(pWeights[i] <= 3);
8014
sel_bits |= ((uint64_t)(weight_inv ^ pWeights[i])) << cur_ofs;
8015
cur_ofs += (i ? 2 : 1);
8016
}
8017
8018
pDst_block->m_hi_bits = sel_bits;
8019
}
8020
8021
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
8022
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
8023
static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
8024
8025
static inline bool compute_least_squares_endpoints4_rgb(
8026
const color32 *pColors, const uint8_t* pSelectors,
8027
int& lr, int& lg, int& lb, int& hr, int& hg, int& hb,
8028
int total_r, int total_g, int total_b)
8029
{
8030
uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0;
8031
uint32_t weight_accum = 0;
8032
for (uint32_t i = 0; i < 16; i++)
8033
{
8034
const uint8_t r = pColors[i].r, g = pColors[i].g, b = pColors[i].b;
8035
const uint8_t sel = pSelectors[i];
8036
8037
weight_accum += g_weight_vals4[sel];
8038
uq00_r += sel * r;
8039
uq00_g += sel * g;
8040
uq00_b += sel * b;
8041
}
8042
8043
int q10_r = total_r * 3 - uq00_r;
8044
int q10_g = total_g * 3 - uq00_g;
8045
int q10_b = total_b * 3 - uq00_b;
8046
8047
float z00 = (float)((weight_accum >> 16) & 0xFF);
8048
float z10 = (float)((weight_accum >> 8) & 0xFF);
8049
float z11 = (float)(weight_accum & 0xFF);
8050
float z01 = z10;
8051
8052
float det = z00 * z11 - z01 * z10;
8053
if (fabs(det) < 1e-8f)
8054
return false;
8055
8056
det = (3.0f / 255.0f) / det;
8057
8058
float iz00, iz01, iz10, iz11;
8059
iz00 = z11 * det;
8060
iz01 = -z01 * det;
8061
iz10 = -z10 * det;
8062
iz11 = z00 * det;
8063
8064
float fhr = basisu::clamp(iz00 * (float)uq00_r + iz01 * q10_r, 0.0f, 1.0f);
8065
float flr = basisu::clamp(iz10 * (float)uq00_r + iz11 * q10_r, 0.0f, 1.0f);
8066
8067
float fhg = basisu::clamp(iz00 * (float)uq00_g + iz01 * q10_g, 0.0f, 1.0f);
8068
float flg = basisu::clamp(iz10 * (float)uq00_g + iz11 * q10_g, 0.0f, 1.0f);
8069
8070
float fhb = basisu::clamp(iz00 * (float)uq00_b + iz01 * q10_b, 0.0f, 1.0f);
8071
float flb = basisu::clamp(iz10 * (float)uq00_b + iz11 * q10_b, 0.0f, 1.0f);
8072
8073
lr = to_7(flr); lg = to_7(flg); lb = to_7(flb);
8074
hr = to_7(fhr); hg = to_7(fhg); hb = to_7(fhb);
8075
8076
return true;
8077
}
8078
8079
void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode)
8080
{
8081
assert(g_mode5_rgba_midpoints[1]);
8082
8083
int total_r = 0, total_g = 0, total_b = 0;
8084
8085
int min_r = 255, min_g = 255, min_b = 255;
8086
int max_r = 0, max_g = 0, max_b = 0;
8087
8088
for (uint32_t i = 0; i < 16; i++)
8089
{
8090
int r = pPixels[i].r, g = pPixels[i].g, b = pPixels[i].b;
8091
8092
total_r += r; total_g += g; total_b += b;
8093
8094
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
8095
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
8096
}
8097
8098
if ((min_r == max_r) && (min_g == max_g) && (min_b == max_b))
8099
{
8100
const int lr = g_bc7_m5_equals_1[min_r].m_lo, lg = g_bc7_m5_equals_1[min_g].m_lo, lb = g_bc7_m5_equals_1[min_b].m_lo;
8101
const int hr = g_bc7_m5_equals_1[min_r].m_hi, hg = g_bc7_m5_equals_1[min_g].m_hi, hb = g_bc7_m5_equals_1[min_b].m_hi;
8102
uint8_t solid_weights[16];
8103
memset(solid_weights, 1, 16);
8104
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, solid_weights);
8105
return;
8106
}
8107
8108
int mean_r = (total_r + 8) >> 4, mean_g = (total_g + 8) >> 4, mean_b = (total_b + 8) >> 4;
8109
8110
// covar rows are:
8111
// 0, 1, 2
8112
// 1, 3, 4
8113
// 2, 4, 5
8114
int icov[6] = { 0, 0, 0, 0, 0, 0 };
8115
8116
for (uint32_t i = 0; i < 16; i++)
8117
{
8118
int r = (int)pPixels[i].r - mean_r;
8119
int g = (int)pPixels[i].g - mean_g;
8120
int b = (int)pPixels[i].b - mean_b;
8121
icov[0] += r * r; icov[1] += r * g; icov[2] += r * b;
8122
icov[3] += g * g; icov[4] += g * b;
8123
icov[5] += b * b;
8124
}
8125
8126
int block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16
8127
8128
// TODO: Tune this
8129
const int32_t SIMPLE_BLOCK_THRESH = 10 * 16;
8130
8131
if ((!hq_mode) && (block_max_var < SIMPLE_BLOCK_THRESH))
8132
{
8133
const int L = 16, H = 239;
8134
8135
int lr = to_7(lerp_8bit(min_r, max_r, L));
8136
int lg = to_7(lerp_8bit(min_g, max_g, L));
8137
int lb = to_7(lerp_8bit(min_b, max_b, L));
8138
8139
int hr = to_7(lerp_8bit(min_r, max_r, H));
8140
int hg = to_7(lerp_8bit(min_g, max_g, H));
8141
int hb = to_7(lerp_8bit(min_b, max_b, H));
8142
8143
uint8_t cur_weights[16];
8144
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8145
8146
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights);
8147
return;
8148
}
8149
8150
float cov[6];
8151
for (uint32_t i = 0; i < 6; i++)
8152
cov[i] = (float)icov[i];
8153
8154
const float sc = 1.0f / (float)block_max_var;
8155
const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];
8156
8157
const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
8158
const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
8159
const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;
8160
8161
int saxis_r = 306, saxis_g = 601, saxis_b = 117;
8162
8163
float k = basisu::maximum(fabsf(alt_xr), fabsf(alt_xg), fabsf(alt_xb));
8164
if (fabs(k) >= basisu::SMALL_FLOAT_VAL)
8165
{
8166
float m = 2048.0f / k;
8167
saxis_r = (int)(alt_xr * m);
8168
saxis_g = (int)(alt_xg * m);
8169
saxis_b = (int)(alt_xb * m);
8170
}
8171
8172
saxis_r = (int)((uint32_t)saxis_r << 4U);
8173
saxis_g = (int)((uint32_t)saxis_g << 4U);
8174
saxis_b = (int)((uint32_t)saxis_b << 4U);
8175
8176
int low_dot = INT_MAX, high_dot = INT_MIN;
8177
8178
for (uint32_t i = 0; i < 16; i += 4)
8179
{
8180
int dot0 = ((pPixels[i].r * saxis_r + pPixels[i].g * saxis_g + pPixels[i].b * saxis_b) & ~0xF) + i;
8181
int dot1 = ((pPixels[i + 1].r * saxis_r + pPixels[i + 1].g * saxis_g + pPixels[i + 1].b * saxis_b) & ~0xF) + i + 1;
8182
int dot2 = ((pPixels[i + 2].r * saxis_r + pPixels[i + 2].g * saxis_g + pPixels[i + 2].b * saxis_b) & ~0xF) + i + 2;
8183
int dot3 = ((pPixels[i + 3].r * saxis_r + pPixels[i + 3].g * saxis_g + pPixels[i + 3].b * saxis_b) & ~0xF) + i + 3;
8184
8185
int min_d01 = basisu::minimum(dot0, dot1);
8186
int max_d01 = basisu::maximum(dot0, dot1);
8187
8188
int min_d23 = basisu::minimum(dot2, dot3);
8189
int max_d23 = basisu::maximum(dot2, dot3);
8190
8191
int min_d = basisu::minimum(min_d01, min_d23);
8192
int max_d = basisu::maximum(max_d01, max_d23);
8193
8194
low_dot = basisu::minimum(low_dot, min_d);
8195
high_dot = basisu::maximum(high_dot, max_d);
8196
}
8197
int low_c = low_dot & 15;
8198
int high_c = high_dot & 15;
8199
8200
int lr = to_7(pPixels[low_c].r), lg = to_7(pPixels[low_c].g), lb = to_7(pPixels[low_c].b);
8201
int hr = to_7(pPixels[high_c].r), hg = to_7(pPixels[high_c].g), hb = to_7(pPixels[high_c].b);
8202
8203
uint8_t cur_weights[16];
8204
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8205
8206
if (compute_least_squares_endpoints4_rgb(
8207
pPixels, cur_weights,
8208
lr, lg, lb, hr, hg, hb,
8209
total_r, total_g, total_b))
8210
{
8211
eval_weights(pPixels, cur_weights, lr, lg, lb, hr, hg, hb);
8212
}
8213
8214
#if 0
8215
lr = 0; lg = 0; lb = 0;
8216
hr = 0; hg = 0; hb = 0;
8217
#endif
8218
8219
pack_bc7_mode5_rgb_block((bc7_mode_5*)pDst_block, lr, lg, lb, hr, hg, hb, cur_weights);
8220
}
8221
8222
} // namespace bc7_mode_5_encoder
8223
8224
#endif // BASISD_SUPPORT_BC7_MODE5
8225
8226
//------------------------------------------------------------------------------------------------
8227
8228
// Default constructor: starts with no global codebook attached and a zero-sized
// selector history buffer.
basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder()
	: m_pGlobal_codebook(nullptr), m_selector_history_buf_size(0)
{
}
8233
8234
bool basisu_lowlevel_etc1s_transcoder::decode_palettes(
8235
uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size,
8236
uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size)
8237
{
8238
if (m_pGlobal_codebook)
8239
{
8240
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n");
8241
return false;
8242
}
8243
bitwise_decoder sym_codec;
8244
8245
huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
8246
8247
if (!sym_codec.init(pEndpoints_data, endpoints_data_size))
8248
{
8249
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n");
8250
return false;
8251
}
8252
8253
if (!sym_codec.read_huffman_table(color5_delta_model0))
8254
{
8255
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n");
8256
return false;
8257
}
8258
8259
if (!sym_codec.read_huffman_table(color5_delta_model1))
8260
{
8261
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n");
8262
return false;
8263
}
8264
8265
if (!sym_codec.read_huffman_table(color5_delta_model2))
8266
{
8267
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n");
8268
return false;
8269
}
8270
8271
if (!sym_codec.read_huffman_table(inten_delta_model))
8272
{
8273
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
8274
return false;
8275
}
8276
8277
if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid())
8278
{
8279
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
8280
return false;
8281
}
8282
8283
const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0;
8284
8285
m_local_endpoints.resize(num_endpoints);
8286
8287
color32 prev_color5(16, 16, 16, 0);
8288
uint32_t prev_inten = 0;
8289
8290
for (uint32_t i = 0; i < num_endpoints; i++)
8291
{
8292
uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model);
8293
m_local_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7);
8294
prev_inten = m_local_endpoints[i].m_inten5;
8295
8296
for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++)
8297
{
8298
int delta;
8299
if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI)
8300
delta = sym_codec.decode_huffman(color5_delta_model0);
8301
else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI)
8302
delta = sym_codec.decode_huffman(color5_delta_model1);
8303
else
8304
delta = sym_codec.decode_huffman(color5_delta_model2);
8305
8306
int v = (prev_color5[c] + delta) & 31;
8307
8308
m_local_endpoints[i].m_color5[c] = static_cast<uint8_t>(v);
8309
8310
prev_color5[c] = static_cast<uint8_t>(v);
8311
}
8312
8313
if (endpoints_are_grayscale)
8314
{
8315
m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0];
8316
m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0];
8317
}
8318
}
8319
8320
sym_codec.stop();
8321
8322
m_local_selectors.resize(num_selectors);
8323
8324
if (!sym_codec.init(pSelectors_data, selectors_data_size))
8325
{
8326
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n");
8327
return false;
8328
}
8329
8330
basist::huffman_decoding_table delta_selector_pal_model;
8331
8332
const bool used_global_selector_cb = (sym_codec.get_bits(1) == 1);
8333
8334
if (used_global_selector_cb)
8335
{
8336
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: global selector codebooks are unsupported\n");
8337
return false;
8338
}
8339
else
8340
{
8341
const bool used_hybrid_selector_cb = (sym_codec.get_bits(1) == 1);
8342
8343
if (used_hybrid_selector_cb)
8344
{
8345
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n");
8346
return false;
8347
}
8348
8349
const bool used_raw_encoding = (sym_codec.get_bits(1) == 1);
8350
8351
if (used_raw_encoding)
8352
{
8353
for (uint32_t i = 0; i < num_selectors; i++)
8354
{
8355
for (uint32_t j = 0; j < 4; j++)
8356
{
8357
uint32_t cur_byte = sym_codec.get_bits(8);
8358
8359
for (uint32_t k = 0; k < 4; k++)
8360
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8361
}
8362
8363
m_local_selectors[i].init_flags();
8364
}
8365
}
8366
else
8367
{
8368
if (!sym_codec.read_huffman_table(delta_selector_pal_model))
8369
{
8370
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n");
8371
return false;
8372
}
8373
8374
if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid()))
8375
{
8376
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n");
8377
return false;
8378
}
8379
8380
uint8_t prev_bytes[4] = { 0, 0, 0, 0 };
8381
8382
for (uint32_t i = 0; i < num_selectors; i++)
8383
{
8384
if (!i)
8385
{
8386
for (uint32_t j = 0; j < 4; j++)
8387
{
8388
uint32_t cur_byte = sym_codec.get_bits(8);
8389
prev_bytes[j] = static_cast<uint8_t>(cur_byte);
8390
8391
for (uint32_t k = 0; k < 4; k++)
8392
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8393
}
8394
m_local_selectors[i].init_flags();
8395
continue;
8396
}
8397
8398
for (uint32_t j = 0; j < 4; j++)
8399
{
8400
int delta_byte = sym_codec.decode_huffman(delta_selector_pal_model);
8401
8402
uint32_t cur_byte = delta_byte ^ prev_bytes[j];
8403
prev_bytes[j] = static_cast<uint8_t>(cur_byte);
8404
8405
for (uint32_t k = 0; k < 4; k++)
8406
m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
8407
}
8408
m_local_selectors[i].init_flags();
8409
}
8410
}
8411
}
8412
8413
sym_codec.stop();
8414
8415
return true;
8416
}
8417
8418
bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size)
8419
{
8420
basist::bitwise_decoder sym_codec;
8421
if (!sym_codec.init(pTable_data, table_data_size))
8422
{
8423
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n");
8424
return false;
8425
}
8426
8427
if (!sym_codec.read_huffman_table(m_endpoint_pred_model))
8428
{
8429
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n");
8430
return false;
8431
}
8432
8433
if (m_endpoint_pred_model.get_code_sizes().size() == 0)
8434
{
8435
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n");
8436
return false;
8437
}
8438
8439
if (!sym_codec.read_huffman_table(m_delta_endpoint_model))
8440
{
8441
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n");
8442
return false;
8443
}
8444
8445
if (m_delta_endpoint_model.get_code_sizes().size() == 0)
8446
{
8447
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n");
8448
return false;
8449
}
8450
8451
if (!sym_codec.read_huffman_table(m_selector_model))
8452
{
8453
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n");
8454
return false;
8455
}
8456
8457
if (m_selector_model.get_code_sizes().size() == 0)
8458
{
8459
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n");
8460
return false;
8461
}
8462
8463
if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model))
8464
{
8465
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n");
8466
return false;
8467
}
8468
8469
if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0)
8470
{
8471
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n");
8472
return false;
8473
}
8474
8475
m_selector_history_buf_size = sym_codec.get_bits(13);
8476
// Check for bogus values.
8477
if (!m_selector_history_buf_size)
8478
{
8479
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 5\n");
8480
return false;
8481
}
8482
8483
sym_codec.stop();
8484
8485
return true;
8486
}
8487
8488
bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
8489
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
8490
basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels, uint32_t decode_flags)
8491
{
8492
// 'pDst_blocks' unused when disabling *all* hardware transcode options
8493
// (and 'bc1_allow_threecolor_blocks' when disabling DXT)
8494
BASISU_NOTE_UNUSED(pDst_blocks);
8495
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
8496
BASISU_NOTE_UNUSED(transcode_alpha);
8497
BASISU_NOTE_UNUSED(pAlpha_blocks);
8498
8499
assert(g_transcoder_initialized);
8500
if (!g_transcoder_initialized)
8501
{
8502
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n");
8503
return false;
8504
}
8505
8506
if (!pState)
8507
pState = &m_def_state;
8508
8509
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
8510
8511
if (!output_row_pitch_in_blocks_or_pixels)
8512
{
8513
if (basis_block_format_is_uncompressed(fmt))
8514
output_row_pitch_in_blocks_or_pixels = orig_width;
8515
else
8516
{
8517
if (fmt == block_format::cFXT1_RGB)
8518
output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
8519
else
8520
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
8521
}
8522
}
8523
8524
if (basis_block_format_is_uncompressed(fmt))
8525
{
8526
if (!output_rows_in_pixels)
8527
output_rows_in_pixels = orig_height;
8528
}
8529
8530
basisu::vector<uint32_t>* pPrev_frame_indices = nullptr;
8531
if (is_video)
8532
{
8533
// TODO: Add check to make sure the caller hasn't tried skipping past p-frames
8534
//const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
8535
//const uint32_t level_index = slice_desc.m_level_index;
8536
8537
if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels)
8538
{
8539
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n");
8540
return false;
8541
}
8542
8543
pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index];
8544
if (pPrev_frame_indices->size() < total_blocks)
8545
pPrev_frame_indices->resize(total_blocks);
8546
}
8547
8548
basist::bitwise_decoder sym_codec;
8549
8550
if (!sym_codec.init(pImage_data, image_data_size))
8551
{
8552
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n");
8553
return false;
8554
}
8555
8556
approx_move_to_front selector_history_buf(m_selector_history_buf_size);
8557
8558
uint32_t cur_selector_rle_count = 0;
8559
8560
decoder_etc_block block;
8561
memset(&block, 0, sizeof(block));
8562
8563
//block.set_flip_bit(true);
8564
// Setting the flip bit to false to be compatible with the Khronos KDFS.
8565
block.set_flip_bit(false);
8566
8567
block.set_diff_bit(true);
8568
8569
// Important: This MUST be freed before this function returns.
8570
void* pPVRTC_work_mem = nullptr;
8571
uint32_t* pPVRTC_endpoints = nullptr;
8572
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
8573
{
8574
pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t)));
8575
if (!pPVRTC_work_mem)
8576
{
8577
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n");
8578
return false;
8579
}
8580
pPVRTC_endpoints = (uint32_t*)&((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y];
8581
}
8582
8583
if (pState->m_block_endpoint_preds[0].size() < num_blocks_x)
8584
{
8585
pState->m_block_endpoint_preds[0].resize(num_blocks_x);
8586
pState->m_block_endpoint_preds[1].resize(num_blocks_x);
8587
}
8588
8589
uint32_t cur_pred_bits = 0;
8590
int prev_endpoint_pred_sym = 0;
8591
int endpoint_pred_repeat_count = 0;
8592
uint32_t prev_endpoint_index = 0;
8593
const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints;
8594
const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors;
8595
if (!endpoints.size() || !selectors.size())
8596
{
8597
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n");
8598
8599
if (pPVRTC_work_mem)
8600
free(pPVRTC_work_mem);
8601
8602
return false;
8603
}
8604
8605
const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size();
8606
const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX;
8607
8608
#if BASISD_SUPPORT_BC7_MODE5
8609
const bool bc7_chroma_filtering = ((decode_flags & cDecodeFlagsNoETC1SChromaFiltering) == 0) &&
8610
((fmt == block_format::cBC7_M5_COLOR) || (fmt == block_format::cBC7));
8611
8612
basisu::vector2D<uint16_t> decoded_endpoints;
8613
if (bc7_chroma_filtering)
8614
{
8615
if (!decoded_endpoints.try_resize(num_blocks_x, num_blocks_y))
8616
{
8617
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: allocation failed\n");
8618
8619
if (pPVRTC_work_mem)
8620
free(pPVRTC_work_mem);
8621
8622
return false;
8623
}
8624
}
8625
#endif
8626
8627
for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
8628
{
8629
const uint32_t cur_block_endpoint_pred_array = block_y & 1;
8630
8631
for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
8632
{
8633
// Decode endpoint index predictor symbols
8634
if ((block_x & 1) == 0)
8635
{
8636
if ((block_y & 1) == 0)
8637
{
8638
if (endpoint_pred_repeat_count)
8639
{
8640
endpoint_pred_repeat_count--;
8641
cur_pred_bits = prev_endpoint_pred_sym;
8642
}
8643
else
8644
{
8645
cur_pred_bits = sym_codec.decode_huffman(m_endpoint_pred_model);
8646
if (cur_pred_bits == ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
8647
{
8648
endpoint_pred_repeat_count = sym_codec.decode_vlc(ENDPOINT_PRED_COUNT_VLC_BITS) + ENDPOINT_PRED_MIN_REPEAT_COUNT - 1;
8649
8650
cur_pred_bits = prev_endpoint_pred_sym;
8651
}
8652
else
8653
{
8654
prev_endpoint_pred_sym = cur_pred_bits;
8655
}
8656
}
8657
8658
pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4);
8659
}
8660
else
8661
{
8662
cur_pred_bits = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits;
8663
}
8664
}
8665
8666
// Decode endpoint index
8667
uint32_t endpoint_index, selector_index = 0;
8668
8669
const uint32_t pred = cur_pred_bits & 3;
8670
cur_pred_bits >>= 2;
8671
8672
if (pred == 0)
8673
{
8674
// Left
8675
if (!block_x)
8676
{
8677
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n");
8678
if (pPVRTC_work_mem)
8679
free(pPVRTC_work_mem);
8680
return false;
8681
}
8682
8683
endpoint_index = prev_endpoint_index;
8684
}
8685
else if (pred == 1)
8686
{
8687
// Upper
8688
if (!block_y)
8689
{
8690
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n");
8691
if (pPVRTC_work_mem)
8692
free(pPVRTC_work_mem);
8693
return false;
8694
}
8695
8696
endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_endpoint_index;
8697
}
8698
else if (pred == 2)
8699
{
8700
if (is_video)
8701
{
8702
assert(pred == CR_ENDPOINT_PRED_INDEX);
8703
endpoint_index = (*pPrev_frame_indices)[block_x + block_y * num_blocks_x];
8704
selector_index = endpoint_index >> 16;
8705
endpoint_index &= 0xFFFFU;
8706
}
8707
else
8708
{
8709
// Upper left
8710
if ((!block_x) || (!block_y))
8711
{
8712
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n");
8713
if (pPVRTC_work_mem)
8714
free(pPVRTC_work_mem);
8715
return false;
8716
}
8717
8718
endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x - 1].m_endpoint_index;
8719
}
8720
}
8721
else
8722
{
8723
// Decode and apply delta
8724
const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model);
8725
8726
endpoint_index = delta_sym + prev_endpoint_index;
8727
if (endpoint_index >= endpoints.size())
8728
endpoint_index -= (int)endpoints.size();
8729
}
8730
8731
pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index;
8732
8733
prev_endpoint_index = endpoint_index;
8734
8735
// Decode selector index
8736
if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX))
8737
{
8738
int selector_sym;
8739
if (cur_selector_rle_count > 0)
8740
{
8741
cur_selector_rle_count--;
8742
8743
selector_sym = (int)selectors.size();
8744
}
8745
else
8746
{
8747
selector_sym = sym_codec.decode_huffman(m_selector_model);
8748
8749
if (selector_sym == static_cast<int>(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX))
8750
{
8751
int run_sym = sym_codec.decode_huffman(m_selector_history_buf_rle_model);
8752
8753
if (run_sym == (SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
8754
cur_selector_rle_count = sym_codec.decode_vlc(7) + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8755
else
8756
cur_selector_rle_count = run_sym + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8757
8758
if (cur_selector_rle_count > total_blocks)
8759
{
8760
// The file is corrupted or we've got a bug.
8761
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n");
8762
if (pPVRTC_work_mem)
8763
free(pPVRTC_work_mem);
8764
return false;
8765
}
8766
8767
selector_sym = (int)selectors.size();
8768
8769
cur_selector_rle_count--;
8770
}
8771
}
8772
8773
if (selector_sym >= (int)selectors.size())
8774
{
8775
assert(m_selector_history_buf_size > 0);
8776
8777
int history_buf_index = selector_sym - (int)selectors.size();
8778
8779
if (history_buf_index >= (int)selector_history_buf.size())
8780
{
8781
// The file is corrupted or we've got a bug.
8782
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n");
8783
if (pPVRTC_work_mem)
8784
free(pPVRTC_work_mem);
8785
return false;
8786
}
8787
8788
selector_index = selector_history_buf[history_buf_index];
8789
8790
if (history_buf_index != 0)
8791
selector_history_buf.use(history_buf_index);
8792
}
8793
else
8794
{
8795
selector_index = selector_sym;
8796
8797
if (m_selector_history_buf_size)
8798
selector_history_buf.add(selector_index);
8799
}
8800
}
8801
8802
if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size()))
8803
{
8804
// The file is corrupted or we've got a bug.
8805
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n");
8806
if (pPVRTC_work_mem)
8807
free(pPVRTC_work_mem);
8808
return false;
8809
}
8810
8811
if (is_video)
8812
(*pPrev_frame_indices)[block_x + block_y * num_blocks_x] = endpoint_index | (selector_index << 16);
8813
8814
#if BASISD_ENABLE_DEBUG_FLAGS
8815
if ((g_debug_flags & cDebugFlagVisCRs) && ((fmt == block_format::cETC1) || (fmt == block_format::cBC1)))
8816
{
8817
if ((is_video) && (pred == 2))
8818
{
8819
decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8820
memset(pDst_block, 0xFF, 8);
8821
continue;
8822
}
8823
}
8824
#endif
8825
8826
const endpoint* pEndpoints = &endpoints[endpoint_index];
8827
const selector* pSelector = &selectors[selector_index];
8828
8829
switch (fmt)
8830
{
8831
case block_format::cETC1:
8832
{
8833
decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8834
8835
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8836
block.set_inten_table(0, pEndpoints->m_inten5);
8837
block.set_inten_table(1, pEndpoints->m_inten5);
8838
8839
pDst_block->m_uint32[0] = block.m_uint32[0];
8840
pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8841
8842
break;
8843
}
8844
case block_format::cBC1:
8845
{
8846
#if BASISD_SUPPORT_DXT1
8847
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8848
#if BASISD_ENABLE_DEBUG_FLAGS
8849
if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints))
8850
convert_etc1s_to_dxt1_vis(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8851
else
8852
#endif
8853
convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8854
#else
8855
assert(0);
8856
#endif
8857
break;
8858
}
8859
case block_format::cBC4:
8860
{
8861
#if BASISD_SUPPORT_DXT5A
8862
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8863
convert_etc1s_to_dxt5a(static_cast<dxt5a_block*>(pDst_block), pEndpoints, pSelector);
8864
#else
8865
assert(0);
8866
#endif
8867
break;
8868
}
8869
case block_format::cPVRTC1_4_RGB:
8870
{
8871
#if BASISD_SUPPORT_PVRTC1
8872
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8873
block.set_inten_table(0, pEndpoints->m_inten5);
8874
block.set_inten_table(1, pEndpoints->m_inten5);
8875
block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8876
8877
((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8878
8879
const color32& base_color = pEndpoints->m_color5;
8880
const uint32_t inten_table = pEndpoints->m_inten5;
8881
8882
const uint32_t low_selector = pSelector->m_lo_selector;
8883
const uint32_t high_selector = pSelector->m_hi_selector;
8884
8885
// Get block's RGB bounding box
8886
color32 block_colors[2];
8887
decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8888
8889
assert(block_colors[0][0] <= block_colors[1][0]);
8890
assert(block_colors[0][1] <= block_colors[1][1]);
8891
assert(block_colors[0][2] <= block_colors[1][2]);
8892
8893
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8894
pvrtc4_block temp;
8895
temp.set_opaque_endpoint_floor(0, block_colors[0]);
8896
temp.set_opaque_endpoint_ceil(1, block_colors[1]);
8897
8898
pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8899
#else
8900
assert(0);
8901
#endif
8902
8903
break;
8904
}
8905
case block_format::cPVRTC1_4_RGBA:
8906
{
8907
#if BASISD_SUPPORT_PVRTC1
8908
assert(pAlpha_blocks);
8909
8910
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8911
block.set_inten_table(0, pEndpoints->m_inten5);
8912
block.set_inten_table(1, pEndpoints->m_inten5);
8913
block.set_raw_selector_bits(pSelector->m_selectors[0], pSelector->m_selectors[1], pSelector->m_selectors[2], pSelector->m_selectors[3]);
8914
8915
((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8916
8917
// Get block's RGBA bounding box
8918
const color32& base_color = pEndpoints->m_color5;
8919
const uint32_t inten_table = pEndpoints->m_inten5;
8920
const uint32_t low_selector = pSelector->m_lo_selector;
8921
const uint32_t high_selector = pSelector->m_hi_selector;
8922
color32 block_colors[2];
8923
decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8924
8925
assert(block_colors[0][0] <= block_colors[1][0]);
8926
assert(block_colors[0][1] <= block_colors[1][1]);
8927
assert(block_colors[0][2] <= block_colors[1][2]);
8928
8929
const uint16_t* pAlpha_block = reinterpret_cast<uint16_t*>(static_cast<uint8_t*>(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t));
8930
8931
const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]];
8932
const selector* pAlpha_selector = &selectors[pAlpha_block[1]];
8933
8934
const color32& alpha_base_color = pAlpha_endpoints->m_color5;
8935
const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5;
8936
const uint32_t alpha_low_selector = pAlpha_selector->m_lo_selector;
8937
const uint32_t alpha_high_selector = pAlpha_selector->m_hi_selector;
8938
uint32_t alpha_block_colors[2];
8939
decoder_etc_block::get_block_colors5_bounds_g(alpha_block_colors, alpha_base_color, alpha_inten_table, alpha_low_selector, alpha_high_selector);
8940
assert(alpha_block_colors[0] <= alpha_block_colors[1]);
8941
block_colors[0].a = (uint8_t)alpha_block_colors[0];
8942
block_colors[1].a = (uint8_t)alpha_block_colors[1];
8943
8944
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8945
pvrtc4_block temp;
8946
temp.set_endpoint_floor(0, block_colors[0]);
8947
temp.set_endpoint_ceil(1, block_colors[1]);
8948
8949
pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8950
#else
8951
assert(0);
8952
#endif
8953
8954
break;
8955
}
8956
case block_format::cBC7: // for more consistency with UASTC
8957
case block_format::cBC7_M5_COLOR:
8958
{
8959
#if BASISD_SUPPORT_BC7_MODE5
8960
if (bc7_chroma_filtering)
8961
{
8962
assert(endpoint_index <= UINT16_MAX);
8963
decoded_endpoints(block_x, block_y) = (uint16_t)endpoint_index;
8964
}
8965
8966
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8967
convert_etc1s_to_bc7_m5_color(pDst_block, pEndpoints, pSelector);
8968
#else
8969
assert(0);
8970
#endif
8971
break;
8972
}
8973
case block_format::cBC7_M5_ALPHA:
8974
{
8975
#if BASISD_SUPPORT_BC7_MODE5
8976
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8977
convert_etc1s_to_bc7_m5_alpha(pDst_block, pEndpoints, pSelector);
8978
#else
8979
assert(0);
8980
#endif
8981
break;
8982
}
8983
case block_format::cETC2_EAC_A8:
8984
{
8985
#if BASISD_SUPPORT_ETC2_EAC_A8
8986
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8987
convert_etc1s_to_etc2_eac_a8(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
8988
#else
8989
assert(0);
8990
#endif
8991
break;
8992
}
8993
case block_format::cASTC_4x4:
8994
{
8995
#if BASISD_SUPPORT_ASTC
8996
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8997
convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]);
8998
#else
8999
assert(0);
9000
#endif
9001
break;
9002
}
9003
case block_format::cATC_RGB:
9004
{
9005
#if BASISD_SUPPORT_ATC
9006
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9007
convert_etc1s_to_atc(pDst_block, pEndpoints, pSelector);
9008
#else
9009
assert(0);
9010
#endif
9011
break;
9012
}
9013
case block_format::cFXT1_RGB:
9014
{
9015
#if BASISD_SUPPORT_FXT1
9016
const uint32_t fxt1_block_x = block_x >> 1;
9017
const uint32_t fxt1_block_y = block_y;
9018
const uint32_t fxt1_subblock = block_x & 1;
9019
9020
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (fxt1_block_x + fxt1_block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9021
9022
convert_etc1s_to_fxt1(pDst_block, pEndpoints, pSelector, fxt1_subblock);
9023
#else
9024
assert(0);
9025
#endif
9026
break;
9027
}
9028
case block_format::cPVRTC2_4_RGB:
9029
{
9030
#if BASISD_SUPPORT_PVRTC2
9031
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9032
convert_etc1s_to_pvrtc2_rgb(pDst_block, pEndpoints, pSelector);
9033
#endif
9034
break;
9035
}
9036
case block_format::cPVRTC2_4_RGBA:
9037
{
9038
#if BASISD_SUPPORT_PVRTC2
9039
assert(transcode_alpha);
9040
9041
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9042
9043
convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]);
9044
#endif
9045
break;
9046
}
9047
case block_format::cIndices:
9048
{
9049
uint16_t* pDst_block = reinterpret_cast<uint16_t *>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
9050
pDst_block[0] = static_cast<uint16_t>(endpoint_index);
9051
pDst_block[1] = static_cast<uint16_t>(selector_index);
9052
break;
9053
}
9054
case block_format::cA32:
9055
{
9056
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9057
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9058
9059
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9060
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9061
9062
int colors[4];
9063
decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9064
9065
if (max_x == 4)
9066
{
9067
for (uint32_t y = 0; y < max_y; y++)
9068
{
9069
const uint32_t s = pSelector->m_selectors[y];
9070
9071
pDst_pixels[3] = static_cast<uint8_t>(colors[s & 3]);
9072
pDst_pixels[3+4] = static_cast<uint8_t>(colors[(s >> 2) & 3]);
9073
pDst_pixels[3+8] = static_cast<uint8_t>(colors[(s >> 4) & 3]);
9074
pDst_pixels[3+12] = static_cast<uint8_t>(colors[(s >> 6) & 3]);
9075
9076
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9077
}
9078
}
9079
else
9080
{
9081
for (uint32_t y = 0; y < max_y; y++)
9082
{
9083
const uint32_t s = pSelector->m_selectors[y];
9084
9085
for (uint32_t x = 0; x < max_x; x++)
9086
pDst_pixels[3 + 4 * x] = static_cast<uint8_t>(colors[(s >> (x * 2)) & 3]);
9087
9088
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9089
}
9090
}
9091
9092
break;
9093
}
9094
case block_format::cRGB32:
9095
{
9096
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9097
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9098
9099
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9100
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9101
9102
color32 colors[4];
9103
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9104
9105
for (uint32_t y = 0; y < max_y; y++)
9106
{
9107
const uint32_t s = pSelector->m_selectors[y];
9108
9109
for (uint32_t x = 0; x < max_x; x++)
9110
{
9111
const color32& c = colors[(s >> (x * 2)) & 3];
9112
9113
pDst_pixels[0 + 4 * x] = c.r;
9114
pDst_pixels[1 + 4 * x] = c.g;
9115
pDst_pixels[2 + 4 * x] = c.b;
9116
}
9117
9118
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9119
}
9120
9121
break;
9122
}
9123
case block_format::cRGBA32:
9124
{
9125
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9126
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9127
9128
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9129
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9130
9131
color32 colors[4];
9132
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9133
9134
for (uint32_t y = 0; y < max_y; y++)
9135
{
9136
const uint32_t s = pSelector->m_selectors[y];
9137
9138
for (uint32_t x = 0; x < max_x; x++)
9139
{
9140
const color32& c = colors[(s >> (x * 2)) & 3];
9141
9142
pDst_pixels[0 + 4 * x] = c.r;
9143
pDst_pixels[1 + 4 * x] = c.g;
9144
pDst_pixels[2 + 4 * x] = c.b;
9145
pDst_pixels[3 + 4 * x] = 255;
9146
}
9147
9148
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9149
}
9150
9151
break;
9152
}
9153
case block_format::cRGB565:
9154
case block_format::cBGR565:
9155
{
9156
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9157
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9158
9159
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9160
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9161
9162
color32 colors[4];
9163
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9164
9165
uint16_t packed_colors[4];
9166
if (fmt == block_format::cRGB565)
9167
{
9168
for (uint32_t i = 0; i < 4; i++)
9169
{
9170
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
9171
if (BASISD_IS_BIG_ENDIAN)
9172
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9173
}
9174
}
9175
else
9176
{
9177
for (uint32_t i = 0; i < 4; i++)
9178
{
9179
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
9180
if (BASISD_IS_BIG_ENDIAN)
9181
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9182
}
9183
}
9184
9185
for (uint32_t y = 0; y < max_y; y++)
9186
{
9187
const uint32_t s = pSelector->m_selectors[y];
9188
9189
for (uint32_t x = 0; x < max_x; x++)
9190
reinterpret_cast<uint16_t *>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9191
9192
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9193
}
9194
9195
break;
9196
}
9197
case block_format::cRGBA4444_COLOR:
9198
{
9199
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9200
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9201
9202
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9203
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9204
9205
color32 colors[4];
9206
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9207
9208
uint16_t packed_colors[4];
9209
for (uint32_t i = 0; i < 4; i++)
9210
{
9211
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4));
9212
}
9213
9214
for (uint32_t y = 0; y < max_y; y++)
9215
{
9216
const uint32_t s = pSelector->m_selectors[y];
9217
9218
for (uint32_t x = 0; x < max_x; x++)
9219
{
9220
uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
9221
if (BASISD_IS_BIG_ENDIAN)
9222
cur = byteswap_uint16(cur);
9223
9224
cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
9225
9226
if (BASISD_IS_BIG_ENDIAN)
9227
cur = byteswap_uint16(cur);
9228
9229
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
9230
}
9231
9232
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9233
}
9234
9235
break;
9236
}
9237
case block_format::cRGBA4444_COLOR_OPAQUE:
9238
{
9239
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9240
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9241
9242
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9243
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9244
9245
color32 colors[4];
9246
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9247
9248
uint16_t packed_colors[4];
9249
for (uint32_t i = 0; i < 4; i++)
9250
{
9251
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
9252
if (BASISD_IS_BIG_ENDIAN)
9253
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9254
}
9255
9256
for (uint32_t y = 0; y < max_y; y++)
9257
{
9258
const uint32_t s = pSelector->m_selectors[y];
9259
9260
for (uint32_t x = 0; x < max_x; x++)
9261
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9262
9263
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9264
}
9265
9266
break;
9267
}
9268
case block_format::cRGBA4444_ALPHA:
9269
{
9270
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9271
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9272
9273
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9274
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9275
9276
color32 colors[4];
9277
decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
9278
9279
uint16_t packed_colors[4];
9280
for (uint32_t i = 0; i < 4; i++)
9281
{
9282
packed_colors[i] = mul_8(colors[i].g, 15);
9283
if (BASISD_IS_BIG_ENDIAN)
9284
packed_colors[i] = byteswap_uint16(packed_colors[i]);
9285
}
9286
9287
for (uint32_t y = 0; y < max_y; y++)
9288
{
9289
const uint32_t s = pSelector->m_selectors[y];
9290
9291
for (uint32_t x = 0; x < max_x; x++)
9292
{
9293
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
9294
}
9295
9296
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9297
}
9298
9299
break;
9300
}
9301
case block_format::cETC2_EAC_R11:
9302
{
9303
#if BASISD_SUPPORT_ETC2_EAC_RG11
9304
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
9305
convert_etc1s_to_etc2_eac_r11(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
9306
#else
9307
assert(0);
9308
#endif
9309
break;
9310
}
9311
default:
9312
{
9313
assert(0);
9314
break;
9315
}
9316
}
9317
9318
} // block_x
9319
9320
} // block_y
9321
9322
if (endpoint_pred_repeat_count != 0)
9323
{
9324
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n");
9325
9326
if (pPVRTC_work_mem)
9327
free(pPVRTC_work_mem);
9328
9329
return false;
9330
}
9331
9332
//assert(endpoint_pred_repeat_count == 0);
9333
9334
#if BASISD_SUPPORT_PVRTC1
9335
// PVRTC post process - create per-pixel modulation values.
9336
if (fmt == block_format::cPVRTC1_4_RGB)
9337
fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y);
9338
else if (fmt == block_format::cPVRTC1_4_RGBA)
9339
fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]);
9340
#endif // BASISD_SUPPORT_PVRTC1
9341
9342
#if BASISD_SUPPORT_BC7_MODE5
9343
if (bc7_chroma_filtering)
9344
{
9345
chroma_filter_bc7_mode5(decoded_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, output_row_pitch_in_blocks_or_pixels, &endpoints[0]);
9346
}
9347
#endif
9348
9349
if (pPVRTC_work_mem)
9350
free(pPVRTC_work_mem);
9351
9352
return true;
9353
}
9354
9355
bool basis_validate_output_buffer_size(
9356
basis_tex_format source_format,
9357
transcoder_texture_format target_format,
9358
uint32_t output_blocks_buf_size_in_blocks_or_pixels,
9359
uint32_t orig_width, uint32_t orig_height,
9360
uint32_t output_row_pitch_in_blocks_or_pixels,
9361
uint32_t output_rows_in_pixels)
9362
{
9363
BASISU_NOTE_UNUSED(source_format);
9364
9365
if (basis_transcoder_format_is_uncompressed(target_format))
9366
{
9367
// Assume the output buffer is orig_width by orig_height
9368
if (!output_row_pitch_in_blocks_or_pixels)
9369
output_row_pitch_in_blocks_or_pixels = orig_width;
9370
9371
if (!output_rows_in_pixels)
9372
output_rows_in_pixels = orig_height;
9373
9374
// Now make sure the output buffer is large enough, or we'll overwrite memory.
9375
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
9376
{
9377
BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
9378
return false;
9379
}
9380
}
9381
else
9382
{
9383
const uint32_t dst_block_width = basis_get_block_width(target_format);
9384
const uint32_t dst_block_height = basis_get_block_height(target_format);
9385
//const uint32_t bytes_per_block = basis_get_bytes_per_block_or_pixel(target_format);
9386
9387
// Take into account the destination format's block width/height.
9388
const uint32_t num_dst_blocks_x = (orig_width + dst_block_width - 1) / dst_block_width;
9389
const uint32_t num_dst_blocks_y = (orig_height + dst_block_height - 1) / dst_block_height;
9390
const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y;
9391
9392
assert(total_dst_blocks);
9393
9394
// Note this only computes the # of blocks we will write during transcoding, but for PVRTC1 OpenGL may require more for very small textures.
9395
// basis_compute_transcoded_image_size_in_bytes() may return larger buffers.
9396
if (output_blocks_buf_size_in_blocks_or_pixels < total_dst_blocks)
9397
{
9398
BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels is too small\n");
9399
return false;
9400
}
9401
}
9402
9403
return true;
9404
}
9405
9406
// Returns the size in bytes of an orig_width x orig_height image once transcoded to target_format.
// Uncompressed targets are sized per pixel, block-compressed targets per block, and PVRTC1 targets
// use the padded size described in the IMG_texture_compression_pvrtc extension.
// Both dimensions must be non-zero (asserted).
uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height)
{
	assert(orig_width && orig_height);

	const uint32_t block_w = basis_get_block_width(target_format);
	const uint32_t block_h = basis_get_block_height(target_format);

	if (basis_transcoder_format_is_uncompressed(target_format))
	{
		// A plain raster image: width * bytes-per-pixel for each of the orig_height rows.
		const uint32_t pixel_size = basis_get_uncompressed_bytes_per_pixel(target_format);
		return (orig_width * pixel_size) * orig_height;
	}

	// Everything below is a 2D array of fixed-size blocks.
	const uint32_t block_size = basis_get_bytes_per_block_or_pixel(target_format);

	const bool is_pvrtc1 =
		(target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) ||
		(target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA);

	if (is_pvrtc1)
	{
		// For PVRTC1, Basis only writes (or requires) total_blocks * bytes_per_block. But GL requires extra padding for very small textures:
		// https://www.khronos.org/registry/OpenGL/extensions/IMG/IMG_texture_compression_pvrtc.txt
		const uint32_t aligned_w = (orig_width + 3) & ~3;
		const uint32_t aligned_h = (orig_height + 3) & ~3;

		// Each dimension is clamped up to at least 8 before applying the 4 bits-per-pixel size formula.
		const uint32_t padded_w = std::max(8U, aligned_w);
		const uint32_t padded_h = std::max(8U, aligned_h);

		return (padded_w * padded_h * 4 + 7) / 8;
	}

	// Round each dimension up to a whole number of destination blocks.
	const uint32_t blocks_across = (orig_width + block_w - 1) / block_w;
	const uint32_t blocks_down = (orig_height + block_h - 1) / block_h;
	const uint32_t block_count = blocks_across * blocks_down;

	assert(block_count);

	return block_count * block_size;
}
9444
9445
bool basisu_lowlevel_etc1s_transcoder::transcode_image(
9446
transcoder_texture_format target_format,
9447
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
9448
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
9449
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
9450
uint64_t rgb_offset, uint32_t rgb_length, uint64_t alpha_offset, uint32_t alpha_length,
9451
uint32_t decode_flags,
9452
bool basis_file_has_alpha_slices,
9453
bool is_video,
9454
uint32_t output_row_pitch_in_blocks_or_pixels,
9455
basisu_transcoder_state* pState,
9456
uint32_t output_rows_in_pixels)
9457
{
9458
if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length)
9459
{
9460
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n");
9461
return false;
9462
}
9463
9464
if (alpha_length)
9465
{
9466
if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length)
9467
{
9468
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n");
9469
return false;
9470
}
9471
}
9472
else
9473
{
9474
assert(!basis_file_has_alpha_slices);
9475
}
9476
9477
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
9478
{
9479
if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
9480
{
9481
// PVRTC1 only supports power of 2 dimensions
9482
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
9483
return false;
9484
}
9485
}
9486
9487
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
9488
{
9489
// Switch to PVRTC1 RGB if the input doesn't have alpha.
9490
target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
9491
}
9492
9493
const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
9494
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
9495
const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
9496
9497
if (!basis_validate_output_buffer_size(basis_tex_format::cETC1S, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
9498
{
9499
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n");
9500
return false;
9501
}
9502
9503
bool status = false;
9504
9505
const uint8_t* pData = pCompressed_data + rgb_offset;
9506
uint32_t data_len = rgb_length;
9507
bool is_alpha_slice = false;
9508
9509
// If the caller wants us to transcode the mip level's alpha data, then use the next slice.
9510
if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
9511
{
9512
pData = pCompressed_data + alpha_offset;
9513
data_len = alpha_length;
9514
is_alpha_slice = true;
9515
}
9516
9517
switch (target_format)
9518
{
9519
case transcoder_texture_format::cTFETC1_RGB:
9520
{
9521
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9522
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9523
9524
if (!status)
9525
{
9526
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
9527
}
9528
break;
9529
}
9530
case transcoder_texture_format::cTFBC1_RGB:
9531
{
9532
#if !BASISD_SUPPORT_DXT1
9533
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n");
9534
return false;
9535
#else
9536
// status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9537
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9538
if (!status)
9539
{
9540
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
9541
}
9542
break;
9543
#endif
9544
}
9545
case transcoder_texture_format::cTFBC4_R:
9546
{
9547
#if !BASISD_SUPPORT_DXT5A
9548
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n");
9549
return false;
9550
#else
9551
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9552
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9553
if (!status)
9554
{
9555
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
9556
}
9557
break;
9558
#endif
9559
}
9560
case transcoder_texture_format::cTFPVRTC1_4_RGB:
9561
{
9562
#if !BASISD_SUPPORT_PVRTC1
9563
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
9564
return false;
9565
#else
9566
// output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
9567
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9568
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9569
if (!status)
9570
{
9571
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n");
9572
}
9573
break;
9574
#endif
9575
}
9576
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
9577
{
9578
#if !BASISD_SUPPORT_PVRTC1
9579
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
9580
return false;
9581
#else
9582
assert(basis_file_has_alpha_slices);
9583
assert(alpha_length);
9584
9585
// Temp buffer to hold alpha block endpoint/selector indices
9586
basisu::vector<uint32_t> temp_block_indices(total_slice_blocks);
9587
9588
// First transcode alpha data to temp buffer
9589
//status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState);
9590
status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0, decode_flags);
9591
if (!status)
9592
{
9593
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n");
9594
}
9595
else
9596
{
9597
// output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
9598
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]);
9599
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0, decode_flags);
9600
if (!status)
9601
{
9602
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n");
9603
}
9604
}
9605
9606
break;
9607
#endif
9608
}
9609
case transcoder_texture_format::cTFBC7_RGBA:
9610
case transcoder_texture_format::cTFBC7_ALT:
9611
{
9612
#if !BASISD_SUPPORT_BC7_MODE5
9613
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n");
9614
return false;
9615
#else
9616
assert(bytes_per_block_or_pixel == 16);
9617
// We used to support transcoding just alpha to BC7 - but is that useful at all?
9618
9619
// First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks.
9620
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9621
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9622
9623
if ((status) && (basis_file_has_alpha_slices))
9624
{
9625
// Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha.
9626
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9627
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9628
}
9629
9630
if (!status)
9631
{
9632
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n");
9633
}
9634
9635
break;
9636
#endif
9637
}
9638
case transcoder_texture_format::cTFETC2_RGBA:
9639
{
9640
#if !BASISD_SUPPORT_ETC2_EAC_A8
9641
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n");
9642
return false;
9643
#else
9644
assert(bytes_per_block_or_pixel == 16);
9645
9646
if (basis_file_has_alpha_slices)
9647
{
9648
// First decode the alpha data
9649
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9650
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9651
}
9652
else
9653
{
9654
//write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
9655
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
9656
status = true;
9657
}
9658
9659
if (status)
9660
{
9661
// Now decode the color data
9662
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9663
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9664
if (!status)
9665
{
9666
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n");
9667
}
9668
}
9669
else
9670
{
9671
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n");
9672
}
9673
break;
9674
#endif
9675
}
9676
case transcoder_texture_format::cTFBC3_RGBA:
9677
{
9678
#if !BASISD_SUPPORT_DXT1
9679
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n");
9680
return false;
9681
#elif !BASISD_SUPPORT_DXT5A
9682
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9683
return false;
9684
#else
9685
assert(bytes_per_block_or_pixel == 16);
9686
9687
// First decode the alpha data
9688
if (basis_file_has_alpha_slices)
9689
{
9690
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9691
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9692
}
9693
else
9694
{
9695
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9696
status = true;
9697
}
9698
9699
if (status)
9700
{
9701
// Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
9702
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState);
9703
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9704
if (!status)
9705
{
9706
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n");
9707
}
9708
}
9709
else
9710
{
9711
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n");
9712
}
9713
9714
break;
9715
#endif
9716
}
9717
case transcoder_texture_format::cTFBC5_RG:
9718
{
9719
#if !BASISD_SUPPORT_DXT5A
9720
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9721
return false;
9722
#else
9723
assert(bytes_per_block_or_pixel == 16);
9724
9725
//bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
9726
// uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
9727
// basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0);
9728
9729
// Decode the R data (actually the green channel of the color data slice in the basis file)
9730
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9731
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9732
if (status)
9733
{
9734
if (basis_file_has_alpha_slices)
9735
{
9736
// Decode the G data (actually the green channel of the alpha data slice in the basis file)
9737
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9738
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9739
if (!status)
9740
{
9741
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n");
9742
}
9743
}
9744
else
9745
{
9746
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9747
status = true;
9748
}
9749
}
9750
else
9751
{
9752
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n");
9753
}
9754
break;
9755
#endif
9756
}
9757
case transcoder_texture_format::cTFASTC_4x4_RGBA:
9758
{
9759
#if !BASISD_SUPPORT_ASTC
9760
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n");
9761
return false;
9762
#else
9763
assert(bytes_per_block_or_pixel == 16);
9764
9765
if (basis_file_has_alpha_slices)
9766
{
9767
// First decode the alpha data to the output (we're using the output texture as a temp buffer here).
9768
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9769
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9770
if (status)
9771
{
9772
// Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and
9773
// transcode both the alpha and color data at the same time to ASTC.
9774
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
9775
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags);
9776
}
9777
}
9778
else
9779
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9780
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9781
9782
if (!status)
9783
{
9784
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n");
9785
}
9786
9787
break;
9788
#endif
9789
}
9790
case transcoder_texture_format::cTFATC_RGB:
9791
{
9792
#if !BASISD_SUPPORT_ATC
9793
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
9794
return false;
9795
#else
9796
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9797
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9798
if (!status)
9799
{
9800
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n");
9801
}
9802
break;
9803
#endif
9804
}
9805
case transcoder_texture_format::cTFATC_RGBA:
9806
{
9807
#if !BASISD_SUPPORT_ATC
9808
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
9809
return false;
9810
#elif !BASISD_SUPPORT_DXT5A
9811
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9812
return false;
9813
#else
9814
assert(bytes_per_block_or_pixel == 16);
9815
9816
// First decode the alpha data
9817
if (basis_file_has_alpha_slices)
9818
{
9819
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9820
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9821
}
9822
else
9823
{
9824
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9825
status = true;
9826
}
9827
9828
if (status)
9829
{
9830
//status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9831
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9832
if (!status)
9833
{
9834
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n");
9835
}
9836
}
9837
else
9838
{
9839
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n");
9840
}
9841
break;
9842
#endif
9843
}
9844
case transcoder_texture_format::cTFPVRTC2_4_RGB:
9845
{
9846
#if !BASISD_SUPPORT_PVRTC2
9847
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9848
return false;
9849
#else
9850
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9851
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9852
if (!status)
9853
{
9854
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n");
9855
}
9856
break;
9857
#endif
9858
}
9859
case transcoder_texture_format::cTFPVRTC2_4_RGBA:
9860
{
9861
#if !BASISD_SUPPORT_PVRTC2
9862
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9863
return false;
9864
#else
9865
if (basis_file_has_alpha_slices)
9866
{
9867
// First decode the alpha data to the output (we're using the output texture as a temp buffer here).
9868
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9869
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9870
if (!status)
9871
{
9872
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n");
9873
}
9874
else
9875
{
9876
// Now decode the color data and transcode to PVRTC2 RGBA.
9877
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
9878
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels, decode_flags);
9879
}
9880
}
9881
else
9882
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9883
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9884
9885
if (!status)
9886
{
9887
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n");
9888
}
9889
9890
break;
9891
#endif
9892
}
9893
case transcoder_texture_format::cTFRGBA32:
9894
{
9895
// Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9896
9897
// First decode the alpha data
9898
if (basis_file_has_alpha_slices)
9899
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9900
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9901
else
9902
status = true;
9903
9904
if (status)
9905
{
9906
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9907
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9908
if (!status)
9909
{
9910
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n");
9911
}
9912
}
9913
else
9914
{
9915
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n");
9916
}
9917
9918
break;
9919
}
9920
case transcoder_texture_format::cTFRGB565:
9921
case transcoder_texture_format::cTFBGR565:
9922
{
9923
// Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9924
9925
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9926
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9927
if (!status)
9928
{
9929
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n");
9930
}
9931
9932
break;
9933
}
9934
case transcoder_texture_format::cTFRGBA4444:
9935
{
9936
// Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9937
9938
// First decode the alpha data
9939
if (basis_file_has_alpha_slices)
9940
//status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9941
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9942
else
9943
status = true;
9944
9945
if (status)
9946
{
9947
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9948
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9949
if (!status)
9950
{
9951
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n");
9952
}
9953
}
9954
else
9955
{
9956
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n");
9957
}
9958
9959
break;
9960
}
9961
case transcoder_texture_format::cTFFXT1_RGB:
9962
{
9963
#if !BASISD_SUPPORT_FXT1
9964
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n");
9965
return false;
9966
#else
9967
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9968
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9969
if (!status)
9970
{
9971
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n");
9972
}
9973
break;
9974
#endif
9975
}
9976
case transcoder_texture_format::cTFETC2_EAC_R11:
9977
{
9978
#if !BASISD_SUPPORT_ETC2_EAC_RG11
9979
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9980
return false;
9981
#else
9982
//status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9983
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
9984
if (!status)
9985
{
9986
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n");
9987
}
9988
9989
break;
9990
#endif
9991
}
9992
case transcoder_texture_format::cTFETC2_EAC_RG11:
9993
{
9994
#if !BASISD_SUPPORT_ETC2_EAC_RG11
9995
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9996
return false;
9997
#else
9998
assert(bytes_per_block_or_pixel == 16);
9999
10000
if (basis_file_has_alpha_slices)
10001
{
10002
// First decode the alpha data to G
10003
//status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10004
status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
10005
}
10006
else
10007
{
10008
basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels);
10009
status = true;
10010
}
10011
10012
if (status)
10013
{
10014
// Now decode the color data to R
10015
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10016
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels, decode_flags);
10017
if (!status)
10018
{
10019
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n");
10020
}
10021
}
10022
else
10023
{
10024
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n");
10025
}
10026
10027
break;
10028
#endif
10029
}
10030
default:
10031
{
10032
assert(0);
10033
BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n");
10034
break;
10035
}
10036
}
10037
10038
return status;
10039
}
10040
10041
//------------------------------------------------------------------------------------------------
10042
10043
// Default constructor: performs no explicit initialization work of its own.
basisu_lowlevel_uastc_ldr_4x4_transcoder::basisu_lowlevel_uastc_ldr_4x4_transcoder() = default;
10046
10047
bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice(
10048
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10049
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10050
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10051
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10052
{
10053
BASISU_NOTE_UNUSED(pState);
10054
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10055
10056
assert(g_transcoder_initialized);
10057
if (!g_transcoder_initialized)
10058
{
10059
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10060
return false;
10061
}
10062
10063
#if BASISD_SUPPORT_UASTC
10064
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
10065
10066
if (!output_row_pitch_in_blocks_or_pixels)
10067
{
10068
if (basis_block_format_is_uncompressed(fmt))
10069
output_row_pitch_in_blocks_or_pixels = orig_width;
10070
else
10071
{
10072
if (fmt == block_format::cFXT1_RGB)
10073
output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
10074
else
10075
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
10076
}
10077
}
10078
10079
if (basis_block_format_is_uncompressed(fmt))
10080
{
10081
if (!output_rows_in_pixels)
10082
output_rows_in_pixels = orig_height;
10083
}
10084
10085
uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks;
10086
if (image_data_size < total_expected_block_bytes)
10087
{
10088
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10089
return false;
10090
}
10091
10092
const uastc_block* pSource_block = reinterpret_cast<const uastc_block *>(pImage_data);
10093
10094
const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0;
10095
const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
10096
10097
bool status = false;
10098
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
10099
{
10100
if (fmt == block_format::cPVRTC1_4_RGBA)
10101
transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality);
10102
else
10103
transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha);
10104
}
10105
else
10106
{
10107
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
10108
{
10109
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
10110
10111
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes)
10112
{
10113
switch (fmt)
10114
{
10115
case block_format::cUASTC_4x4:
10116
{
10117
memcpy(pDst_block, pSource_block, sizeof(uastc_block));
10118
status = true;
10119
break;
10120
}
10121
case block_format::cETC1:
10122
{
10123
if (from_alpha)
10124
status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3);
10125
else
10126
status = transcode_uastc_to_etc1(*pSource_block, pDst_block);
10127
break;
10128
}
10129
case block_format::cETC2_RGBA:
10130
{
10131
status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block);
10132
break;
10133
}
10134
case block_format::cBC1:
10135
{
10136
status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality);
10137
break;
10138
}
10139
case block_format::cBC3:
10140
{
10141
status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality);
10142
break;
10143
}
10144
case block_format::cBC4:
10145
{
10146
if (channel0 < 0)
10147
channel0 = 0;
10148
status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0);
10149
break;
10150
}
10151
case block_format::cBC5:
10152
{
10153
if (channel0 < 0)
10154
channel0 = 0;
10155
if (channel1 < 0)
10156
channel1 = 3;
10157
status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1);
10158
break;
10159
}
10160
case block_format::cBC7:
10161
case block_format::cBC7_M5_COLOR: // for consistently with ETC1S
10162
{
10163
status = transcode_uastc_to_bc7(*pSource_block, pDst_block);
10164
break;
10165
}
10166
case block_format::cASTC_4x4:
10167
{
10168
status = transcode_uastc_to_astc(*pSource_block, pDst_block);
10169
break;
10170
}
10171
case block_format::cETC2_EAC_R11:
10172
{
10173
if (channel0 < 0)
10174
channel0 = 0;
10175
status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0);
10176
break;
10177
}
10178
case block_format::cETC2_EAC_RG11:
10179
{
10180
if (channel0 < 0)
10181
channel0 = 0;
10182
if (channel1 < 0)
10183
channel1 = 3;
10184
status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1);
10185
break;
10186
}
10187
case block_format::cRGBA32:
10188
{
10189
color32 block_pixels[4][4];
10190
status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false);
10191
10192
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
10193
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
10194
10195
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10196
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10197
10198
for (uint32_t y = 0; y < max_y; y++)
10199
{
10200
for (uint32_t x = 0; x < max_x; x++)
10201
{
10202
const color32& c = block_pixels[y][x];
10203
10204
pDst_pixels[0 + 4 * x] = c.r;
10205
pDst_pixels[1 + 4 * x] = c.g;
10206
pDst_pixels[2 + 4 * x] = c.b;
10207
pDst_pixels[3 + 4 * x] = c.a;
10208
}
10209
10210
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
10211
}
10212
10213
break;
10214
}
10215
case block_format::cRGB565:
10216
case block_format::cBGR565:
10217
{
10218
color32 block_pixels[4][4];
10219
status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
10220
10221
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
10222
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
10223
10224
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10225
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10226
10227
for (uint32_t y = 0; y < max_y; y++)
10228
{
10229
for (uint32_t x = 0; x < max_x; x++)
10230
{
10231
const color32& c = block_pixels[y][x];
10232
10233
const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) :
10234
static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31));
10235
10236
pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
10237
pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
10238
}
10239
10240
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
10241
}
10242
10243
break;
10244
}
10245
case block_format::cRGBA4444:
10246
{
10247
color32 block_pixels[4][4];
10248
status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
10249
10250
assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
10251
uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
10252
10253
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10254
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10255
10256
for (uint32_t y = 0; y < max_y; y++)
10257
{
10258
for (uint32_t x = 0; x < max_x; x++)
10259
{
10260
const color32& c = block_pixels[y][x];
10261
10262
const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15));
10263
10264
pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
10265
pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
10266
}
10267
10268
pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
10269
}
10270
break;
10271
}
10272
default:
10273
assert(0);
10274
break;
10275
10276
}
10277
10278
if (!status)
10279
{
10280
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n");
10281
return false;
10282
}
10283
10284
} // block_x
10285
10286
} // block_y
10287
}
10288
10289
return true;
10290
#else
10291
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_slice: UASTC is unsupported\n");
10292
10293
BASISU_NOTE_UNUSED(decode_flags);
10294
BASISU_NOTE_UNUSED(channel0);
10295
BASISU_NOTE_UNUSED(channel1);
10296
BASISU_NOTE_UNUSED(output_rows_in_pixels);
10297
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
10298
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
10299
BASISU_NOTE_UNUSED(fmt);
10300
BASISU_NOTE_UNUSED(image_data_size);
10301
BASISU_NOTE_UNUSED(pImage_data);
10302
BASISU_NOTE_UNUSED(num_blocks_x);
10303
BASISU_NOTE_UNUSED(num_blocks_y);
10304
BASISU_NOTE_UNUSED(pDst_blocks);
10305
10306
return false;
10307
#endif
10308
}
10309
10310
bool basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image(
10311
transcoder_texture_format target_format,
10312
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
10313
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
10314
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
10315
uint64_t slice_offset, uint32_t slice_length,
10316
uint32_t decode_flags,
10317
bool has_alpha,
10318
bool is_video,
10319
uint32_t output_row_pitch_in_blocks_or_pixels,
10320
basisu_transcoder_state* pState,
10321
uint32_t output_rows_in_pixels,
10322
int channel0, int channel1)
10323
{
10324
BASISU_NOTE_UNUSED(is_video);
10325
BASISU_NOTE_UNUSED(level_index);
10326
10327
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
10328
{
10329
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: source data buffer too small\n");
10330
return false;
10331
}
10332
10333
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
10334
{
10335
if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
10336
{
10337
// PVRTC1 only supports power of 2 dimensions
10338
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
10339
return false;
10340
}
10341
}
10342
10343
if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha))
10344
{
10345
// Switch to PVRTC1 RGB if the input doesn't have alpha.
10346
target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
10347
}
10348
10349
const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
10350
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
10351
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
10352
10353
if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
10354
{
10355
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: output buffer size too small\n");
10356
return false;
10357
}
10358
10359
bool status = false;
10360
10361
// UASTC4x4
10362
switch (target_format)
10363
{
10364
case transcoder_texture_format::cTFETC1_RGB:
10365
{
10366
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10367
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1,
10368
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10369
10370
if (!status)
10371
{
10372
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
10373
}
10374
break;
10375
}
10376
case transcoder_texture_format::cTFETC2_RGBA:
10377
{
10378
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10379
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA,
10380
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10381
if (!status)
10382
{
10383
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ETC2 failed\n");
10384
}
10385
break;
10386
}
10387
case transcoder_texture_format::cTFBC1_RGB:
10388
{
10389
// TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though.
10390
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10391
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1,
10392
bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10393
if (!status)
10394
{
10395
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
10396
}
10397
break;
10398
}
10399
case transcoder_texture_format::cTFBC3_RGBA:
10400
{
10401
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10402
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3,
10403
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10404
if (!status)
10405
{
10406
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC3 failed\n");
10407
}
10408
break;
10409
}
10410
case transcoder_texture_format::cTFBC4_R:
10411
{
10412
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10413
// nullptr, 0,
10414
// ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
10415
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4,
10416
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10417
((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags);
10418
if (!status)
10419
{
10420
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
10421
}
10422
break;
10423
}
10424
case transcoder_texture_format::cTFBC5_RG:
10425
{
10426
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10427
// nullptr, 0,
10428
// 0, 3);
10429
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5,
10430
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10431
0, 3, decode_flags);
10432
if (!status)
10433
{
10434
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC5 failed\n");
10435
}
10436
break;
10437
}
10438
case transcoder_texture_format::cTFBC7_RGBA:
10439
case transcoder_texture_format::cTFBC7_ALT:
10440
{
10441
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10442
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7,
10443
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10444
if (!status)
10445
{
10446
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to BC7 failed\n");
10447
}
10448
break;
10449
}
10450
case transcoder_texture_format::cTFPVRTC1_4_RGB:
10451
{
10452
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10453
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB,
10454
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10455
if (!status)
10456
{
10457
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n");
10458
}
10459
break;
10460
}
10461
case transcoder_texture_format::cTFPVRTC1_4_RGBA:
10462
{
10463
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10464
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA,
10465
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10466
if (!status)
10467
{
10468
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n");
10469
}
10470
break;
10471
}
10472
case transcoder_texture_format::cTFASTC_4x4_RGBA:
10473
{
10474
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10475
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4,
10476
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10477
if (!status)
10478
{
10479
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n");
10480
}
10481
break;
10482
}
10483
case transcoder_texture_format::cTFATC_RGB:
10484
case transcoder_texture_format::cTFATC_RGBA:
10485
{
10486
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->ATC currently unsupported\n");
10487
return false;
10488
}
10489
case transcoder_texture_format::cTFFXT1_RGB:
10490
{
10491
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n");
10492
return false;
10493
}
10494
case transcoder_texture_format::cTFPVRTC2_4_RGB:
10495
{
10496
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
10497
return false;
10498
}
10499
case transcoder_texture_format::cTFPVRTC2_4_RGBA:
10500
{
10501
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
10502
return false;
10503
}
10504
case transcoder_texture_format::cTFETC2_EAC_R11:
10505
{
10506
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10507
// nullptr, 0,
10508
// ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
10509
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11,
10510
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10511
((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0, -1, decode_flags);
10512
if (!status)
10513
{
10514
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n");
10515
}
10516
break;
10517
}
10518
case transcoder_texture_format::cTFETC2_EAC_RG11:
10519
{
10520
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
10521
// nullptr, 0,
10522
// 0, 3);
10523
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11,
10524
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
10525
0, 3, decode_flags);
10526
if (!status)
10527
{
10528
BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_ldr_4x4_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n");
10529
}
10530
break;
10531
}
10532
case transcoder_texture_format::cTFRGBA32:
10533
{
10534
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10535
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32,
10536
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10537
if (!status)
10538
{
10539
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n");
10540
}
10541
break;
10542
}
10543
case transcoder_texture_format::cTFRGB565:
10544
{
10545
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10546
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565,
10547
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10548
if (!status)
10549
{
10550
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
10551
}
10552
break;
10553
}
10554
case transcoder_texture_format::cTFBGR565:
10555
{
10556
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10557
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565,
10558
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10559
if (!status)
10560
{
10561
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
10562
}
10563
break;
10564
}
10565
case transcoder_texture_format::cTFRGBA4444:
10566
{
10567
//status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
10568
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444,
10569
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, -1, -1, decode_flags);
10570
if (!status)
10571
{
10572
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n");
10573
}
10574
break;
10575
}
10576
default:
10577
{
10578
assert(0);
10579
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_ldr_4x4_transcoder::transcode_image: Invalid format\n");
10580
break;
10581
}
10582
}
10583
10584
return status;
10585
}
10586
10587
//------------------------------------------------------------------------------------------------
10588
// UASTC HDR 4x4
10589
10590
// The UASTC HDR 4x4 low-level transcoder needs no construction-time setup in this translation unit.
basisu_lowlevel_uastc_hdr_4x4_transcoder::basisu_lowlevel_uastc_hdr_4x4_transcoder() = default;
10593
10594
bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice(
10595
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10596
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10597
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10598
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10599
{
10600
BASISU_NOTE_UNUSED(pState);
10601
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10602
BASISU_NOTE_UNUSED(has_alpha);
10603
BASISU_NOTE_UNUSED(channel0);
10604
BASISU_NOTE_UNUSED(channel1);
10605
BASISU_NOTE_UNUSED(decode_flags);
10606
BASISU_NOTE_UNUSED(orig_width);
10607
BASISU_NOTE_UNUSED(orig_height);
10608
10609
assert(g_transcoder_initialized);
10610
if (!g_transcoder_initialized)
10611
{
10612
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10613
return false;
10614
}
10615
10616
#if BASISD_SUPPORT_UASTC_HDR
10617
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
10618
10619
if (!output_row_pitch_in_blocks_or_pixels)
10620
{
10621
if (basis_block_format_is_uncompressed(fmt))
10622
output_row_pitch_in_blocks_or_pixels = orig_width;
10623
else
10624
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
10625
}
10626
10627
if (basis_block_format_is_uncompressed(fmt))
10628
{
10629
if (!output_rows_in_pixels)
10630
output_rows_in_pixels = orig_height;
10631
}
10632
10633
uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
10634
if (image_data_size < total_expected_block_bytes)
10635
{
10636
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10637
return false;
10638
}
10639
10640
const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
10641
10642
bool status = false;
10643
10644
// TODO: Optimize pure memcpy() case.
10645
10646
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
10647
{
10648
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
10649
10650
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
10651
{
10652
switch (fmt)
10653
{
10654
case block_format::cUASTC_HDR_4x4:
10655
case block_format::cASTC_HDR_4x4:
10656
{
10657
// Nothing to do, UASTC HDR 4x4 is just ASTC.
10658
memcpy(pDst_block, pSource_block, sizeof(uastc_block));
10659
status = true;
10660
break;
10661
}
10662
case block_format::cBC6H:
10663
{
10664
status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
10665
break;
10666
}
10667
case block_format::cRGB_9E5:
10668
{
10669
astc_helpers::log_astc_block log_blk;
10670
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10671
if (status)
10672
{
10673
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
10674
static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
10675
);
10676
10677
uint32_t blk_texels[4][4];
10678
10679
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
10680
10681
if (status)
10682
{
10683
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10684
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10685
10686
for (uint32_t y = 0; y < max_y; y++)
10687
{
10688
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
10689
10690
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
10691
} // y
10692
}
10693
}
10694
10695
break;
10696
}
10697
case block_format::cRGBA_HALF:
10698
{
10699
astc_helpers::log_astc_block log_blk;
10700
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10701
if (status)
10702
{
10703
half_float* pDst_pixels = reinterpret_cast<half_float*>(
10704
static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
10705
);
10706
10707
half_float blk_texels[4][4][4];
10708
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
10709
10710
if (status)
10711
{
10712
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10713
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10714
10715
for (uint32_t y = 0; y < max_y; y++)
10716
{
10717
for (uint32_t x = 0; x < max_x; x++)
10718
{
10719
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
10720
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
10721
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
10722
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
10723
} // x
10724
10725
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
10726
} // y
10727
}
10728
}
10729
10730
break;
10731
}
10732
case block_format::cRGB_HALF:
10733
{
10734
astc_helpers:: log_astc_block log_blk;
10735
status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
10736
if (status)
10737
{
10738
half_float* pDst_pixels =
10739
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
10740
10741
half_float blk_texels[4][4][4];
10742
status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
10743
if (status)
10744
{
10745
const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
10746
const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
10747
10748
for (uint32_t y = 0; y < max_y; y++)
10749
{
10750
for (uint32_t x = 0; x < max_x; x++)
10751
{
10752
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
10753
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
10754
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
10755
} // x
10756
10757
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
10758
} // y
10759
}
10760
}
10761
10762
break;
10763
}
10764
default:
10765
assert(0);
10766
break;
10767
10768
}
10769
10770
if (!status)
10771
{
10772
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");
10773
return false;
10774
}
10775
10776
} // block_x
10777
10778
} // block_y
10779
10780
return true;
10781
#else
10782
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
10783
10784
BASISU_NOTE_UNUSED(decode_flags);
10785
BASISU_NOTE_UNUSED(channel0);
10786
BASISU_NOTE_UNUSED(channel1);
10787
BASISU_NOTE_UNUSED(output_rows_in_pixels);
10788
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
10789
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
10790
BASISU_NOTE_UNUSED(fmt);
10791
BASISU_NOTE_UNUSED(image_data_size);
10792
BASISU_NOTE_UNUSED(pImage_data);
10793
BASISU_NOTE_UNUSED(num_blocks_x);
10794
BASISU_NOTE_UNUSED(num_blocks_y);
10795
BASISU_NOTE_UNUSED(pDst_blocks);
10796
10797
return false;
10798
#endif
10799
}
10800
10801
bool basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image(
10802
transcoder_texture_format target_format,
10803
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
10804
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
10805
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
10806
uint64_t slice_offset, uint32_t slice_length,
10807
uint32_t decode_flags,
10808
bool has_alpha,
10809
bool is_video,
10810
uint32_t output_row_pitch_in_blocks_or_pixels,
10811
basisu_transcoder_state* pState,
10812
uint32_t output_rows_in_pixels,
10813
int channel0, int channel1)
10814
{
10815
BASISU_NOTE_UNUSED(is_video);
10816
BASISU_NOTE_UNUSED(level_index);
10817
BASISU_NOTE_UNUSED(decode_flags);
10818
10819
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
10820
{
10821
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: source data buffer too small\n");
10822
return false;
10823
}
10824
10825
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
10826
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
10827
10828
if (!basis_validate_output_buffer_size(basis_tex_format::cUASTC_HDR_4x4, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
10829
{
10830
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: output buffer size too small\n");
10831
return false;
10832
}
10833
10834
bool status = false;
10835
10836
switch (target_format)
10837
{
10838
case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
10839
{
10840
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
10841
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10842
10843
if (!status)
10844
{
10845
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
10846
}
10847
break;
10848
}
10849
case transcoder_texture_format::cTFBC6H:
10850
{
10851
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
10852
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10853
if (!status)
10854
{
10855
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
10856
}
10857
break;
10858
}
10859
case transcoder_texture_format::cTFRGB_HALF:
10860
{
10861
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
10862
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10863
if (!status)
10864
{
10865
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
10866
}
10867
break;
10868
}
10869
case transcoder_texture_format::cTFRGBA_HALF:
10870
{
10871
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
10872
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10873
if (!status)
10874
{
10875
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
10876
}
10877
break;
10878
}
10879
case transcoder_texture_format::cTFRGB_9E5:
10880
{
10881
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
10882
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
10883
if (!status)
10884
{
10885
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
10886
}
10887
break;
10888
}
10889
default:
10890
{
10891
assert(0);
10892
BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_4x4_transcoder::transcode_image: Invalid format\n");
10893
break;
10894
}
10895
}
10896
10897
return status;
10898
}
10899
10900
//------------------------------------------------------------------------------------------------
10901
// ASTC 6x6 HDR
10902
10903
// The ASTC HDR 6x6 low-level transcoder needs no construction-time setup in this translation unit.
basisu_lowlevel_astc_hdr_6x6_transcoder::basisu_lowlevel_astc_hdr_6x6_transcoder() = default;
10906
10907
// num_blocks_x/num_blocks_y are source 6x6 blocks
10908
bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice(
10909
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
10910
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
10911
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
10912
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
10913
{
10914
BASISU_NOTE_UNUSED(pState);
10915
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
10916
BASISU_NOTE_UNUSED(has_alpha);
10917
BASISU_NOTE_UNUSED(channel0);
10918
BASISU_NOTE_UNUSED(channel1);
10919
BASISU_NOTE_UNUSED(decode_flags);
10920
BASISU_NOTE_UNUSED(orig_width);
10921
BASISU_NOTE_UNUSED(orig_height);
10922
10923
assert(g_transcoder_initialized);
10924
if (!g_transcoder_initialized)
10925
{
10926
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder not globally initialized.\n");
10927
return false;
10928
}
10929
10930
#if BASISD_SUPPORT_UASTC_HDR
10931
const uint32_t total_src_blocks = num_blocks_x * num_blocks_y;
10932
10933
const uint32_t output_block_width = get_block_width(fmt);
10934
//const uint32_t output_block_height = get_block_height(fmt);
10935
10936
if (!output_row_pitch_in_blocks_or_pixels)
10937
{
10938
if (basis_block_format_is_uncompressed(fmt))
10939
output_row_pitch_in_blocks_or_pixels = orig_width;
10940
else
10941
output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width;
10942
}
10943
10944
if (basis_block_format_is_uncompressed(fmt))
10945
{
10946
if (!output_rows_in_pixels)
10947
output_rows_in_pixels = orig_height;
10948
}
10949
10950
uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_src_blocks;
10951
if (image_data_size < total_expected_block_bytes)
10952
{
10953
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
10954
return false;
10955
}
10956
10957
const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
10958
10959
bool status = false;
10960
10961
half_float unpacked_blocks[12][12][3]; // [y][x][c]
10962
10963
assert(((orig_width + 5) / 6) == num_blocks_x);
10964
assert(((orig_height + 5) / 6) == num_blocks_y);
10965
10966
if (fmt == block_format::cBC6H)
10967
{
10968
const uint32_t num_dst_blocks_x = (orig_width + 3) / 4;
10969
const uint32_t num_dst_blocks_y = (orig_height + 3) / 4;
10970
10971
if (!output_row_pitch_in_blocks_or_pixels)
10972
{
10973
output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x;
10974
}
10975
else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x)
10976
{
10977
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n");
10978
return false;
10979
}
10980
10981
if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block))
10982
{
10983
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n");
10984
return false;
10985
}
10986
10987
fast_bc6h_params bc6h_enc_params;
10988
const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0;
10989
bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0;
10990
10991
for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2)
10992
{
10993
const uint32_t num_inner_blocks_y = basisu::minimum<uint32_t>(2, num_blocks_y - src_block_y);
10994
10995
for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2)
10996
{
10997
const uint32_t num_inner_blocks_x = basisu::minimum<uint32_t>(2, num_blocks_x - src_block_x);
10998
10999
for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++)
11000
{
11001
for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++)
11002
{
11003
const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix);
11004
11005
half_float blk_texels[6][6][4];
11006
11007
astc_helpers::log_astc_block log_blk;
11008
status = astc_helpers::unpack_block(pS, log_blk, 6, 6);
11009
if (!status)
11010
{
11011
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11012
return false;
11013
}
11014
11015
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11016
if (!status)
11017
{
11018
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11019
return false;
11020
}
11021
11022
for (uint32_t y = 0; y < 6; y++)
11023
{
11024
for (uint32_t x = 0; x < 6; x++)
11025
{
11026
unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0];
11027
unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1];
11028
unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2];
11029
11030
} // x
11031
} // y
11032
11033
} // ix
11034
11035
} // iy
11036
11037
const uint32_t dst_x = src_block_x * 6;
11038
assert((dst_x & 3) == 0);
11039
const uint32_t dst_block_x = dst_x >> 2;
11040
11041
const uint32_t dst_y = src_block_y * 6;
11042
assert((dst_y & 3) == 0);
11043
const uint32_t dst_block_y = dst_y >> 2;
11044
11045
const uint32_t num_inner_dst_blocks_x = basisu::minimum<uint32_t>(3, num_dst_blocks_x - dst_block_x);
11046
const uint32_t num_inner_dst_blocks_y = basisu::minimum<uint32_t>(3, num_dst_blocks_y - dst_block_y);
11047
11048
for (uint32_t dy = 0; dy < num_inner_dst_blocks_y; dy++)
11049
{
11050
for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++)
11051
{
11052
bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels;
11053
11054
half_float src_pixels[4][4][3]; // [y][x][c]
11055
11056
for (uint32_t y = 0; y < 4; y++)
11057
{
11058
const uint32_t src_pixel_y = basisu::minimum<uint32_t>(dy * 4 + y, num_inner_blocks_y * 6 - 1);
11059
11060
for (uint32_t x = 0; x < 4; x++)
11061
{
11062
const uint32_t src_pixel_x = basisu::minimum<uint32_t>(dx * 4 + x, num_inner_blocks_x * 6 - 1);
11063
11064
assert((src_pixel_y < 12) && (src_pixel_x < 12));
11065
11066
src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0];
11067
src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1];
11068
src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2];
11069
11070
} // x
11071
} // y
11072
11073
astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params);
11074
11075
} // dx
11076
} // dy
11077
11078
} // block_x
11079
11080
} // block_y
11081
11082
status = true;
11083
}
11084
else
11085
{
11086
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
11087
{
11088
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
11089
11090
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
11091
{
11092
switch (fmt)
11093
{
11094
case block_format::cASTC_HDR_6x6:
11095
{
11096
// Nothing to do, ASTC HDR 6x6 is just ASTC.
11097
// TODO: Optimize this copy
11098
memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block));
11099
status = true;
11100
break;
11101
}
11102
case block_format::cRGB_9E5:
11103
{
11104
astc_helpers::log_astc_block log_blk;
11105
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11106
if (status)
11107
{
11108
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
11109
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
11110
);
11111
11112
uint32_t blk_texels[6][6];
11113
11114
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5);
11115
11116
if (status)
11117
{
11118
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11119
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11120
11121
for (uint32_t y = 0; y < max_y; y++)
11122
{
11123
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
11124
11125
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
11126
} // y
11127
}
11128
}
11129
11130
break;
11131
}
11132
case block_format::cRGBA_HALF:
11133
{
11134
astc_helpers::log_astc_block log_blk;
11135
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11136
if (status)
11137
{
11138
half_float* pDst_pixels = reinterpret_cast<half_float*>(
11139
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
11140
);
11141
11142
half_float blk_texels[6][6][4];
11143
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11144
11145
if (status)
11146
{
11147
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11148
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11149
11150
for (uint32_t y = 0; y < max_y; y++)
11151
{
11152
for (uint32_t x = 0; x < max_x; x++)
11153
{
11154
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
11155
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
11156
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
11157
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
11158
} // x
11159
11160
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
11161
} // y
11162
}
11163
}
11164
11165
break;
11166
}
11167
case block_format::cRGB_HALF:
11168
{
11169
astc_helpers::log_astc_block log_blk;
11170
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11171
if (status)
11172
{
11173
half_float* pDst_pixels =
11174
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
11175
11176
half_float blk_texels[6][6][4];
11177
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11178
if (status)
11179
{
11180
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11181
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11182
11183
for (uint32_t y = 0; y < max_y; y++)
11184
{
11185
for (uint32_t x = 0; x < max_x; x++)
11186
{
11187
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
11188
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
11189
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
11190
} // x
11191
11192
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
11193
} // y
11194
}
11195
}
11196
11197
break;
11198
}
11199
default:
11200
assert(0);
11201
break;
11202
11203
}
11204
11205
if (!status)
11206
{
11207
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11208
return false;
11209
}
11210
11211
} // block_x
11212
11213
} // block_y
11214
}
11215
11216
return true;
11217
#else
11218
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_slice: ASTC HDR is unsupported\n");
11219
11220
BASISU_NOTE_UNUSED(decode_flags);
11221
BASISU_NOTE_UNUSED(channel0);
11222
BASISU_NOTE_UNUSED(channel1);
11223
BASISU_NOTE_UNUSED(output_rows_in_pixels);
11224
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
11225
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
11226
BASISU_NOTE_UNUSED(fmt);
11227
BASISU_NOTE_UNUSED(image_data_size);
11228
BASISU_NOTE_UNUSED(pImage_data);
11229
BASISU_NOTE_UNUSED(num_blocks_x);
11230
BASISU_NOTE_UNUSED(num_blocks_y);
11231
BASISU_NOTE_UNUSED(pDst_blocks);
11232
11233
return false;
11234
#endif
11235
}
11236
11237
bool basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image(
11238
transcoder_texture_format target_format,
11239
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
11240
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
11241
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
11242
uint64_t slice_offset, uint32_t slice_length,
11243
uint32_t decode_flags,
11244
bool has_alpha,
11245
bool is_video,
11246
uint32_t output_row_pitch_in_blocks_or_pixels,
11247
basisu_transcoder_state* pState,
11248
uint32_t output_rows_in_pixels,
11249
int channel0, int channel1)
11250
{
11251
BASISU_NOTE_UNUSED(is_video);
11252
BASISU_NOTE_UNUSED(level_index);
11253
BASISU_NOTE_UNUSED(decode_flags);
11254
11255
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
11256
{
11257
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: source data buffer too small\n");
11258
return false;
11259
}
11260
11261
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
11262
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
11263
11264
if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
11265
{
11266
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: output buffer size too small\n");
11267
return false;
11268
}
11269
11270
bool status = false;
11271
11272
switch (target_format)
11273
{
11274
case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
11275
{
11276
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6,
11277
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11278
11279
if (!status)
11280
{
11281
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
11282
}
11283
break;
11284
}
11285
case transcoder_texture_format::cTFBC6H:
11286
{
11287
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
11288
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11289
if (!status)
11290
{
11291
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
11292
}
11293
break;
11294
}
11295
case transcoder_texture_format::cTFRGB_HALF:
11296
{
11297
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
11298
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11299
if (!status)
11300
{
11301
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
11302
}
11303
break;
11304
}
11305
case transcoder_texture_format::cTFRGBA_HALF:
11306
{
11307
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
11308
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11309
if (!status)
11310
{
11311
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11312
}
11313
break;
11314
}
11315
case transcoder_texture_format::cTFRGB_9E5:
11316
{
11317
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
11318
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11319
if (!status)
11320
{
11321
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11322
}
11323
break;
11324
}
11325
default:
11326
{
11327
assert(0);
11328
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_transcoder::transcode_image: Invalid format\n");
11329
break;
11330
}
11331
}
11332
11333
return status;
11334
}
11335
11336
//------------------------------------------------------------------------------------------------
11337
// ASTC 6x6 HDR intermediate
11338
11339
// Default constructor. The body is intentionally empty: nothing is initialized here.
basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder()
{
}
11342
11343
// num_blocks_x/num_blocks_y are source 6x6 blocks
11344
bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice(
11345
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
11346
uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha,
11347
const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
11348
basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
11349
{
11350
BASISU_NOTE_UNUSED(pState);
11351
BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
11352
BASISU_NOTE_UNUSED(has_alpha);
11353
BASISU_NOTE_UNUSED(channel0);
11354
BASISU_NOTE_UNUSED(channel1);
11355
BASISU_NOTE_UNUSED(decode_flags);
11356
BASISU_NOTE_UNUSED(orig_width);
11357
BASISU_NOTE_UNUSED(orig_height);
11358
11359
assert(g_transcoder_initialized);
11360
if (!g_transcoder_initialized)
11361
{
11362
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder not globally initialized.\n");
11363
return false;
11364
}
11365
11366
#if BASISD_SUPPORT_UASTC_HDR
11367
11368
// TODO: Optimize this
11369
11370
basisu::vector2D<astc_helpers::astc_block> decoded_blocks;
11371
uint32_t dec_width = 0, dec_height = 0;
11372
bool dec_status = astc_6x6_hdr::decode_6x6_hdr(pImage_data, image_data_size, decoded_blocks, dec_width, dec_height);
11373
if (!dec_status)
11374
{
11375
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: decode_6x6_hdr() failed.\n");
11376
return false;
11377
}
11378
11379
if ((dec_width != orig_width) || (dec_height != orig_height) ||
11380
(decoded_blocks.get_width() != num_blocks_x) || (decoded_blocks.get_height() != num_blocks_y))
11381
{
11382
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: unexpected decoded width/height\n");
11383
return false;
11384
}
11385
11386
//const uint32_t total_src_blocks = num_blocks_x * num_blocks_y;
11387
11388
const uint32_t output_block_width = get_block_width(fmt);
11389
//const uint32_t output_block_height = get_block_height(fmt);
11390
11391
if (!output_row_pitch_in_blocks_or_pixels)
11392
{
11393
if (basis_block_format_is_uncompressed(fmt))
11394
output_row_pitch_in_blocks_or_pixels = orig_width;
11395
else
11396
output_row_pitch_in_blocks_or_pixels = (orig_width + output_block_width - 1) / output_block_width;
11397
}
11398
11399
if (basis_block_format_is_uncompressed(fmt))
11400
{
11401
if (!output_rows_in_pixels)
11402
output_rows_in_pixels = orig_height;
11403
}
11404
11405
const astc_blk* pSource_block = (const astc_blk *)decoded_blocks.get_ptr();
11406
11407
bool status = false;
11408
11409
half_float unpacked_blocks[12][12][3]; // [y][x][c]
11410
11411
assert(((orig_width + 5) / 6) == num_blocks_x);
11412
assert(((orig_height + 5) / 6) == num_blocks_y);
11413
11414
if (fmt == block_format::cBC6H)
11415
{
11416
const uint32_t num_dst_blocks_x = (orig_width + 3) / 4;
11417
const uint32_t num_dst_blocks_y = (orig_height + 3) / 4;
11418
11419
if (!output_row_pitch_in_blocks_or_pixels)
11420
{
11421
output_row_pitch_in_blocks_or_pixels = num_dst_blocks_x;
11422
}
11423
else if (output_row_pitch_in_blocks_or_pixels < num_dst_blocks_x)
11424
{
11425
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: output_row_pitch_in_blocks_or_pixels is too low\n");
11426
return false;
11427
}
11428
11429
if (output_block_or_pixel_stride_in_bytes != sizeof(bc6h_block))
11430
{
11431
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: invalid output_block_or_pixel_stride_in_bytes\n");
11432
return false;
11433
}
11434
11435
fast_bc6h_params bc6h_enc_params;
11436
const bool hq_flag = (decode_flags & cDecodeFlagsHighQuality) != 0;
11437
bc6h_enc_params.m_max_2subset_pats_to_try = hq_flag ? 1 : 0;
11438
11439
for (uint32_t src_block_y = 0; src_block_y < num_blocks_y; src_block_y += 2)
11440
{
11441
const uint32_t num_inner_blocks_y = basisu::minimum<uint32_t>(2, num_blocks_y - src_block_y);
11442
11443
for (uint32_t src_block_x = 0; src_block_x < num_blocks_x; src_block_x += 2)
11444
{
11445
const uint32_t num_inner_blocks_x = basisu::minimum<uint32_t>(2, num_blocks_x - src_block_x);
11446
11447
for (uint32_t iy = 0; iy < num_inner_blocks_y; iy++)
11448
{
11449
for (uint32_t ix = 0; ix < num_inner_blocks_x; ix++)
11450
{
11451
const astc_blk* pS = pSource_block + (src_block_y + iy) * num_blocks_x + (src_block_x + ix);
11452
11453
half_float blk_texels[6][6][4];
11454
11455
astc_helpers::log_astc_block log_blk;
11456
status = astc_helpers::unpack_block(pS, log_blk, 6, 6);
11457
if (!status)
11458
{
11459
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11460
return false;
11461
}
11462
11463
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11464
if (!status)
11465
{
11466
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11467
return false;
11468
}
11469
11470
for (uint32_t y = 0; y < 6; y++)
11471
{
11472
for (uint32_t x = 0; x < 6; x++)
11473
{
11474
unpacked_blocks[iy * 6 + y][ix * 6 + x][0] = blk_texels[y][x][0];
11475
unpacked_blocks[iy * 6 + y][ix * 6 + x][1] = blk_texels[y][x][1];
11476
unpacked_blocks[iy * 6 + y][ix * 6 + x][2] = blk_texels[y][x][2];
11477
} // x
11478
} // y
11479
11480
} // ix
11481
11482
} // iy
11483
11484
const uint32_t dst_x = src_block_x * 6;
11485
assert((dst_x & 3) == 0);
11486
const uint32_t dst_block_x = dst_x >> 2;
11487
11488
const uint32_t dst_y = src_block_y * 6;
11489
assert((dst_y & 3) == 0);
11490
const uint32_t dst_block_y = dst_y >> 2;
11491
11492
const uint32_t num_inner_dst_blocks_x = basisu::minimum<uint32_t>(3, num_dst_blocks_x - dst_block_x);
11493
const uint32_t num_inner_dst_blocks_y = basisu::minimum<uint32_t>(3, num_dst_blocks_y - dst_block_y);
11494
11495
for (uint32_t dy = 0; dy < num_inner_dst_blocks_y; dy++)
11496
{
11497
for (uint32_t dx = 0; dx < num_inner_dst_blocks_x; dx++)
11498
{
11499
bc6h_block* pDst_block = (bc6h_block*)pDst_blocks + (dst_block_x + dx) + (dst_block_y + dy) * output_row_pitch_in_blocks_or_pixels;
11500
11501
half_float src_pixels[4][4][3]; // [y][x][c]
11502
11503
for (uint32_t y = 0; y < 4; y++)
11504
{
11505
const uint32_t src_pixel_y = basisu::minimum<uint32_t>(dy * 4 + y, num_inner_blocks_y * 6 - 1);
11506
11507
for (uint32_t x = 0; x < 4; x++)
11508
{
11509
const uint32_t src_pixel_x = basisu::minimum<uint32_t>(dx * 4 + x, num_inner_blocks_x * 6 - 1);
11510
11511
assert((src_pixel_y < 12) && (src_pixel_x < 12));
11512
11513
src_pixels[y][x][0] = unpacked_blocks[src_pixel_y][src_pixel_x][0];
11514
src_pixels[y][x][1] = unpacked_blocks[src_pixel_y][src_pixel_x][1];
11515
src_pixels[y][x][2] = unpacked_blocks[src_pixel_y][src_pixel_x][2];
11516
11517
} // x
11518
} // y
11519
11520
astc_6x6_hdr::fast_encode_bc6h(&src_pixels[0][0][0], pDst_block, bc6h_enc_params);
11521
11522
} // dx
11523
} // dy
11524
11525
} // block_x
11526
11527
} // block_y
11528
11529
status = true;
11530
}
11531
else
11532
{
11533
for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
11534
{
11535
void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
11536
11537
for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
11538
{
11539
switch (fmt)
11540
{
11541
case block_format::cASTC_HDR_6x6:
11542
{
11543
// Nothing to do, ASTC HDR 6x6 is just ASTC.
11544
// TODO: Optimize this copy
11545
memcpy(pDst_block, pSource_block, sizeof(astc_helpers::astc_block));
11546
status = true;
11547
break;
11548
}
11549
case block_format::cRGB_9E5:
11550
{
11551
astc_helpers::log_astc_block log_blk;
11552
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11553
if (status)
11554
{
11555
uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
11556
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
11557
);
11558
11559
uint32_t blk_texels[6][6];
11560
11561
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeRGB9E5);
11562
11563
if (status)
11564
{
11565
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11566
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11567
11568
for (uint32_t y = 0; y < max_y; y++)
11569
{
11570
memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
11571
11572
pDst_pixels += output_row_pitch_in_blocks_or_pixels;
11573
} // y
11574
}
11575
}
11576
11577
break;
11578
}
11579
case block_format::cRGBA_HALF:
11580
{
11581
astc_helpers::log_astc_block log_blk;
11582
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11583
if (status)
11584
{
11585
half_float* pDst_pixels = reinterpret_cast<half_float*>(
11586
static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
11587
);
11588
11589
half_float blk_texels[6][6][4];
11590
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11591
11592
if (status)
11593
{
11594
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11595
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11596
11597
for (uint32_t y = 0; y < max_y; y++)
11598
{
11599
for (uint32_t x = 0; x < max_x; x++)
11600
{
11601
pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
11602
pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
11603
pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
11604
pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
11605
} // x
11606
11607
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
11608
} // y
11609
}
11610
}
11611
11612
break;
11613
}
11614
case block_format::cRGB_HALF:
11615
{
11616
astc_helpers::log_astc_block log_blk;
11617
status = astc_helpers::unpack_block(pSource_block, log_blk, 6, 6);
11618
if (status)
11619
{
11620
half_float* pDst_pixels =
11621
reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 6 + block_y * 6 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
11622
11623
half_float blk_texels[6][6][4];
11624
status = astc_helpers::decode_block(log_blk, blk_texels, 6, 6, astc_helpers::cDecodeModeHDR16);
11625
if (status)
11626
{
11627
const uint32_t max_x = basisu::minimum<int>(6, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 6);
11628
const uint32_t max_y = basisu::minimum<int>(6, (int)output_rows_in_pixels - (int)block_y * 6);
11629
11630
for (uint32_t y = 0; y < max_y; y++)
11631
{
11632
for (uint32_t x = 0; x < max_x; x++)
11633
{
11634
pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
11635
pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
11636
pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
11637
} // x
11638
11639
pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
11640
} // y
11641
}
11642
}
11643
11644
break;
11645
}
11646
default:
11647
assert(0);
11648
break;
11649
11650
}
11651
11652
if (!status)
11653
{
11654
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: Transcoder failed to unpack a ASTC HDR block - this is a bug, or the data was corrupted\n");
11655
return false;
11656
}
11657
11658
} // block_x
11659
11660
} // block_y
11661
}
11662
11663
return true;
11664
#else
11665
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_slice: ASTC HDR is unsupported\n");
11666
11667
BASISU_NOTE_UNUSED(decode_flags);
11668
BASISU_NOTE_UNUSED(channel0);
11669
BASISU_NOTE_UNUSED(channel1);
11670
BASISU_NOTE_UNUSED(output_rows_in_pixels);
11671
BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
11672
BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
11673
BASISU_NOTE_UNUSED(fmt);
11674
BASISU_NOTE_UNUSED(image_data_size);
11675
BASISU_NOTE_UNUSED(pImage_data);
11676
BASISU_NOTE_UNUSED(num_blocks_x);
11677
BASISU_NOTE_UNUSED(num_blocks_y);
11678
BASISU_NOTE_UNUSED(pDst_blocks);
11679
11680
return false;
11681
#endif
11682
}
11683
11684
bool basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image(
11685
transcoder_texture_format target_format,
11686
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
11687
const uint8_t* pCompressed_data, uint32_t compressed_data_length,
11688
uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
11689
uint64_t slice_offset, uint32_t slice_length,
11690
uint32_t decode_flags,
11691
bool has_alpha,
11692
bool is_video,
11693
uint32_t output_row_pitch_in_blocks_or_pixels,
11694
basisu_transcoder_state* pState,
11695
uint32_t output_rows_in_pixels,
11696
int channel0, int channel1)
11697
{
11698
BASISU_NOTE_UNUSED(is_video);
11699
BASISU_NOTE_UNUSED(level_index);
11700
BASISU_NOTE_UNUSED(decode_flags);
11701
11702
if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
11703
{
11704
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: source data buffer too small\n");
11705
return false;
11706
}
11707
11708
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
11709
//const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
11710
11711
if (!basis_validate_output_buffer_size(basis_tex_format::cASTC_HDR_6x6, target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels))
11712
{
11713
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: output buffer size too small\n");
11714
return false;
11715
}
11716
11717
bool status = false;
11718
11719
switch (target_format)
11720
{
11721
case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
11722
{
11723
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_6x6,
11724
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11725
11726
if (!status)
11727
{
11728
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
11729
}
11730
break;
11731
}
11732
case transcoder_texture_format::cTFBC6H:
11733
{
11734
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
11735
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11736
if (!status)
11737
{
11738
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
11739
}
11740
break;
11741
}
11742
case transcoder_texture_format::cTFRGB_HALF:
11743
{
11744
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
11745
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11746
if (!status)
11747
{
11748
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
11749
}
11750
break;
11751
}
11752
case transcoder_texture_format::cTFRGBA_HALF:
11753
{
11754
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
11755
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1, decode_flags);
11756
if (!status)
11757
{
11758
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11759
}
11760
break;
11761
}
11762
case transcoder_texture_format::cTFRGB_9E5:
11763
{
11764
status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
11765
bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1 , decode_flags);
11766
if (!status)
11767
{
11768
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
11769
}
11770
break;
11771
}
11772
default:
11773
{
11774
assert(0);
11775
BASISU_DEVEL_ERROR("basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder::transcode_image: Invalid format\n");
11776
break;
11777
}
11778
}
11779
11780
return status;
11781
}
11782
11783
//------------------------------------------------------------------------------------------------
11784
11785
// Constructs a transcoder that is not yet ready to transcode (m_ready_to_transcode starts out false).
basisu_transcoder::basisu_transcoder()
	: m_ready_to_transcode(false)
{
}
11789
11790
bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const
11791
{
11792
if (!validate_header(pData, data_size))
11793
return false;
11794
11795
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11796
11797
#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
11798
if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16)
11799
{
11800
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n");
11801
return false;
11802
}
11803
11804
if (full_validation)
11805
{
11806
if (crc16(reinterpret_cast<const uint8_t*>(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16)
11807
{
11808
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n");
11809
return false;
11810
}
11811
}
11812
#endif
11813
11814
return true;
11815
}
11816
11817
bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const
11818
{
11819
if (data_size <= sizeof(basis_file_header))
11820
return false;
11821
11822
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11823
11824
if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
11825
{
11826
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
11827
return false;
11828
}
11829
11830
uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
11831
if (data_size < expected_file_size)
11832
{
11833
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n");
11834
return false;
11835
}
11836
11837
if ((!pHeader->m_total_slices) || (!pHeader->m_total_images))
11838
{
11839
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n");
11840
return false;
11841
}
11842
11843
if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
11844
((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
11845
)
11846
{
11847
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
11848
return false;
11849
}
11850
11851
return true;
11852
}
11853
11854
bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const
11855
{
11856
if (data_size <= sizeof(basis_file_header))
11857
{
11858
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n");
11859
return false;
11860
}
11861
11862
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
11863
11864
if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
11865
{
11866
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
11867
return false;
11868
}
11869
11870
uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
11871
if (data_size < expected_file_size)
11872
{
11873
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n");
11874
return false;
11875
}
11876
11877
if ((!pHeader->m_total_images) || (!pHeader->m_total_slices))
11878
{
11879
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n");
11880
return false;
11881
}
11882
11883
if (pHeader->m_total_images > pHeader->m_total_slices)
11884
{
11885
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n");
11886
return false;
11887
}
11888
11889
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
11890
{
11891
if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
11892
{
11893
if (pHeader->m_total_slices & 1)
11894
{
11895
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n");
11896
return false;
11897
}
11898
}
11899
11900
// This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too.
11901
if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0)
11902
{
11903
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
11904
return false;
11905
}
11906
}
11907
else
11908
{
11909
if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0)
11910
{
11911
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
11912
return false;
11913
}
11914
}
11915
11916
if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
11917
((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
11918
)
11919
{
11920
BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
11921
return false;
11922
}
11923
11924
return true;
11925
}
11926
11927
// Returns the texture type stored in the file header. If the header fails quick validation, or the stored
// type is not below cBASISTexTypeTotal, cBASISTexType2DArray is returned instead.
basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const
{
    const bool header_ok = validate_header_quick(pData, data_size);
    if (header_ok)
    {
        const basis_file_header* pFile_header = static_cast<const basis_file_header*>(pData);

        const uint8_t raw_type = static_cast<uint8_t>(pFile_header->m_tex_type);
        const basis_texture_type tex_type = static_cast<basis_texture_type>(raw_type);

        if (tex_type < cBASISTexTypeTotal)
            return tex_type;

        BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n");
        return cBASISTexType2DArray;
    }

    BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n");
    return cBASISTexType2DArray;
}
11947
11948
bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const
11949
{
11950
if (!validate_header_quick(pData, data_size))
11951
{
11952
BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n");
11953
return false;
11954
}
11955
11956
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
11957
11958
userdata0 = pHeader->m_userdata0;
11959
userdata1 = pHeader->m_userdata1;
11960
return true;
11961
}
11962
11963
uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const
11964
{
11965
if (!validate_header_quick(pData, data_size))
11966
{
11967
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
11968
return 0;
11969
}
11970
11971
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
11972
11973
return pHeader->m_total_images;
11974
}
11975
11976
// Returns the texture format stored in the file header.
// If the header fails quick validation, basis_tex_format::cETC1S is returned.
basis_tex_format basisu_transcoder::get_basis_tex_format(const void* pData, uint32_t data_size) const
{
    const bool header_ok = validate_header_quick(pData, data_size);
    if (header_ok)
    {
        const basis_file_header& file_header = *static_cast<const basis_file_header*>(pData);
        const uint32_t raw_format = (uint32_t)file_header.m_tex_format;
        return (basis_tex_format)raw_format;
    }

    BASISU_DEVEL_ERROR("basisu_transcoder::get_basis_tex_format: header validation failed\n");
    return basis_tex_format::cETC1S;
}
11988
11989
bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const
11990
{
11991
if (!validate_header_quick(pData, data_size))
11992
{
11993
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n");
11994
return false;
11995
}
11996
11997
int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
11998
if (slice_index < 0)
11999
{
12000
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n");
12001
return false;
12002
}
12003
12004
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12005
12006
if (image_index >= pHeader->m_total_images)
12007
{
12008
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
12009
return false;
12010
}
12011
12012
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12013
12014
uint32_t total_levels = 1;
12015
for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
12016
if (pSlice_descs[i].m_image_index == image_index)
12017
total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
12018
else
12019
break;
12020
12021
if (total_levels > 16)
12022
{
12023
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
12024
return false;
12025
}
12026
12027
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12028
12029
image_info.m_image_index = image_index;
12030
image_info.m_total_levels = total_levels;
12031
12032
image_info.m_alpha_flag = false;
12033
12034
// For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
12035
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12036
image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12037
else
12038
image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12039
12040
image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12041
12042
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12043
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12044
12045
image_info.m_width = slice_desc.m_num_blocks_x * block_width;
12046
image_info.m_height = slice_desc.m_num_blocks_y * block_height;
12047
image_info.m_orig_width = slice_desc.m_orig_width;
12048
image_info.m_orig_height = slice_desc.m_orig_height;
12049
image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
12050
image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
12051
image_info.m_block_width = block_width;
12052
image_info.m_block_height = block_height;
12053
image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
12054
image_info.m_first_slice_index = slice_index;
12055
12056
return true;
12057
}
12058
12059
uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const
12060
{
12061
if (!validate_header_quick(pData, data_size))
12062
{
12063
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n");
12064
return false;
12065
}
12066
12067
int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
12068
if (slice_index < 0)
12069
{
12070
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n");
12071
return false;
12072
}
12073
12074
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12075
12076
if (image_index >= pHeader->m_total_images)
12077
{
12078
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n");
12079
return false;
12080
}
12081
12082
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12083
12084
uint32_t total_levels = 1;
12085
for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
12086
if (pSlice_descs[i].m_image_index == image_index)
12087
total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
12088
else
12089
break;
12090
12091
const uint32_t cMaxSupportedLevels = 16;
12092
if (total_levels > cMaxSupportedLevels)
12093
{
12094
BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n");
12095
return false;
12096
}
12097
12098
return total_levels;
12099
}
12100
12101
bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const
12102
{
12103
if (!validate_header_quick(pData, data_size))
12104
{
12105
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n");
12106
return false;
12107
}
12108
12109
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12110
if (slice_index < 0)
12111
{
12112
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n");
12113
return false;
12114
}
12115
12116
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12117
12118
if (image_index >= pHeader->m_total_images)
12119
{
12120
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n");
12121
return false;
12122
}
12123
12124
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12125
12126
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12127
12128
orig_width = slice_desc.m_orig_width;
12129
orig_height = slice_desc.m_orig_height;
12130
total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
12131
12132
return true;
12133
}
12134
12135
bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const
12136
{
12137
if (!validate_header_quick(pData, data_size))
12138
{
12139
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n");
12140
return false;
12141
}
12142
12143
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12144
if (slice_index < 0)
12145
{
12146
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n");
12147
return false;
12148
}
12149
12150
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12151
12152
if (image_index >= pHeader->m_total_images)
12153
{
12154
BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n");
12155
return false;
12156
}
12157
12158
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12159
12160
const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
12161
12162
image_info.m_image_index = image_index;
12163
image_info.m_level_index = level_index;
12164
12165
// For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
12166
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12167
image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12168
else
12169
image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12170
12171
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12172
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12173
12174
image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12175
image_info.m_width = slice_desc.m_num_blocks_x * block_width;
12176
image_info.m_height = slice_desc.m_num_blocks_y * block_height;
12177
image_info.m_orig_width = slice_desc.m_orig_width;
12178
image_info.m_orig_height = slice_desc.m_orig_height;
12179
image_info.m_block_width = block_width;
12180
image_info.m_block_height = block_height;
12181
image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
12182
image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
12183
image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
12184
image_info.m_first_slice_index = slice_index;
12185
12186
image_info.m_rgb_file_ofs = slice_desc.m_file_ofs;
12187
image_info.m_rgb_file_len = slice_desc.m_file_size;
12188
image_info.m_alpha_file_ofs = 0;
12189
image_info.m_alpha_file_len = 0;
12190
12191
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12192
{
12193
if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
12194
{
12195
assert((slice_index + 1) < (int)pHeader->m_total_slices);
12196
image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs;
12197
image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size;
12198
}
12199
}
12200
12201
return true;
12202
}
12203
12204
bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const
12205
{
12206
if (!validate_file_checksums(pData, data_size, false))
12207
{
12208
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n");
12209
return false;
12210
}
12211
12212
const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
12213
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
12214
12215
file_info.m_version = pHeader->m_ver;
12216
12217
file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc);
12218
12219
file_info.m_total_selectors = pHeader->m_total_selectors;
12220
file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs;
12221
file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size;
12222
12223
file_info.m_total_endpoints = pHeader->m_total_endpoints;
12224
file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs;
12225
file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size;
12226
12227
file_info.m_tables_ofs = pHeader->m_tables_file_ofs;
12228
file_info.m_tables_size = pHeader->m_tables_file_size;
12229
12230
file_info.m_tex_format = static_cast<basis_tex_format>(static_cast<int>(pHeader->m_tex_format));
12231
12232
file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S);
12233
12234
file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0;
12235
file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12236
12237
const uint32_t total_slices = pHeader->m_total_slices;
12238
12239
file_info.m_slice_info.resize(total_slices);
12240
12241
file_info.m_slices_size = 0;
12242
12243
file_info.m_tex_type = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
12244
12245
if (file_info.m_tex_type > cBASISTexTypeTotal)
12246
{
12247
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n");
12248
return false;
12249
}
12250
12251
file_info.m_us_per_frame = pHeader->m_us_per_frame;
12252
file_info.m_userdata0 = pHeader->m_userdata0;
12253
file_info.m_userdata1 = pHeader->m_userdata1;
12254
12255
file_info.m_image_mipmap_levels.resize(0);
12256
file_info.m_image_mipmap_levels.resize(pHeader->m_total_images);
12257
12258
file_info.m_total_images = pHeader->m_total_images;
12259
12260
const uint32_t block_width = basis_tex_format_get_block_width((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12261
const uint32_t block_height = basis_tex_format_get_block_height((basis_tex_format)((uint32_t)pHeader->m_tex_format));
12262
file_info.m_block_width = block_width;
12263
file_info.m_block_height = block_height;
12264
12265
for (uint32_t i = 0; i < total_slices; i++)
12266
{
12267
file_info.m_slices_size += pSlice_descs[i].m_file_size;
12268
12269
basisu_slice_info& slice_info = file_info.m_slice_info[i];
12270
12271
slice_info.m_orig_width = pSlice_descs[i].m_orig_width;
12272
slice_info.m_orig_height = pSlice_descs[i].m_orig_height;
12273
slice_info.m_width = pSlice_descs[i].m_num_blocks_x * block_width;
12274
slice_info.m_height = pSlice_descs[i].m_num_blocks_y * block_height;
12275
slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x;
12276
slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y;
12277
slice_info.m_block_width = block_width;
12278
slice_info.m_block_height = block_height;
12279
slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y;
12280
slice_info.m_compressed_size = pSlice_descs[i].m_file_size;
12281
slice_info.m_slice_index = i;
12282
slice_info.m_image_index = pSlice_descs[i].m_image_index;
12283
slice_info.m_level_index = pSlice_descs[i].m_level_index;
12284
slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16;
12285
slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0;
12286
slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
12287
12288
if (pSlice_descs[i].m_image_index >= pHeader->m_total_images)
12289
{
12290
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n");
12291
return false;
12292
}
12293
12294
file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum<uint32_t>(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1);
12295
12296
if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16)
12297
{
12298
BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n");
12299
return false;
12300
}
12301
}
12302
12303
return true;
12304
}
12305
12306
bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size)
12307
{
12308
if (!validate_header_quick(pData, data_size))
12309
{
12310
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n");
12311
return false;
12312
}
12313
12314
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12315
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12316
12317
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12318
{
12319
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
12320
{
12321
m_lowlevel_etc1s_decoder.clear();
12322
}
12323
12324
if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook)
12325
{
12326
if (!m_lowlevel_etc1s_decoder.get_global_codebooks())
12327
{
12328
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n");
12329
return false;
12330
}
12331
if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size())
12332
{
12333
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n");
12334
return false;
12335
}
12336
if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) ||
12337
(m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors))
12338
{
12339
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n");
12340
return false;
12341
}
12342
if (!pHeader->m_tables_file_size)
12343
{
12344
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n");
12345
return false;
12346
}
12347
if (pHeader->m_tables_file_ofs > data_size)
12348
{
12349
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n");
12350
return false;
12351
}
12352
if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
12353
{
12354
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n");
12355
return false;
12356
}
12357
}
12358
else
12359
{
12360
if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size)
12361
{
12362
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n");
12363
return false;
12364
}
12365
12366
if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size))
12367
{
12368
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n");
12369
return false;
12370
}
12371
12372
if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs))
12373
{
12374
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n");
12375
return false;
12376
}
12377
12378
if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs))
12379
{
12380
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
12381
return false;
12382
}
12383
12384
if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
12385
{
12386
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
12387
return false;
12388
}
12389
12390
if (!m_lowlevel_etc1s_decoder.decode_palettes(
12391
pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size,
12392
pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size))
12393
{
12394
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n");
12395
return false;
12396
}
12397
}
12398
12399
if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size))
12400
{
12401
BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n");
12402
return false;
12403
}
12404
}
12405
else
12406
{
12407
// Nothing special to do for UASTC/UASTC HDR.
12408
if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
12409
{
12410
m_lowlevel_etc1s_decoder.clear();
12411
}
12412
}
12413
12414
m_ready_to_transcode = true;
12415
12416
return true;
12417
}
12418
12419
bool basisu_transcoder::stop_transcoding()
12420
{
12421
m_lowlevel_etc1s_decoder.clear();
12422
12423
m_ready_to_transcode = false;
12424
12425
return true;
12426
}
12427
12428
bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt,
12429
uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const
12430
{
12431
if (!m_ready_to_transcode)
12432
{
12433
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n");
12434
return false;
12435
}
12436
12437
if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
12438
{
12439
// TODO: Not yet supported
12440
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
12441
return false;
12442
}
12443
12444
if (!validate_header_quick(pData, data_size))
12445
{
12446
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n");
12447
return false;
12448
}
12449
12450
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12451
12452
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12453
12454
if (slice_index >= pHeader->m_total_slices)
12455
{
12456
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n");
12457
return false;
12458
}
12459
12460
const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index];
12461
12462
if (basis_block_format_is_uncompressed(fmt))
12463
{
12464
// Assume the output buffer is orig_width by orig_height
12465
if (!output_row_pitch_in_blocks_or_pixels)
12466
output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width;
12467
12468
if (!output_rows_in_pixels)
12469
output_rows_in_pixels = slice_desc.m_orig_height;
12470
12471
// Now make sure the output buffer is large enough, or we'll overwrite memory.
12472
if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
12473
{
12474
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
12475
return false;
12476
}
12477
}
12478
else if (fmt == block_format::cFXT1_RGB)
12479
{
12480
const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8;
12481
const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4;
12482
const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
12483
12484
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
12485
{
12486
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
12487
return false;
12488
}
12489
}
12490
else if (fmt == block_format::cASTC_HDR_6x6)
12491
{
12492
const uint32_t num_blocks_6x6_x = (slice_desc.m_orig_width + 5) / 6;
12493
const uint32_t num_blocks_6x6_y = (slice_desc.m_orig_height + 5) / 6;
12494
const uint32_t total_blocks_6x6 = num_blocks_6x6_x * num_blocks_6x6_y;
12495
12496
if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6)
12497
{
12498
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_6x6\n");
12499
return false;
12500
}
12501
}
12502
else
12503
{
12504
// must be a 4x4 pixel block format
12505
const uint32_t num_blocks_4x4_x = (slice_desc.m_orig_width + 3) / 4;
12506
const uint32_t num_blocks_4x4_y = (slice_desc.m_orig_height + 3) / 4;
12507
const uint32_t total_4x4_blocks = num_blocks_4x4_x * num_blocks_4x4_y;
12508
12509
if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks)
12510
{
12511
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n");
12512
return false;
12513
}
12514
}
12515
12516
if ((pHeader->m_tex_format == (uint32_t)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (uint32_t)basis_tex_format::cUASTC4x4))
12517
{
12518
if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
12519
{
12520
if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4)))
12521
{
12522
// PVRTC1 only supports power of 2 dimensions
12523
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n");
12524
return false;
12525
}
12526
}
12527
}
12528
12529
if (slice_desc.m_file_ofs > data_size)
12530
{
12531
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n");
12532
return false;
12533
}
12534
12535
const uint32_t data_size_left = data_size - slice_desc.m_file_ofs;
12536
if (data_size_left < slice_desc.m_file_size)
12537
{
12538
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n");
12539
return false;
12540
}
12541
12542
if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6)
12543
{
12544
return m_lowlevel_astc_6x6_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12545
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12546
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12547
output_rows_in_pixels, channel0, channel1, decode_flags);
12548
}
12549
else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
12550
{
12551
return m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12552
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12553
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12554
output_rows_in_pixels, channel0, channel1, decode_flags);
12555
}
12556
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
12557
{
12558
return m_lowlevel_uastc_4x4_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12559
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12560
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12561
output_rows_in_pixels, channel0, channel1, decode_flags);
12562
}
12563
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12564
{
12565
return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12566
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12567
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12568
output_rows_in_pixels, channel0, channel1, decode_flags);
12569
}
12570
else
12571
{
12572
return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
12573
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
12574
fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
12575
(decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels);
12576
}
12577
}
12578
12579
int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
12580
{
12581
BASISU_NOTE_UNUSED(data_size);
12582
12583
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12584
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12585
12586
// For very large basis files this search could be painful
12587
// TODO: Binary search this
12588
for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
12589
{
12590
const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter];
12591
if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
12592
return slice_iter;
12593
}
12594
12595
BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n");
12596
12597
return -1;
12598
}
12599
12600
int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const
12601
{
12602
if (!validate_header_quick(pData, data_size))
12603
{
12604
BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n");
12605
return false;
12606
}
12607
12608
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12609
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12610
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
12611
12612
// For very large basis files this search could be painful
12613
// TODO: Binary search this
12614
for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
12615
{
12616
const basis_slice_desc& slice_desc = pSlice_descs[slice_iter];
12617
if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
12618
{
12619
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12620
{
12621
const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
12622
if (slice_alpha == alpha_data)
12623
return slice_iter;
12624
}
12625
else
12626
{
12627
return slice_iter;
12628
}
12629
}
12630
}
12631
12632
BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n");
12633
12634
return -1;
12635
}
12636
12637
void basisu_transcoder::write_opaque_alpha_blocks(
12638
uint32_t num_blocks_x, uint32_t num_blocks_y,
12639
void* pOutput_blocks, block_format fmt,
12640
uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels)
12641
{
12642
// 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused
12643
// when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A
12644
BASISU_NOTE_UNUSED(num_blocks_y);
12645
BASISU_NOTE_UNUSED(pOutput_blocks);
12646
BASISU_NOTE_UNUSED(block_stride_in_bytes);
12647
12648
if (!output_row_pitch_in_blocks_or_pixels)
12649
output_row_pitch_in_blocks_or_pixels = num_blocks_x;
12650
12651
if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11))
12652
{
12653
#if BASISD_SUPPORT_ETC2_EAC_A8
12654
eac_block blk;
12655
blk.m_base = 255;
12656
blk.m_multiplier = 1;
12657
blk.m_table = 13;
12658
12659
// Selectors are all 4's
12660
memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
12661
12662
for (uint32_t y = 0; y < num_blocks_y; y++)
12663
{
12664
uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
12665
for (uint32_t x = 0; x < num_blocks_x; x++)
12666
{
12667
memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
12668
dst_ofs += block_stride_in_bytes;
12669
}
12670
}
12671
#endif
12672
}
12673
else if (fmt == block_format::cBC4)
12674
{
12675
#if BASISD_SUPPORT_DXT5A
12676
dxt5a_block blk;
12677
blk.m_endpoints[0] = 255;
12678
blk.m_endpoints[1] = 255;
12679
memset(blk.m_selectors, 0, sizeof(blk.m_selectors));
12680
12681
for (uint32_t y = 0; y < num_blocks_y; y++)
12682
{
12683
uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
12684
for (uint32_t x = 0; x < num_blocks_x; x++)
12685
{
12686
memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
12687
dst_ofs += block_stride_in_bytes;
12688
}
12689
}
12690
#endif
12691
}
12692
}
12693
12694
bool basisu_transcoder::transcode_image_level(
12695
const void* pData, uint32_t data_size,
12696
uint32_t image_index, uint32_t level_index,
12697
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
12698
transcoder_texture_format fmt,
12699
uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const
12700
{
12701
const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt);
12702
12703
if (!m_ready_to_transcode)
12704
{
12705
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n");
12706
return false;
12707
}
12708
12709
//const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
12710
12711
if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
12712
{
12713
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
12714
// TODO: Not yet supported
12715
return false;
12716
}
12717
12718
if (!validate_header_quick(pData, data_size))
12719
{
12720
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n");
12721
return false;
12722
}
12723
12724
const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
12725
12726
const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
12727
12728
const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
12729
12730
const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
12731
12732
int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
12733
if (slice_index < 0)
12734
{
12735
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n");
12736
// Unable to find the requested image/level
12737
return false;
12738
}
12739
12740
if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
12741
{
12742
// Switch to PVRTC1 RGB if the input doesn't have alpha.
12743
fmt = transcoder_texture_format::cTFPVRTC1_4_RGB;
12744
}
12745
12746
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
12747
{
12748
if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha)
12749
{
12750
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n");
12751
12752
// The first slice shouldn't have alpha data in a properly formed basis file
12753
return false;
12754
}
12755
12756
if (basis_file_has_alpha_slices)
12757
{
12758
// The alpha data should immediately follow the color data, and have the same resolution.
12759
if ((slice_index + 1U) >= pHeader->m_total_slices)
12760
{
12761
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n");
12762
// basis file is missing the alpha slice
12763
return false;
12764
}
12765
12766
// Basic sanity checks
12767
if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0)
12768
{
12769
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n");
12770
// This slice should have alpha data
12771
return false;
12772
}
12773
12774
if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y))
12775
{
12776
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n");
12777
// Alpha slice should have been the same res as the color slice
12778
return false;
12779
}
12780
}
12781
}
12782
12783
bool status = false;
12784
12785
if ((pHeader->m_tex_format == (int)basis_tex_format::cETC1S) || (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4))
12786
{
12787
// Only do this on 4x4 LDR formats that supports transcoding to PVRTC1.
12788
const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y;
12789
12790
if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks))
12791
{
12792
// The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves.
12793
// For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8.
12794
// However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory.
12795
memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
12796
}
12797
}
12798
12799
if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6)
12800
{
12801
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12802
12803
// Use the container independent image transcode method.
12804
status = m_lowlevel_astc_6x6_hdr_decoder.transcode_image(fmt,
12805
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12806
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12807
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12808
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12809
}
12810
else if (pHeader->m_tex_format == (int)basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
12811
{
12812
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12813
12814
// Use the container independent image transcode method.
12815
status = m_lowlevel_astc_6x6_hdr_intermediate_decoder.transcode_image(fmt,
12816
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12817
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12818
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12819
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12820
}
12821
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
12822
{
12823
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12824
12825
// Use the container independent image transcode method.
12826
status = m_lowlevel_uastc_4x4_hdr_decoder.transcode_image(fmt,
12827
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12828
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12829
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12830
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12831
}
12832
else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12833
{
12834
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12835
12836
// Use the container independent image transcode method.
12837
status = m_lowlevel_uastc_decoder.transcode_image(fmt,
12838
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12839
(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12840
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12841
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12842
}
12843
else
12844
{
12845
// ETC1S
12846
const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
12847
const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr;
12848
12849
assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0);
12850
12851
if (pAlpha_slice_desc)
12852
{
12853
// Basic sanity checks
12854
assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0);
12855
assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x);
12856
assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y);
12857
assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index);
12858
}
12859
12860
// Use the container independent image transcode method.
12861
status = m_lowlevel_etc1s_decoder.transcode_image(fmt,
12862
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
12863
(const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
12864
pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
12865
(pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U,
12866
decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
12867
12868
} // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
12869
12870
if (!status)
12871
{
12872
BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n");
12873
}
12874
else
12875
{
12876
//BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n");
12877
}
12878
12879
return status;
12880
}
12881
12882
// Returns the number of bytes occupied by one output unit of the given transcoder format:
// 8 or 16 for the block formats listed below, and the per-pixel byte size for the uncompressed formats.
// Returns 0 (after asserting in debug builds) for an unrecognized format.
uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt)
{
    switch (fmt)
    {
    case transcoder_texture_format::cTFETC1_RGB:
    case transcoder_texture_format::cTFBC1_RGB:
    case transcoder_texture_format::cTFBC4_R:
    case transcoder_texture_format::cTFPVRTC1_4_RGB:
    case transcoder_texture_format::cTFPVRTC1_4_RGBA:
    case transcoder_texture_format::cTFATC_RGB:
    case transcoder_texture_format::cTFPVRTC2_4_RGB:
    case transcoder_texture_format::cTFPVRTC2_4_RGBA:
    case transcoder_texture_format::cTFETC2_EAC_R11:
        return 8;
    case transcoder_texture_format::cTFBC7_RGBA:
    case transcoder_texture_format::cTFBC7_ALT:
    case transcoder_texture_format::cTFBC6H:
    case transcoder_texture_format::cTFETC2_RGBA:
    case transcoder_texture_format::cTFBC3_RGBA:
    case transcoder_texture_format::cTFBC5_RG:
    case transcoder_texture_format::cTFASTC_4x4_RGBA:
    case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
    case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
    case transcoder_texture_format::cTFATC_RGBA:
    case transcoder_texture_format::cTFFXT1_RGB:
    case transcoder_texture_format::cTFETC2_EAC_RG11:
        return 16;
    case transcoder_texture_format::cTFRGBA32:
    case transcoder_texture_format::cTFRGB_9E5:
        return sizeof(uint32_t);
    case transcoder_texture_format::cTFRGB565:
    case transcoder_texture_format::cTFBGR565:
    case transcoder_texture_format::cTFRGBA4444:
        return sizeof(uint16_t);
    case transcoder_texture_format::cTFRGB_HALF:
        return sizeof(half_float) * 3;
    case transcoder_texture_format::cTFRGBA_HALF:
        return sizeof(half_float) * 4;
    default:
        assert(0);
        // The message previously named basis_get_basisu_texture_format (copy/paste error).
        BASISU_DEVEL_ERROR("basis_get_bytes_per_block_or_pixel: Invalid fmt\n");
        break;
    }
    return 0;
}
12927
12928
// Returns a short, static, human readable name for a transcoder texture format.
// Note cTFBC7_ALT reports the same name as cTFBC7_RGBA ("BC7_RGBA").
// Returns "" (after asserting in debug builds) for an unrecognized format.
const char* basis_get_format_name(transcoder_texture_format fmt)
{
    switch (fmt)
    {
    case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB";
    case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB";
    case transcoder_texture_format::cTFBC4_R: return "BC4_R";
    case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB";
    case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
    case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA";
    case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA";
    case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA";
    case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
    case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
    case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
    case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_4X4_RGBA";
    case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: return "ASTC_HDR_6X6_RGBA";
    case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
    case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
    case transcoder_texture_format::cTFRGBA32: return "RGBA32";
    case transcoder_texture_format::cTFRGB565: return "RGB565";
    case transcoder_texture_format::cTFBGR565: return "BGR565";
    case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
    case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
    case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
    case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
    case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
    case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
    case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
    case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
    case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
    case transcoder_texture_format::cTFBC6H: return "BC6H";
    default:
        assert(0);
        // The message previously named basis_get_basisu_texture_format (copy/paste error).
        BASISU_DEVEL_ERROR("basis_get_format_name: Invalid fmt\n");
        break;
    }
    return "";
}
12967
12968
// Returns a short, static, human readable name for a low-level block format.
// Only the block formats listed below have names; any other value asserts in debug builds and returns "".
const char* basis_get_block_format_name(block_format fmt)
{
    switch (fmt)
    {
    case block_format::cETC1: return "ETC1";
    case block_format::cBC1: return "BC1";
    case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB";
    case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
    case block_format::cBC7: return "BC7";
    case block_format::cETC2_RGBA: return "ETC2_RGBA";
    case block_format::cBC3: return "BC3";
    case block_format::cASTC_4x4: return "ASTC_4x4";
    case block_format::cATC_RGB: return "ATC_RGB";
    case block_format::cRGBA32: return "RGBA32";
    case block_format::cRGB565: return "RGB565";
    case block_format::cBGR565: return "BGR565";
    case block_format::cRGBA4444: return "RGBA4444";
    case block_format::cRGBA_HALF: return "RGBA_HALF";
    case block_format::cRGB_HALF: return "RGB_HALF";
    case block_format::cRGB_9E5: return "RGB_9E5";
    case block_format::cUASTC_4x4: return "UASTC_4x4";
    case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
    case block_format::cBC6H: return "BC6H";
    case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
    case block_format::cASTC_HDR_6x6: return "ASTC_HDR_6x6";
    case block_format::cFXT1_RGB: return "FXT1_RGB";
    case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
    case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
    case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11";
    case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11";
    default:
        assert(0);
        // The message previously named basis_get_basisu_texture_format (copy/paste error).
        BASISU_DEVEL_ERROR("basis_get_block_format_name: Invalid fmt\n");
        break;
    }
    return "";
}
13005
13006
// Maps a basis texture type to a static, human readable description.
// An unrecognized type asserts in debug builds and yields "".
const char* basis_get_texture_type_name(basis_texture_type tex_type)
{
    const char* pName = "";

    switch (tex_type)
    {
    case cBASISTexType2D:
        pName = "2D";
        break;
    case cBASISTexType2DArray:
        pName = "2D array";
        break;
    case cBASISTexTypeCubemapArray:
        pName = "cubemap array";
        break;
    case cBASISTexTypeVideoFrames:
        pName = "video";
        break;
    case cBASISTexTypeVolume:
        pName = "3D";
        break;
    default:
        assert(0);
        BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n");
        break;
    }

    return pName;
}
13022
13023
// Reports whether the given transcoder output format is treated as carrying an alpha channel.
// TODO: Technically ASTC HDR does support alpha, but our ASTC HDR encoders don't yet support it. Unsure what to do here.
bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
{
    bool has_alpha = false;

    switch (fmt)
    {
    // Block compressed formats with alpha
    case transcoder_texture_format::cTFETC2_RGBA:
    case transcoder_texture_format::cTFBC3_RGBA:
    case transcoder_texture_format::cTFASTC_4x4_RGBA:
    case transcoder_texture_format::cTFBC7_RGBA:
    case transcoder_texture_format::cTFBC7_ALT:
    case transcoder_texture_format::cTFPVRTC1_4_RGBA:
    case transcoder_texture_format::cTFPVRTC2_4_RGBA:
    case transcoder_texture_format::cTFATC_RGBA:
    // Technically these ASTC HDR formats support alpha, but we currently don't exploit that in our encoders
    case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
    case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA:
    // Uncompressed formats with alpha
    case transcoder_texture_format::cTFRGBA32:
    case transcoder_texture_format::cTFRGBA4444:
    case transcoder_texture_format::cTFRGBA_HALF:
        has_alpha = true;
        break;
    default:
        break;
    }

    return has_alpha;
}
13047
13048
// Returns true when the transcoder output format is one of the HDR formats
// (ASTC HDR 4x4/6x6, BC6H, half float RGB/RGBA, or RGB 9E5).
bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
{
    const bool is_block_hdr =
        (fmt == transcoder_texture_format::cTFASTC_HDR_4x4_RGBA) ||
        (fmt == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA) ||
        (fmt == transcoder_texture_format::cTFBC6H);

    const bool is_pixel_hdr =
        (fmt == transcoder_texture_format::cTFRGBA_HALF) ||
        (fmt == transcoder_texture_format::cTFRGB_HALF) ||
        (fmt == transcoder_texture_format::cTFRGB_9E5);

    return is_block_hdr || is_pixel_hdr;
}
13064
13065
// Translates a transcoder output format into the corresponding basisu::texture_format.
// Both BC7 transcoder formats map to cBC7, and both PVRTC2 transcoder formats map to cPVRTC2_4_RGBA.
// An unrecognized format asserts in debug builds and yields cInvalidTextureFormat.
basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt)
{
    basisu::texture_format result = basisu::texture_format::cInvalidTextureFormat;

    switch (fmt)
    {
    case transcoder_texture_format::cTFETC1_RGB: result = basisu::texture_format::cETC1; break;
    case transcoder_texture_format::cTFBC1_RGB: result = basisu::texture_format::cBC1; break;
    case transcoder_texture_format::cTFBC4_R: result = basisu::texture_format::cBC4; break;
    case transcoder_texture_format::cTFPVRTC1_4_RGB: result = basisu::texture_format::cPVRTC1_4_RGB; break;
    case transcoder_texture_format::cTFPVRTC1_4_RGBA: result = basisu::texture_format::cPVRTC1_4_RGBA; break;
    case transcoder_texture_format::cTFBC7_RGBA:
    case transcoder_texture_format::cTFBC7_ALT:
        result = basisu::texture_format::cBC7;
        break;
    case transcoder_texture_format::cTFETC2_RGBA: result = basisu::texture_format::cETC2_RGBA; break;
    case transcoder_texture_format::cTFBC3_RGBA: result = basisu::texture_format::cBC3; break;
    case transcoder_texture_format::cTFBC5_RG: result = basisu::texture_format::cBC5; break;
    case transcoder_texture_format::cTFASTC_4x4_RGBA: result = basisu::texture_format::cASTC_LDR_4x4; break;
    case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: result = basisu::texture_format::cASTC_HDR_4x4; break;
    case transcoder_texture_format::cTFASTC_HDR_6x6_RGBA: result = basisu::texture_format::cASTC_HDR_6x6; break;
    case transcoder_texture_format::cTFBC6H: result = basisu::texture_format::cBC6HUnsigned; break;
    case transcoder_texture_format::cTFATC_RGB: result = basisu::texture_format::cATC_RGB; break;
    case transcoder_texture_format::cTFATC_RGBA: result = basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; break;
    case transcoder_texture_format::cTFRGBA32: result = basisu::texture_format::cRGBA32; break;
    case transcoder_texture_format::cTFRGB565: result = basisu::texture_format::cRGB565; break;
    case transcoder_texture_format::cTFBGR565: result = basisu::texture_format::cBGR565; break;
    case transcoder_texture_format::cTFRGBA4444: result = basisu::texture_format::cRGBA4444; break;
    case transcoder_texture_format::cTFRGBA_HALF: result = basisu::texture_format::cRGBA_HALF; break;
    case transcoder_texture_format::cTFRGB_9E5: result = basisu::texture_format::cRGB_9E5; break;
    case transcoder_texture_format::cTFRGB_HALF: result = basisu::texture_format::cRGB_HALF; break;
    case transcoder_texture_format::cTFFXT1_RGB: result = basisu::texture_format::cFXT1_RGB; break;
    case transcoder_texture_format::cTFPVRTC2_4_RGB:
    case transcoder_texture_format::cTFPVRTC2_4_RGBA:
        result = basisu::texture_format::cPVRTC2_4_RGBA;
        break;
    case transcoder_texture_format::cTFETC2_EAC_R11: result = basisu::texture_format::cETC2_R11_EAC; break;
    case transcoder_texture_format::cTFETC2_EAC_RG11: result = basisu::texture_format::cETC2_RG11_EAC; break;
    default:
        assert(0);
        BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
        break;
    }

    return result;
}
13104
13105
// Returns true when the transcoder output format is a plain per-pixel (non block compressed) format.
bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type)
{
    const bool is_ldr_pixel_fmt =
        (tex_type == transcoder_texture_format::cTFRGBA32) ||
        (tex_type == transcoder_texture_format::cTFRGB565) ||
        (tex_type == transcoder_texture_format::cTFBGR565) ||
        (tex_type == transcoder_texture_format::cTFRGBA4444);

    const bool is_hdr_pixel_fmt =
        (tex_type == transcoder_texture_format::cTFRGB_HALF) ||
        (tex_type == transcoder_texture_format::cTFRGBA_HALF) ||
        (tex_type == transcoder_texture_format::cTFRGB_9E5);

    return is_ldr_pixel_fmt || is_hdr_pixel_fmt;
}
13122
13123
// Returns true when the low-level block format is one of the uncompressed per-pixel formats listed below.
bool basis_block_format_is_uncompressed(block_format blk_fmt)
{
    bool is_uncompressed = false;

    switch (blk_fmt)
    {
    // 32-bit formats
    case block_format::cRGB32:
    case block_format::cRGBA32:
    case block_format::cA32:
    // 16-bit formats
    case block_format::cRGB565:
    case block_format::cBGR565:
    case block_format::cRGBA4444:
    case block_format::cRGBA4444_COLOR:
    case block_format::cRGBA4444_ALPHA:
    case block_format::cRGBA4444_COLOR_OPAQUE:
    // HDR formats
    case block_format::cRGBA_HALF:
    case block_format::cRGB_HALF:
    case block_format::cRGB_9E5:
        is_uncompressed = true;
        break;
    default:
        break;
    }

    return is_uncompressed;
}
13145
13146
// Returns the size in bytes of a single pixel for the uncompressed transcoder formats,
// or 0 when fmt is not one of them.
uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt)
{
    if ((fmt == transcoder_texture_format::cTFRGBA32) || (fmt == transcoder_texture_format::cTFRGB_9E5))
        return sizeof(uint32_t);

    if ((fmt == transcoder_texture_format::cTFRGB565) ||
        (fmt == transcoder_texture_format::cTFBGR565) ||
        (fmt == transcoder_texture_format::cTFRGBA4444))
    {
        return sizeof(uint16_t);
    }

    if (fmt == transcoder_texture_format::cTFRGB_HALF)
        return sizeof(half_float) * 3;

    if (fmt == transcoder_texture_format::cTFRGBA_HALF)
        return sizeof(half_float) * 4;

    return 0;
}
13166
13167
// Returns the block width in pixels for a transcoder output format:
// 8 for FXT1, 6 for ASTC HDR 6x6, and 4 for everything else.
uint32_t basis_get_block_width(transcoder_texture_format tex_type)
{
    if (tex_type == transcoder_texture_format::cTFFXT1_RGB)
        return 8;

    if (tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA)
        return 6;

    return 4;
}
13180
13181
// Returns the block height in pixels for a transcoder output format:
// 6 for ASTC HDR 6x6, and 4 for everything else.
uint32_t basis_get_block_height(transcoder_texture_format tex_type)
{
    const bool is_6x6 = (tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA);
    return is_6x6 ? 6 : 4;
}
13192
13193
// Returns the block width in pixels of a source (.basis) texture format:
// 6 for the two ASTC HDR 6x6 variants, 4 for everything else.
uint32_t basis_tex_format_get_block_width(basis_tex_format fmt)
{
    const bool is_6x6 =
        (fmt == basis_tex_format::cASTC_HDR_6x6) ||
        (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);

    return is_6x6 ? 6 : 4;
}
13205
13206
// Returns the block height in pixels of a source (.basis) texture format:
// 6 for the two ASTC HDR 6x6 variants, 4 for everything else.
uint32_t basis_tex_format_get_block_height(basis_tex_format fmt)
{
    if ((fmt == basis_tex_format::cASTC_HDR_6x6) || (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE))
        return 6;

    return 4;
}
13218
13219
// Returns true when the source (.basis) texture format is one of the HDR formats
// (UASTC HDR 4x4, ASTC HDR 6x6, or the ASTC HDR 6x6 intermediate format).
bool basis_tex_format_is_hdr(basis_tex_format fmt)
{
    return (fmt == basis_tex_format::cUASTC_HDR_4x4) ||
        (fmt == basis_tex_format::cASTC_HDR_6x6) ||
        (fmt == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);
}
13232
13233
// Reports whether a source texture format (fmt) can be transcoded to the given output format (tex_type)
// in this build. The answer depends on the BASISD_SUPPORT_* compile-time switches.
// Any source format that is not ASTC HDR 6x6 (either variant), UASTC HDR 4x4 or UASTC LDR 4x4 is treated as ETC1S.
bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
{
    switch (fmt)
    {
    case basis_tex_format::cASTC_HDR_6x6:
    case basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE:
    {
        // RDO UASTC HDR 6x6, or our custom intermediate format
#if BASISD_SUPPORT_UASTC_HDR
        if ((tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA) ||
            (tex_type == transcoder_texture_format::cTFBC6H) ||
            (tex_type == transcoder_texture_format::cTFRGBA_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_9E5))
        {
            return true;
        }
#endif
        return false;
    }
    case basis_tex_format::cUASTC_HDR_4x4:
    {
        // UASTC HDR 4x4
#if BASISD_SUPPORT_UASTC_HDR
        if ((tex_type == transcoder_texture_format::cTFASTC_HDR_4x4_RGBA) ||
            (tex_type == transcoder_texture_format::cTFBC6H) ||
            (tex_type == transcoder_texture_format::cTFRGBA_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_9E5))
        {
            return true;
        }
#endif
        return false;
    }
    case basis_tex_format::cUASTC4x4:
    {
        // UASTC LDR 4x4
#if BASISD_SUPPORT_UASTC
        // These niche formats aren't currently supported for UASTC - everything else is.
        const bool is_niche_target =
            (tex_type == transcoder_texture_format::cTFPVRTC2_4_RGB) ||
            (tex_type == transcoder_texture_format::cTFPVRTC2_4_RGBA) ||
            (tex_type == transcoder_texture_format::cTFATC_RGB) ||
            (tex_type == transcoder_texture_format::cTFATC_RGBA) ||
            (tex_type == transcoder_texture_format::cTFFXT1_RGB);

        // UASTC LDR doesn't support transcoding to HDR formats
        const bool is_hdr_target =
            (tex_type == transcoder_texture_format::cTFASTC_HDR_4x4_RGBA) ||
            (tex_type == transcoder_texture_format::cTFASTC_HDR_6x6_RGBA) ||
            (tex_type == transcoder_texture_format::cTFBC6H) ||
            (tex_type == transcoder_texture_format::cTFRGBA_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_HALF) ||
            (tex_type == transcoder_texture_format::cTFRGB_9E5);

        return !(is_niche_target || is_hdr_target);
#else
        return false;
#endif
    }
    default:
        break;
    }

    // ETC1S
    switch (tex_type)
    {
    // ETC1 and uncompressed are always supported.
    case transcoder_texture_format::cTFETC1_RGB:
    case transcoder_texture_format::cTFRGBA32:
    case transcoder_texture_format::cTFRGB565:
    case transcoder_texture_format::cTFBGR565:
    case transcoder_texture_format::cTFRGBA4444:
        return true;
#if BASISD_SUPPORT_DXT1
    case transcoder_texture_format::cTFBC1_RGB:
        return true;
#endif
#if BASISD_SUPPORT_DXT5A
    case transcoder_texture_format::cTFBC4_R:
    case transcoder_texture_format::cTFBC5_RG:
        return true;
#endif
#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A
    case transcoder_texture_format::cTFBC3_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_PVRTC1
    case transcoder_texture_format::cTFPVRTC1_4_RGB:
    case transcoder_texture_format::cTFPVRTC1_4_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_BC7_MODE5
    case transcoder_texture_format::cTFBC7_RGBA:
    case transcoder_texture_format::cTFBC7_ALT:
        return true;
#endif
#if BASISD_SUPPORT_ETC2_EAC_A8
    case transcoder_texture_format::cTFETC2_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_ASTC
    case transcoder_texture_format::cTFASTC_4x4_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_ATC
    case transcoder_texture_format::cTFATC_RGB:
    case transcoder_texture_format::cTFATC_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_FXT1
    case transcoder_texture_format::cTFFXT1_RGB:
        return true;
#endif
#if BASISD_SUPPORT_PVRTC2
    case transcoder_texture_format::cTFPVRTC2_4_RGB:
    case transcoder_texture_format::cTFPVRTC2_4_RGBA:
        return true;
#endif
#if BASISD_SUPPORT_ETC2_EAC_RG11
    case transcoder_texture_format::cTFETC2_EAC_R11:
    case transcoder_texture_format::cTFETC2_EAC_RG11:
        return true;
#endif
    default:
        break;
    }

    return false;
}
13363
13364
// ------------------------------------------------------------------------------------------------------
13365
// UASTC LDR 4x4
13366
// ------------------------------------------------------------------------------------------------------
13367
13368
#if BASISD_SUPPORT_UASTC
13369
const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] =
13370
{
13371
{ 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false },
13372
{ 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true },
13373
{ 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true },
13374
{ 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true },
13375
{ 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false },
13376
{ 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false },
13377
{ 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true },
13378
{ 33, 694, true }, { 52, 524, true }
13379
};
13380
13381
const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] =
13382
{
13383
{ 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 },
13384
{ 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 },
13385
{ 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 },
13386
{ 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 },
13387
{ 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 }
13388
};
13389
13390
const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] =
13391
{
13392
{ 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 },
13393
{ 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 },
13394
{ 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 }
13395
};
13396
13397
// Subset index permutations, one row per permutation id (0-5). Every row is a
// permutation of { 0, 1, 2 }.
const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] =
{
	{ 0, 1, 2 },
	{ 1, 2, 0 },
	{ 2, 0, 1 },
	{ 2, 1, 0 },
	{ 0, 2, 1 },
	{ 1, 0, 2 }
};

// Row-wise inverse of g_astc_to_bc7_partition_index_perm_tables: for every permutation id k,
// g_bc7_to_astc_partition_index_perm_tables[k][g_astc_to_bc7_partition_index_perm_tables[k][i]] == i.
const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] =
{
	{ 0, 1, 2 },
	{ 2, 0, 1 },
	{ 1, 2, 0 },
	{ 2, 1, 0 },
	{ 0, 2, 1 },
	{ 1, 0, 2 }
};
13400
13401
// Collapses a 3-subset partition index p (0-2) into a 2-subset index (0 or 1).
// k (0-5) selects the mapping:
//   k >> 1 == 0: subsets { 0, 1 } -> 0, subset 2 -> 1
//   k >> 1 == 1: subset 0 -> 0, subsets { 1, 2 } -> 1
//   k >> 1 == 2: subsets { 0, 2 } -> 0, subset 1 -> 1
// When the low bit of k is set, the resulting 0/1 index is inverted.
uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k)
{
	assert(k < 6);

	const uint32_t grouping = k >> 1;
	if (grouping == 0)
		p = (p > 1) ? 1u : 0u;
	else if (grouping == 1)
		p = (p != 0) ? 1u : 0u;
	else if (grouping == 2)
		p = ((p != 0) && (p != 2)) ? 1u : 0u;

	return (k & 1) ? (1 - p) : p;
}
13429
13430
static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
13431
13432
const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] =
13433
{
13434
{ 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 },
13435
{ 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 },
13436
{ 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 },
13437
{ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 },
13438
{ 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 },
13439
{ 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 },
13440
{ 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 },
13441
{ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 }
13442
};
13443
13444
const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] =
13445
{
13446
{ 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 },
13447
{ 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 },
13448
{ 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 }
13449
};
13450
13451
const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] =
13452
{
13453
{ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 },
13454
{ 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 },
13455
{ 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 },
13456
{ 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 },
13457
{ 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }
13458
};
13459
13460
const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] =
13461
{
13462
{ 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 },
13463
{ 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 },
13464
{ 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 },
13465
{ 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 }
13466
};
13467
13468
const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] =
13469
{
13470
{ 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 }
13471
};
13472
13473
const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] =
13474
{
13475
{ 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 }
13476
};
13477
13478
const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] =
13479
{
13480
{ 0x1, 4 },
13481
{ 0x35, 6 },
13482
{ 0x1D, 5 },
13483
{ 0x3, 5 },
13484
13485
{ 0x13, 5 },
13486
{ 0xB, 5 },
13487
{ 0x1B, 5 },
13488
{ 0x7, 5 },
13489
13490
{ 0x17, 5 },
13491
{ 0xF, 5 },
13492
{ 0x2, 3 },
13493
{ 0x0, 2 },
13494
13495
{ 0x6, 3 },
13496
{ 0x1F, 5 },
13497
{ 0xD, 5 },
13498
{ 0x5, 7 },
13499
13500
{ 0x15, 6 },
13501
{ 0x25, 6 },
13502
{ 0x9, 4 },
13503
{ 0x45, 7 } // future expansion
13504
};
13505
13506
// If g_uastc_mode_huff_codes[] changes this table must be updated!
13507
static const uint8_t g_uastc_huff_modes[128] =
13508
{
13509
11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,
13510
19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13
13511
};
13512
13513
const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 };
13514
const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 };
13515
const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 };
13516
const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 };
13517
const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 };
13518
const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 };
13519
const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
13520
const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
13521
const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
13522
const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 };
13523
const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
13524
const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 };
13525
const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 };
13526
13527
// bits, trits, quints
13528
const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] =
13529
{
13530
{ 1, 0, 0 }, // 0-1 0
13531
{ 0, 1, 0 }, // 0-2 1
13532
{ 2, 0, 0 }, // 0-3 2
13533
{ 0, 0, 1 }, // 0-4 3
13534
13535
{ 1, 1, 0 }, // 0-5 4
13536
{ 3, 0, 0 }, // 0-7 5
13537
{ 1, 0, 1 }, // 0-9 6
13538
{ 2, 1, 0 }, // 0-11 7
13539
13540
{ 4, 0, 0 }, // 0-15 8
13541
{ 2, 0, 1 }, // 0-19 9
13542
{ 3, 1, 0 }, // 0-23 10
13543
{ 5, 0, 0 }, // 0-31 11
13544
13545
{ 3, 0, 1 }, // 0-39 12
13546
{ 4, 1, 0 }, // 0-47 13
13547
{ 6, 0, 0 }, // 0-63 14
13548
{ 4, 0, 1 }, // 0-79 15
13549
13550
{ 5, 1, 0 }, // 0-95 16
13551
{ 7, 0, 0 }, // 0-127 17
13552
{ 5, 0, 1 }, // 0-159 18
13553
{ 6, 1, 0 }, // 0-191 19
13554
13555
{ 8, 0, 0 }, // 0-255 20
13556
};
13557
13558
int astc_get_levels(int range)
13559
{
13560
assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES);
13561
return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0];
13562
}
13563
13564
// g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[]
13565
astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index]
13566
13567
// Taken right from the ASTC spec.
13568
static struct
13569
{
13570
const char* m_pB_str;
13571
uint32_t m_c;
13572
} g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] =
13573
{
13574
{ "", 0 },
13575
{ "", 0 },
13576
{ "", 0 },
13577
{ "", 0 },
13578
{ "000000000", 204, }, // 0-5
13579
{ "", 0 },
13580
{ "000000000", 113, }, // 0-9
13581
{ "b000b0bb0", 93 }, // 0-11
13582
{ "", 0 },
13583
{ "b0000bb00", 54 }, // 0-19
13584
{ "cb000cbcb", 44 }, // 0-23
13585
{ "", 0 },
13586
{ "cb0000cbc", 26 }, // 0-39
13587
{ "dcb000dcb", 22 }, // 0-47
13588
{ "", 0 },
13589
{ "dcb0000dc", 13 }, // 0-79
13590
{ "edcb000ed", 11 }, // 0-95
13591
{ "", 0 },
13592
{ "edcb0000e", 6 }, // 0-159
13593
{ "fedcb000f", 5 }, // 0-191
13594
{ "", 0 },
13595
};
13596
13597
bool astc_is_valid_endpoint_range(uint32_t range)
13598
{
13599
if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0))
13600
return true;
13601
13602
return g_astc_endpoint_unquant_params[range].m_c != 0;
13603
}
13604
13605
uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range)
13606
{
13607
assert(range < BC7ENC_TOTAL_ASTC_RANGES);
13608
13609
const uint32_t bits = g_astc_bise_range_table[range][0];
13610
const uint32_t trits = g_astc_bise_range_table[range][1];
13611
const uint32_t quints = g_astc_bise_range_table[range][2];
13612
13613
uint32_t val = 0;
13614
if ((!trits) && (!quints))
13615
{
13616
assert(!packed_trits && !packed_quints);
13617
13618
int bits_left = 8;
13619
while (bits_left > 0)
13620
{
13621
uint32_t v = packed_bits;
13622
13623
int n = basisu::minimumi(bits_left, bits);
13624
if (n < (int)bits)
13625
v >>= (bits - n);
13626
13627
assert(v < (1U << n));
13628
13629
val |= (v << (bits_left - n));
13630
bits_left -= n;
13631
}
13632
}
13633
else
13634
{
13635
const uint32_t A = (packed_bits & 1) ? 511 : 0;
13636
const uint32_t C = g_astc_endpoint_unquant_params[range].m_c;
13637
const uint32_t D = trits ? packed_trits : packed_quints;
13638
13639
assert(C);
13640
13641
uint32_t B = 0;
13642
for (uint32_t i = 0; i < 9; i++)
13643
{
13644
B <<= 1;
13645
13646
char c = g_astc_endpoint_unquant_params[range].m_pB_str[i];
13647
if (c != '0')
13648
{
13649
c -= 'a';
13650
B |= ((packed_bits >> c) & 1);
13651
}
13652
}
13653
13654
val = D * C + B;
13655
val = val ^ A;
13656
val = (A & 0x80) | (val >> 2);
13657
}
13658
13659
return val;
13660
}
13661
13662
uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range)
13663
{
13664
assert(range < BC7ENC_TOTAL_ASTC_RANGES);
13665
assert(packed_val < (uint32_t)astc_get_levels(range));
13666
13667
const uint32_t bits = g_astc_bise_range_table[range][0];
13668
const uint32_t trits = g_astc_bise_range_table[range][1];
13669
const uint32_t quints = g_astc_bise_range_table[range][2];
13670
13671
if ((!trits) && (!quints))
13672
return unquant_astc_endpoint(packed_val, 0, 0, range);
13673
else if (trits)
13674
return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range);
13675
else
13676
return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range);
13677
}
13678
13679
// BC7 - Various BC7 tables/helpers

// Interpolation weight tables. Every table runs from 0 up to 64; the array length
// is the number of selector levels.
const uint32_t g_bc7_weights1[2] = { 0, 64 };
const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 };
const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const uint32_t g_bc7_weights4[16] =
{
	0, 4, 9, 13, 17, 21, 26, 30,
	34, 38, 43, 47, 51, 55, 60, 64
};

// ASTC weight tables for 16, 32 and 3 levels.
const uint32_t g_astc_weights4[16] =
{
	0, 4, 8, 12, 17, 21, 25, 29,
	35, 39, 43, 47, 52, 56, 60, 64
};
const uint32_t g_astc_weights5[32] =
{
	0, 2, 4, 6, 8, 10, 12, 14,
	16, 18, 20, 22, 24, 26, 28, 30,
	34, 36, 38, 40, 42, 44, 46, 48,
	50, 52, 54, 56, 58, 60, 62, 64
};
const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 };
13687
13688
const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
13689
13690
const uint8_t g_bc7_partition2[64 * 16] =
13691
{
13692
0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
13693
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
13694
0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
13695
0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
13696
0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
13697
0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
13698
0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
13699
0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
13700
};
13701
13702
const uint8_t g_bc7_partition3[64 * 16] =
13703
{
13704
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
13705
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
13706
0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
13707
0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
13708
0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
13709
0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
13710
0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
13711
0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
13712
};
13713
13714
const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 };
13715
13716
const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =
13717
{
13718
3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3
13719
};
13720
13721
const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =
13722
{
13723
15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8
13724
};
13725
13726
// Per-mode BC7 properties. Each table is indexed by BC7 mode number:
//                                                    0  1  2  3  4  5  6  7
const uint8_t g_bc7_num_subsets[8]              = { 3, 2, 3, 2, 1, 1, 1, 2 }; // subsets per block
const uint8_t g_bc7_partition_bits[8]           = { 4, 6, 6, 6, 0, 0, 0, 6 }; // bits used for the partition index
const uint8_t g_bc7_color_index_bitcount[8]     = { 3, 3, 2, 2, 2, 2, 4, 2 }; // bits per color selector

const uint8_t g_bc7_mode_has_p_bits[8]          = { 1, 1, 0, 1, 0, 0, 1, 1 }; // non-zero if the mode stores p-bits
const uint8_t g_bc7_mode_has_shared_p_bits[8]   = { 0, 1, 0, 0, 0, 0, 0, 0 }; // non-zero if one p-bit is shared per subset
const uint8_t g_bc7_color_precision_table[8]    = { 4, 6, 5, 7, 5, 7, 7, 5 }; // bits per color endpoint component
const int8_t g_bc7_alpha_precision_table[8]     = { 0, 0, 0, 0, 6, 8, 7, 5 }; // bits per alpha endpoint component

const uint8_t g_bc7_alpha_index_bitcount[8]     = { 0, 0, 0, 0, 3, 2, 4, 2 }; // bits per alpha selector
13736
13737
endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit]
13738
endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c]
13739
13740
static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs)
13741
{
13742
assert((num_bits <= 32) && (val < (1ULL << num_bits)));
13743
while (num_bits)
13744
{
13745
const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits);
13746
pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7));
13747
val >>= n;
13748
num_bits -= n;
13749
*pCur_ofs += n;
13750
}
13751
assert(*pCur_ofs <= 128);
13752
}
13753
13754
// TODO: Optimize this.
13755
void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults)
13756
{
13757
const uint32_t best_mode = pResults->m_mode;
13758
13759
const uint32_t total_subsets = g_bc7_num_subsets[best_mode];
13760
const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode];
13761
//const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode];
13762
//const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1;
13763
13764
const uint8_t* pPartition;
13765
if (total_subsets == 1)
13766
pPartition = &g_bc7_partition1[0];
13767
else if (total_subsets == 2)
13768
pPartition = &g_bc7_partition2[pResults->m_partition * 16];
13769
else
13770
pPartition = &g_bc7_partition3[pResults->m_partition * 16];
13771
13772
uint8_t color_selectors[16];
13773
memcpy(color_selectors, pResults->m_selectors, 16);
13774
13775
uint8_t alpha_selectors[16];
13776
memcpy(alpha_selectors, pResults->m_alpha_selectors, 16);
13777
13778
color_quad_u8 low[3], high[3];
13779
memcpy(low, pResults->m_low, sizeof(low));
13780
memcpy(high, pResults->m_high, sizeof(high));
13781
13782
uint32_t pbits[3][2];
13783
memcpy(pbits, pResults->m_pbits, sizeof(pbits));
13784
13785
int anchor[3] = { -1, -1, -1 };
13786
13787
for (uint32_t k = 0; k < total_subsets; k++)
13788
{
13789
uint32_t anchor_index = 0;
13790
if (k)
13791
{
13792
if ((total_subsets == 3) && (k == 1))
13793
anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition];
13794
else if ((total_subsets == 3) && (k == 2))
13795
anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition];
13796
else
13797
anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition];
13798
}
13799
13800
anchor[k] = anchor_index;
13801
13802
const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector);
13803
const uint32_t num_color_indices = 1 << color_index_bits;
13804
13805
if (color_selectors[anchor_index] & (num_color_indices >> 1))
13806
{
13807
for (uint32_t i = 0; i < 16; i++)
13808
if (pPartition[i] == k)
13809
color_selectors[i] = (uint8_t)((num_color_indices - 1) - color_selectors[i]);
13810
13811
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13812
{
13813
for (uint32_t q = 0; q < 3; q++)
13814
{
13815
uint8_t t = low[k].m_c[q];
13816
low[k].m_c[q] = high[k].m_c[q];
13817
high[k].m_c[q] = t;
13818
}
13819
}
13820
else
13821
{
13822
color_quad_u8 tmp = low[k];
13823
low[k] = high[k];
13824
high[k] = tmp;
13825
}
13826
13827
if (!g_bc7_mode_has_shared_p_bits[best_mode])
13828
{
13829
uint32_t t = pbits[k][0];
13830
pbits[k][0] = pbits[k][1];
13831
pbits[k][1] = t;
13832
}
13833
}
13834
13835
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13836
{
13837
const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
13838
const uint32_t num_alpha_indices = 1 << alpha_index_bits;
13839
13840
if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1))
13841
{
13842
for (uint32_t i = 0; i < 16; i++)
13843
if (pPartition[i] == k)
13844
alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]);
13845
13846
uint8_t t = low[k].m_c[3];
13847
low[k].m_c[3] = high[k].m_c[3];
13848
high[k].m_c[3] = t;
13849
}
13850
}
13851
}
13852
13853
uint8_t* pBlock_bytes = (uint8_t*)(pBlock);
13854
memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE);
13855
13856
uint32_t cur_bit_ofs = 0;
13857
bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs);
13858
13859
if ((best_mode == 4) || (best_mode == 5))
13860
bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs);
13861
13862
if (best_mode == 4)
13863
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs);
13864
13865
if (total_partitions > 1)
13866
bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs);
13867
13868
const uint32_t total_comps = (best_mode >= 4) ? 4 : 3;
13869
for (uint32_t comp = 0; comp < total_comps; comp++)
13870
{
13871
for (uint32_t subset = 0; subset < total_subsets; subset++)
13872
{
13873
bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
13874
bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
13875
}
13876
}
13877
13878
if (g_bc7_mode_has_p_bits[best_mode])
13879
{
13880
for (uint32_t subset = 0; subset < total_subsets; subset++)
13881
{
13882
bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs);
13883
if (!g_bc7_mode_has_shared_p_bits[best_mode])
13884
bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs);
13885
}
13886
}
13887
13888
for (uint32_t y = 0; y < 4; y++)
13889
{
13890
for (uint32_t x = 0; x < 4; x++)
13891
{
13892
int idx = x + y * 4;
13893
13894
uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector);
13895
13896
if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
13897
n--;
13898
13899
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs);
13900
}
13901
}
13902
13903
if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
13904
{
13905
for (uint32_t y = 0; y < 4; y++)
13906
{
13907
for (uint32_t x = 0; x < 4; x++)
13908
{
13909
int idx = x + y * 4;
13910
13911
uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
13912
13913
if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
13914
n--;
13915
13916
bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs);
13917
}
13918
}
13919
}
13920
13921
assert(cur_bit_ofs == 128);
13922
}
13923
13924
// ASTC
13925
// ORs a code of up to 9 bits into the byte buffer at pDst, starting at bit_offset (LSB first),
// and advances bit_offset by codesize. A codesize of 0 leaves everything untouched.
// The code can span at most two bytes; the destination bits are expected to be zero already,
// since the bytes are only ORed into. code is not masked, so it must fit in codesize bits.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize)
{
	assert(codesize <= 9);

	if (!codesize)
		return;

	uint8_t* const pByte = reinterpret_cast<uint8_t*>(pDst) + (bit_offset >> 3);
	const uint32_t shift = bit_offset & 7;
	const uint32_t shifted = code << shift;

	pByte[0] |= (uint8_t)shifted;

	// Spill into the next byte only when the code crosses the byte boundary.
	if ((shift + codesize) > 8)
		pByte[1] |= (uint8_t)(shifted >> 8);

	bit_offset += codesize;
}
13944
13945
void pack_astc_solid_block(void* pDst_block, const color32& color)
13946
{
13947
uint32_t r = color[0], g = color[1], b = color[2];
13948
uint32_t a = color[3];
13949
13950
uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
13951
uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
13952
13953
pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
13954
13955
pOutput[1] = 0xffffffff;
13956
pOutput[2] = 0;
13957
pOutput[3] = 0;
13958
13959
int bit_pos = 64;
13960
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, r | (r << 8), 16);
13961
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, g | (g << 8), 16);
13962
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, b | (b << 8), 16);
13963
astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, a | (a << 8), 16);
13964
}
13965
13966
// See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation
13967
#ifdef _DEBUG
13968
// 32-bit integer mixing function used by the ASTC partition pattern generator
// (see the Khronos Data Format spec reference above). All arithmetic wraps modulo 2^32.
static inline uint32_t astc_hash52(uint32_t v)
{
	uint32_t h = v;

	h ^= h >> 15;
	h -= h << 17;
	h += h << 7;
	h += h << 4;

	h ^= h >> 5;
	h += h << 16;
	h ^= h >> 7;
	h ^= h >> 3;

	h ^= h << 6;
	h ^= h >> 17;

	return h;
}
13976
13977
int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
13978
{
13979
if (small_block)
13980
{
13981
x <<= 1; y <<= 1; z <<= 1;
13982
}
13983
seed += (partitioncount - 1) * 1024;
13984
uint32_t rnum = astc_hash52(seed);
13985
uint8_t seed1 = rnum & 0xF;
13986
uint8_t seed2 = (rnum >> 4) & 0xF;
13987
uint8_t seed3 = (rnum >> 8) & 0xF;
13988
uint8_t seed4 = (rnum >> 12) & 0xF;
13989
uint8_t seed5 = (rnum >> 16) & 0xF;
13990
uint8_t seed6 = (rnum >> 20) & 0xF;
13991
uint8_t seed7 = (rnum >> 24) & 0xF;
13992
uint8_t seed8 = (rnum >> 28) & 0xF;
13993
uint8_t seed9 = (rnum >> 18) & 0xF;
13994
uint8_t seed10 = (rnum >> 22) & 0xF;
13995
uint8_t seed11 = (rnum >> 26) & 0xF;
13996
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
13997
13998
seed1 *= seed1; seed2 *= seed2;
13999
seed3 *= seed3; seed4 *= seed4;
14000
seed5 *= seed5; seed6 *= seed6;
14001
seed7 *= seed7; seed8 *= seed8;
14002
seed9 *= seed9; seed10 *= seed10;
14003
seed11 *= seed11; seed12 *= seed12;
14004
14005
int sh1, sh2, sh3;
14006
if (seed & 1)
14007
{
14008
sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5);
14009
}
14010
else
14011
{
14012
sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5);
14013
}
14014
sh3 = (seed & 0x10) ? sh1 : sh2;
14015
14016
seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
14017
seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
14018
seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
14019
14020
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
14021
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
14022
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
14023
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
14024
14025
a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
14026
14027
if (partitioncount < 4) d = 0;
14028
if (partitioncount < 3) c = 0;
14029
14030
if (a >= b && a >= c && a >= d)
14031
return 0;
14032
else if (b >= c && b >= d)
14033
return 1;
14034
else if (c >= d)
14035
return 2;
14036
else
14037
return 3;
14038
}
14039
#endif
14040
14041
// Lookup table used by astc_encode_quints(): indexed by the packed quint triple
// (q0 + 5 * q1 + 25 * q2, always < 125) and yielding the 7-bit code T whose bit
// fields are interleaved with the plain value bits in the output stream.
static const uint8_t g_astc_quint_encode[125] =
14042
{
14043
0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
14044
58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
14045
105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
14046
126, 127, 94, 95, 62, 39, 47, 55, 63, 31
14047
};
14048
14049
// Encodes 3 values to output, usable for any range that uses quints and bits
14050
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
14051
{
14052
// First extract the quints and the bits from the 3 input values
14053
int quints = 0, bits[3];
14054
const uint32_t bit_mask = (1 << n) - 1;
14055
for (int i = 0; i < 3; i++)
14056
{
14057
static const int s_muls[3] = { 1, 5, 25 };
14058
14059
const int t = pValues[i] >> n;
14060
14061
quints += t * s_muls[i];
14062
bits[i] = pValues[i] & bit_mask;
14063
}
14064
14065
// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
14066
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
14067
14068
assert(quints < 125);
14069
const int T = g_astc_quint_encode[quints];
14070
14071
// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
14072
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
14073
(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
14074
}
14075
14076
// Packs values using ASTC's BISE to output buffer.
14077
static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range)
14078
{
14079
uint32_t temp[5] = { 0, 0, 0, 0, 0 };
14080
14081
const int num_bits = g_astc_bise_range_table[range][0];
14082
14083
int group_size = 0;
14084
if (g_astc_bise_range_table[range][1])
14085
group_size = 5;
14086
else if (g_astc_bise_range_table[range][2])
14087
group_size = 3;
14088
14089
if (group_size)
14090
{
14091
// Range has trits or quints - pack each group of 5 or 3 values
14092
const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
14093
14094
for (int group_index = 0; group_index < total_groups; group_index++)
14095
{
14096
uint8_t vals[5] = { 0, 0, 0, 0, 0 };
14097
14098
const int limit = basisu::minimum(group_size, num_vals - group_index * group_size);
14099
for (int i = 0; i < limit; i++)
14100
vals[i] = pSrc_vals[group_index * group_size + i];
14101
14102
if (group_size == 5)
14103
astc_encode_trits(temp, vals, bit_pos, num_bits);
14104
else
14105
astc_encode_quints(temp, vals, bit_pos, num_bits);
14106
}
14107
}
14108
else
14109
{
14110
for (int i = 0; i < num_vals; i++)
14111
astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
14112
}
14113
14114
pDst[0] |= temp[0]; pDst[1] |= temp[1];
14115
pDst[2] |= temp[2]; pDst[3] |= temp[3];
14116
}
14117
14118
// Field widths (in bits) used by pack_astc_block() when laying out a physical ASTC block.
// Block mode field; packing of the remaining fields starts at this bit position.
const uint32_t ASTC_BLOCK_MODE_BITS = 11;
14119
// Partition count field (written as m_subsets - 1).
const uint32_t ASTC_PART_BITS = 2;
14120
// Color endpoint mode field.
const uint32_t ASTC_CEM_BITS = 4;
14121
// Partition seed field, only written for multi-subset blocks.
const uint32_t ASTC_PARTITION_INDEX_BITS = 10;
14122
// Color component selector field, only written for dual plane blocks.
const uint32_t ASTC_CCS_BITS = 2;
14123
14124
// ASTC block mode value written into the first two bytes of the output block, indexed by UASTC mode.
const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 };
14125
14126
bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode)
14127
{
14128
assert(uastc_mode < TOTAL_UASTC_MODES);
14129
uint8_t* pDst_bytes = reinterpret_cast<uint8_t*>(pDst);
14130
14131
const int total_weights = pBlock->m_dual_plane ? 32 : 16;
14132
14133
// Set mode bits - see Table 146-147
14134
uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode];
14135
pDst_bytes[0] = (uint8_t)mode;
14136
pDst_bytes[1] = (uint8_t)(mode >> 8);
14137
14138
memset(pDst_bytes + 2, 0, 16 - 2);
14139
14140
int bit_pos = ASTC_BLOCK_MODE_BITS;
14141
14142
// We only support 1-5 bit weight indices
14143
assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]);
14144
const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0];
14145
14146
// See table 143 - PART
14147
astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS);
14148
14149
if (pBlock->m_subsets == 1)
14150
astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS);
14151
else
14152
{
14153
// See table 145
14154
astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS);
14155
14156
// Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM
14157
astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2);
14158
}
14159
14160
if (pBlock->m_dual_plane)
14161
{
14162
const int total_weight_bits = total_weights * bits_per_weight;
14163
14164
// See Illegal Encodings 23.24
14165
// https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings
14166
assert((total_weight_bits >= 24) && (total_weight_bits <= 96));
14167
14168
int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS;
14169
astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS);
14170
}
14171
14172
const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets;
14173
assert(num_cem_pairs <= 9);
14174
14175
astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]);
14176
14177
// Write the weight bits in reverse bit order.
14178
switch (bits_per_weight)
14179
{
14180
case 1:
14181
{
14182
const uint32_t N = 1;
14183
for (int i = 0; i < total_weights; i++)
14184
{
14185
const uint32_t ofs = 128 - N - i;
14186
assert((ofs >> 3) < 16);
14187
pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
14188
}
14189
break;
14190
}
14191
case 2:
14192
{
14193
const uint32_t N = 2;
14194
for (int i = 0; i < total_weights; i++)
14195
{
14196
static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 };
14197
const uint32_t ofs = 128 - N - (i * N);
14198
assert((ofs >> 3) < 16);
14199
pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7));
14200
}
14201
break;
14202
}
14203
case 3:
14204
{
14205
const uint32_t N = 3;
14206
for (int i = 0; i < total_weights; i++)
14207
{
14208
static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
14209
14210
const uint32_t ofs = 128 - N - (i * N);
14211
const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7);
14212
14213
uint32_t index = ofs >> 3;
14214
assert(index < 16);
14215
pDst_bytes[index++] |= rev & 0xFF;
14216
if (index < 16)
14217
pDst_bytes[index++] |= (rev >> 8);
14218
}
14219
break;
14220
}
14221
case 4:
14222
{
14223
const uint32_t N = 4;
14224
for (int i = 0; i < total_weights; i++)
14225
{
14226
static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
14227
const int ofs = 128 - N - (i * N);
14228
assert(ofs >= 0 && (ofs >> 3) < 16);
14229
pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7));
14230
}
14231
break;
14232
}
14233
case 5:
14234
{
14235
const uint32_t N = 5;
14236
for (int i = 0; i < total_weights; i++)
14237
{
14238
static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 };
14239
14240
const uint32_t ofs = 128 - N - (i * N);
14241
const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7);
14242
14243
uint32_t index = ofs >> 3;
14244
assert(index < 16);
14245
pDst_bytes[index++] |= rev & 0xFF;
14246
if (index < 16)
14247
pDst_bytes[index++] |= (rev >> 8);
14248
}
14249
14250
break;
14251
}
14252
default:
14253
assert(0);
14254
break;
14255
}
14256
14257
return true;
14258
}
14259
14260
const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern)
14261
{
14262
const uint8_t* pSubset_anchor_indices = g_zero_pattern;
14263
pPartition_pattern = g_zero_pattern;
14264
14265
if (subsets >= 2)
14266
{
14267
if (subsets == 3)
14268
{
14269
pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
14270
pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0];
14271
}
14272
else if (mode == 7)
14273
{
14274
pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
14275
pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0];
14276
}
14277
else
14278
{
14279
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
14280
pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0];
14281
}
14282
}
14283
14284
return pSubset_anchor_indices;
14285
}
14286
14287
// Returns the single bit at position bit_offset in pBuf (bit 0 is the LSB of byte 0)
// and advances bit_offset by one.
static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset)
{
	const uint32_t pos = bit_offset++;
	return ((uint32_t)(pBuf[pos >> 3] >> (pos & 7))) & 1U;
}
14293
14294
static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
14295
{
14296
assert(codesize <= 9);
14297
if (!codesize)
14298
return 0;
14299
14300
if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
14301
{
14302
const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
14303
14304
uint32_t byte_bit_offset = bit_offset & 7U;
14305
14306
uint32_t bits = pBytes[0] >> byte_bit_offset;
14307
uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
14308
14309
uint32_t bits_remaining = codesize - bits_read;
14310
if (bits_remaining)
14311
bits |= ((uint32_t)pBytes[1]) << bits_read;
14312
14313
bit_offset += codesize;
14314
14315
return bits & ((1U << codesize) - 1U);
14316
}
14317
14318
uint32_t byte_bit_offset = bit_offset & 7U;
14319
const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]);
14320
bit_offset += codesize;
14321
return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
14322
}
14323
14324
// Reads codesize (0-64) bits starting at bit position bit_offset of pBuf, one byte-sized piece
// at a time, and advances bit_offset by codesize. The first bit read becomes bit 0 of the result.
inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
{
	assert(codesize <= 64U);

	uint64_t result = 0;

	for (uint32_t gathered = 0; gathered < codesize; )
	{
		const uint32_t shift = bit_offset & 7U;

		// Take whatever is left in the current byte, but no more than is still needed.
		const uint32_t available = 8U - shift;
		const uint32_t wanted = codesize - gathered;
		const uint32_t take = (wanted < available) ? wanted : available;

		const uint32_t piece = ((uint32_t)(pBuf[bit_offset >> 3U] >> shift)) & ((1U << take) - 1U);
		result |= ((uint64_t)piece) << gathered;

		gathered += take;
		bit_offset += take;
	}

	return result;
}
14346
14347
static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
14348
{
14349
assert(codesize <= 9);
14350
if (!codesize)
14351
return 0;
14352
assert(bit_offset < 112);
14353
14354
if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
14355
{
14356
const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
14357
14358
uint32_t byte_bit_offset = bit_offset & 7U;
14359
14360
uint32_t bits = pBytes[0] >> byte_bit_offset;
14361
uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
14362
14363
uint32_t bits_remaining = codesize - bits_read;
14364
if (bits_remaining)
14365
bits |= ((uint32_t)pBytes[1]) << bits_read;
14366
14367
bit_offset += codesize;
14368
14369
return bits & ((1U << codesize) - 1U);
14370
}
14371
else
14372
{
14373
uint32_t byte_bit_offset = bit_offset & 7U;
14374
const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
14375
bit_offset += codesize;
14376
return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
14377
}
14378
}
14379
14380
// Decodes the packed 128-bit UASTC block 'blk' into 'unpacked': the mode, the common partition
// pattern index, the transcoding hints (only when read_hints is true) and the logical ASTC
// description in unpacked.m_astc (dual plane flag, subsets, partition seed, weight range, CEM,
// endpoints and weights). Solid color blocks only fill in the mode, pattern index, solid color
// and (when read_hints is true) the ETC1 hints.
// When blue_contract_check is true and the mode has 3 or more components, any subset whose second
// endpoint has a lower unquantized sum than its first gets its endpoints swapped and its weights inverted.
// Returns false if the mode or the common partition pattern index is out of range, otherwise true.
bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
14381
{
14382
//memset(&unpacked, 0, sizeof(unpacked));
14383
14384
// Disabled generator for the 128 entry mode lookup table (kept for reference).
#if 0
14385
uint8_t table[128];
14386
memset(table, 0xFF, sizeof(table));
14387
14388
{
14389
for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++)
14390
{
14391
const uint32_t code = g_uastc_mode_huff_codes[mode][0];
14392
const uint32_t codesize = g_uastc_mode_huff_codes[mode][1];
14393
14394
table[code] = mode;
14395
14396
uint32_t bits_left = 7 - codesize;
14397
for (uint32_t i = 0; i < (1 << bits_left); i++)
14398
table[code | (i << codesize)] = mode;
14399
}
14400
14401
for (uint32_t i = 0; i < 128; i++)
14402
printf("%u,", table[i]);
14403
exit(0);
14404
}
14405
#endif
14406
14407
// The low 7 bits of the first byte select the mode through a lookup table.
const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127];
14408
if (mode >= (int)TOTAL_UASTC_MODES)
14409
return false;
14410
14411
unpacked.m_mode = mode;
14412
unpacked.m_common_pattern = 0;
14413
14414
// Start reading right after the mode code ([1] is the code's size in bits).
uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
14415
14416
// Solid color blocks: four 8-bit components, followed by the ETC1 hints.
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
14417
{
14418
unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14419
unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14420
unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14421
unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14422
14423
if (read_hints)
14424
{
14425
unpacked.m_etc1_flip = false;
14426
unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
14427
unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14428
unpacked.m_etc1_inten1 = 0;
14429
unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
14430
unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14431
unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14432
unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14433
unpacked.m_etc1_bias = 0;
14434
unpacked.m_etc2_hints = 0;
14435
}
14436
14437
return true;
14438
}
14439
14440
// Transcoding hints: which ones are present depends on the mode. Absent hints are set to false/0.
if (read_hints)
14441
{
14442
if (g_uastc_mode_has_bc1_hint0[mode])
14443
unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0;
14444
else
14445
unpacked.m_bc1_hint0 = false;
14446
14447
if (g_uastc_mode_has_bc1_hint1[mode])
14448
unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0;
14449
else
14450
unpacked.m_bc1_hint1 = false;
14451
14452
unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0;
14453
unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
14454
unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14455
unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
14456
14457
if (g_uastc_mode_has_etc1_bias[mode])
14458
unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14459
else
14460
unpacked.m_etc1_bias = 0;
14461
14462
if (g_uastc_mode_has_alpha[mode])
14463
{
14464
unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
14465
//assert(unpacked.m_etc2_hints > 0);
14466
}
14467
else
14468
unpacked.m_etc2_hints = 0;
14469
}
14470
else
14471
// The caller doesn't want the hints: step over all of them at once, leaving the hint fields untouched.
bit_ofs += g_uastc_mode_total_hint_bits[mode];
14472
14473
// Multi-subset modes store a common partition pattern index: 5 bits for the 2 subset modes, 4 bits for mode 3 (3 subsets).
uint32_t subsets = 1;
14474
switch (mode)
14475
{
14476
case 2:
14477
case 4:
14478
case 7:
14479
case 9:
14480
case 16:
14481
unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
14482
subsets = 2;
14483
break;
14484
case 3:
14485
unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4);
14486
subsets = 3;
14487
break;
14488
default:
14489
break;
14490
}
14491
14492
// Validate the pattern index against the table used by this mode and fetch the matching ASTC partition seed.
uint32_t part_seed = 0;
14493
switch (mode)
14494
{
14495
case 2:
14496
case 4:
14497
case 9:
14498
case 16:
14499
if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2)
14500
return false;
14501
14502
part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc;
14503
break;
14504
case 3:
14505
if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3)
14506
return false;
14507
14508
part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc;
14509
break;
14510
case 7:
14511
if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS)
14512
return false;
14513
14514
part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2;
14515
break;
14516
default:
14517
break;
14518
}
14519
14520
// Dual plane modes: 6, 11 and 13 store a 2-bit color component selector, mode 17 always uses component 3.
// m_ccs is not written for single plane modes.
uint32_t total_planes = 1;
14521
switch (mode)
14522
{
14523
case 6:
14524
case 11:
14525
case 13:
14526
unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
14527
total_planes = 2;
14528
break;
14529
case 17:
14530
unpacked.m_astc.m_ccs = 3;
14531
total_planes = 2;
14532
break;
14533
default:
14534
break;
14535
}
14536
14537
unpacked.m_astc.m_dual_plane = (total_planes == 2);
14538
14539
unpacked.m_astc.m_subsets = subsets;
14540
unpacked.m_astc.m_partition_seed = part_seed;
14541
14542
const uint32_t total_comps = g_uastc_mode_comps[mode];
14543
14544
const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
14545
14546
unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode];
14547
14548
// Two endpoint values (low/high) per component per subset.
const uint32_t total_values = total_comps * 2 * subsets;
14549
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
14550
14551
const uint32_t cem = g_uastc_mode_cem[mode];
14552
unpacked.m_astc.m_cem = cem;
14553
14554
const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
14555
const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
14556
const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];
14557
14558
// Endpoint ranges using trits pack 5 values per group, ranges using quints pack 3 per group.
// total_tqs stays 0 for ranges made of plain bits only.
uint32_t total_tqs = 0;
14559
uint32_t bundle_size = 0, mul = 0;
14560
if (ep_trits)
14561
{
14562
total_tqs = (total_values + 4) / 5;
14563
bundle_size = 5;
14564
mul = 3;
14565
}
14566
else if (ep_quints)
14567
{
14568
total_tqs = (total_values + 2) / 3;
14569
bundle_size = 3;
14570
mul = 5;
14571
}
14572
14573
// All packed trit/quint groups are stored first (8 bits per full trit group, 7 per full quint group).
uint32_t tq_values[8];
14574
for (uint32_t i = 0; i < total_tqs; i++)
14575
{
14576
uint32_t num_bits = ep_trits ? 8 : 7;
14577
if (i == (total_tqs - 1))
14578
{
14579
// The final group may hold fewer values, in which case it is stored with fewer bits.
uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size;
14580
if (ep_trits)
14581
{
14582
switch (num_remaining)
14583
{
14584
case 1: num_bits = 2; break;
14585
case 2: num_bits = 4; break;
14586
case 3: num_bits = 5; break;
14587
case 4: num_bits = 7; break;
14588
default: break;
14589
}
14590
}
14591
else if (ep_quints)
14592
{
14593
switch (num_remaining)
14594
{
14595
case 1: num_bits = 3; break;
14596
case 2: num_bits = 5; break;
14597
default: break;
14598
}
14599
}
14600
}
14601
14602
tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits);
14603
} // i
14604
14605
// Then come the plain bits of each endpoint value. The trit/quint of each value is peeled off the
// current group as successive base-3/base-5 digits (lowest digit first) and placed above the plain bits.
uint32_t accum = 0;
14606
uint32_t accum_remaining = 0;
14607
uint32_t next_tq_index = 0;
14608
14609
for (uint32_t i = 0; i < total_values; i++)
14610
{
14611
uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits);
14612
14613
if (total_tqs)
14614
{
14615
if (!accum_remaining)
14616
{
14617
assert(next_tq_index < total_tqs);
14618
accum = tq_values[next_tq_index++];
14619
accum_remaining = bundle_size;
14620
}
14621
14622
// TODO: Optimize with tables
14623
uint32_t v = accum % mul;
14624
accum /= mul;
14625
accum_remaining--;
14626
14627
value |= (v << ep_bits);
14628
}
14629
14630
unpacked.m_astc.m_endpoints[i] = (uint8_t)value;
14631
}
14632
14633
const uint8_t* pPartition_pattern;
14634
const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern);
14635
14636
// Debug-only cross-checks: the pattern table must match the ASTC partition function for part_seed,
// and each subset's anchor must be the first texel belonging to that subset.
#ifdef _DEBUG
14637
for (uint32_t i = 0; i < 16; i++)
14638
assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
14639
14640
for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
14641
{
14642
uint32_t anchor_index = 0;
14643
14644
for (uint32_t i = 0; i < 16; i++)
14645
{
14646
if (pPartition_pattern[i] == subset_index)
14647
{
14648
anchor_index = i;
14649
break;
14650
}
14651
}
14652
14653
assert(pSubset_anchor_indices[subset_index] == anchor_index);
14654
}
14655
#endif
14656
14657
// Disabled generic weight reader; the specialized paths below are used instead.
#if 0
14658
const uint32_t total_planes_shift = total_planes - 1;
14659
for (uint32_t i = 0; i < 16 * total_planes; i++)
14660
{
14661
uint32_t num_bits = weight_bits;
14662
for (uint32_t s = 0; s < subsets; s++)
14663
{
14664
if (pSubset_anchor_indices[s] == (i >> total_planes_shift))
14665
{
14666
num_bits--;
14667
break;
14668
}
14669
}
14670
14671
unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits);
14672
}
14673
#endif
14674
14675
// Weights. Anchor weights are stored with one bit fewer than all other weights.
if (mode == 18)
14676
{
14677
// Mode 18 is the only mode with more than 64 weight bits.
14678
for (uint32_t i = 0; i < 16; i++)
14679
unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1));
14680
}
14681
else
14682
{
14683
// All other modes have <= 64 weight bits.
14684
uint64_t bits;
14685
14686
// Read the weight bits
14687
if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
14688
bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum<int>(64, 128 - (int)bit_ofs));
14689
else
14690
{
14691
// Fast path: start from the upper 64 bits of the block (dwords 2 and 3).
bits = blk.m_dwords[2];
14692
bits |= (((uint64_t)blk.m_dwords[3]) << 32U);
14693
14694
if (bit_ofs >= 64U)
14695
bits >>= (bit_ofs - 64U);
14696
else
14697
{
14698
assert(bit_ofs >= 56U);
14699
14700
// The weights begin inside byte 7: make room and splice in that byte's top bits_needed bits.
uint32_t bits_needed = 64U - bit_ofs;
14701
bits <<= bits_needed;
14702
bits |= (blk.m_bytes[7] >> (8U - bits_needed));
14703
}
14704
}
14705
14706
// From here on bit_ofs is a bit position within 'bits', not within the block.
bit_ofs = 0;
14707
14708
const uint32_t mask = (1U << weight_bits) - 1U;
14709
const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U;
14710
14711
if (total_planes == 2)
14712
{
14713
// Dual plane modes always have a single subset, and the first 2 weights are anchors.
14714
14715
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14716
bit_ofs += (weight_bits - 1);
14717
14718
unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14719
bit_ofs += (weight_bits - 1);
14720
14721
for (uint32_t i = 2; i < 32; i++)
14722
{
14723
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14724
bit_ofs += weight_bits;
14725
}
14726
}
14727
else
14728
{
14729
if (subsets == 1)
14730
{
14731
// Specialize the single subset case.
14732
if (weight_bits == 4)
14733
{
14734
assert(bit_ofs == 0);
14735
14736
// Specialize the most common case: 4-bit weights.
// The anchor (weight 0) takes 3 bits, so weight k (k >= 1) starts at bit 3 + 4 * (k - 1).
14737
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7);
14738
unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15);
14739
unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15);
14740
unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15);
14741
14742
unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15);
14743
unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15);
14744
unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15);
14745
unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15);
14746
14747
unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15);
14748
unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15);
14749
unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15);
14750
unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15);
14751
14752
unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15);
14753
unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15);
14754
unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15);
14755
unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15);
14756
}
14757
else
14758
{
14759
// First weight is always an anchor.
14760
unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14761
bit_ofs += (weight_bits - 1);
14762
14763
for (uint32_t i = 1; i < 16; i++)
14764
{
14765
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14766
bit_ofs += weight_bits;
14767
}
14768
}
14769
}
14770
else
14771
{
14772
// Multiple subsets: every texel listed in the anchor table is stored with one bit fewer.
const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2];
14773
14774
for (uint32_t i = 0; i < 16; i++)
14775
{
14776
if ((i == a0) || (i == a1) || (i == a2))
14777
{
14778
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
14779
bit_ofs += (weight_bits - 1);
14780
}
14781
else
14782
{
14783
unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
14784
bit_ofs += weight_bits;
14785
}
14786
}
14787
}
14788
}
14789
}
14790
14791
if ((blue_contract_check) && (total_comps >= 3))
14792
{
14793
// We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care.
14794
bool invert_subset[3] = { false, false, false };
14795
bool any_flag = false;
14796
14797
for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
14798
{
14799
// s0/s1: sums of the first three components' unquantized low (even index) and high (odd index) endpoint values.
const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant +
14800
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant +
14801
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant;
14802
14803
const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant +
14804
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant +
14805
g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant;
14806
14807
if (s1 < s0)
14808
{
14809
// Swap the low/high endpoints of every component in this subset and remember to invert its weights.
for (uint32_t c = 0; c < total_comps; c++)
14810
std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]);
14811
14812
invert_subset[subset_index] = true;
14813
any_flag = true;
14814
}
14815
}
14816
14817
if (any_flag)
14818
{
14819
const uint32_t weight_mask = (1 << weight_bits) - 1;
14820
14821
// Invert (weight_mask - w) the weights of every texel belonging to a swapped subset, in both planes if dual plane.
for (uint32_t i = 0; i < 16; i++)
14822
{
14823
uint32_t subset = pPartition_pattern[i];
14824
14825
if (invert_subset[subset])
14826
{
14827
unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]);
14828
14829
if (total_planes == 2)
14830
unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]);
14831
}
14832
}
14833
}
14834
}
14835
14836
return true;
14837
}
14838
14839
// Interpolation weight tables indexed by the number of weight bits (1-5); entry 0 is unused (nullptr).
static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 };
14840
14841
bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb)
14842
{
14843
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
14844
{
14845
for (uint32_t i = 0; i < 16; i++)
14846
pPixels[i] = solid_color;
14847
return true;
14848
}
14849
14850
color32 endpoints[3][2];
14851
14852
const uint32_t total_subsets = g_uastc_mode_subsets[mode];
14853
const uint32_t total_comps = basisu::minimum<uint32_t>(4U, g_uastc_mode_comps[mode]);
14854
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
14855
const uint32_t total_planes = g_uastc_mode_planes[mode];
14856
const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
14857
const uint32_t weight_levels = 1 << weight_bits;
14858
14859
for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
14860
{
14861
if (total_comps == 2)
14862
{
14863
const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant;
14864
const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant;
14865
14866
const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant;
14867
const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 1]].m_unquant;
14868
14869
endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al);
14870
endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah);
14871
}
14872
else
14873
{
14874
for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++)
14875
{
14876
endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant;
14877
endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant;
14878
}
14879
for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++)
14880
{
14881
endpoints[subset_index][0][comp_index] = 255;
14882
endpoints[subset_index][1][comp_index] = 255;
14883
}
14884
}
14885
}
14886
14887
color32 block_colors[3][32];
14888
14889
const uint32_t* pWeights = g_astc_weight_tables[weight_bits];
14890
14891
for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
14892
{
14893
for (uint32_t l = 0; l < weight_levels; l++)
14894
{
14895
if (total_comps == 2)
14896
{
14897
const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb);
14898
const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb);
14899
14900
block_colors[subset_index][l].set(lc, lc, lc, ac);
14901
}
14902
else
14903
{
14904
uint32_t comp_index;
14905
for (comp_index = 0; comp_index < total_comps; comp_index++)
14906
block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb);
14907
14908
for (; comp_index < 4; comp_index++)
14909
block_colors[subset_index][l][comp_index] = 255;
14910
}
14911
}
14912
}
14913
14914
const uint8_t* pPartition_pattern = g_zero_pattern;
14915
14916
if (total_subsets >= 2)
14917
{
14918
if (total_subsets == 3)
14919
pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
14920
else if (mode == 7)
14921
pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
14922
else
14923
pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
14924
14925
#ifdef _DEBUG
14926
for (uint32_t i = 0; i < 16; i++)
14927
{
14928
assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true));
14929
}
14930
#endif
14931
}
14932
14933
if (total_planes == 1)
14934
{
14935
if (total_subsets == 1)
14936
{
14937
for (uint32_t i = 0; i < 16; i++)
14938
{
14939
assert(astc.m_weights[i] < weight_levels);
14940
pPixels[i] = block_colors[0][astc.m_weights[i]];
14941
}
14942
}
14943
else
14944
{
14945
for (uint32_t i = 0; i < 16; i++)
14946
{
14947
assert(astc.m_weights[i] < weight_levels);
14948
pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]];
14949
}
14950
}
14951
}
14952
else
14953
{
14954
assert(total_subsets == 1);
14955
14956
for (uint32_t i = 0; i < 16; i++)
14957
{
14958
const uint32_t subset_index = 0; // pPartition_pattern[i];
14959
14960
const uint32_t weight_index0 = astc.m_weights[i * 2];
14961
const uint32_t weight_index1 = astc.m_weights[i * 2 + 1];
14962
14963
assert(weight_index0 < weight_levels && weight_index1 < weight_levels);
14964
14965
color32& c = pPixels[i];
14966
for (uint32_t comp = 0; comp < 4; comp++)
14967
{
14968
if ((int)comp == astc.m_ccs)
14969
c[comp] = block_colors[subset_index][weight_index1][comp];
14970
else
14971
c[comp] = block_colors[subset_index][weight_index0][comp];
14972
}
14973
}
14974
}
14975
14976
return true;
14977
}
14978
14979
bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb)
14980
{
14981
return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb);
14982
}
14983
14984
bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb)
14985
{
14986
unpacked_uastc_block unpacked_blk;
14987
14988
if (!unpack_uastc(blk, unpacked_blk, false, false))
14989
return false;
14990
14991
return unpack_uastc(unpacked_blk, pPixels, srgb);
14992
}
14993
14994
// Determines the best shared pbits to use to encode xl/xh
14995
static void determine_shared_pbits(
14996
uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
14997
color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
14998
{
14999
const uint32_t total_bits = comp_bits + 1;
15000
assert(total_bits >= 4 && total_bits <= 8);
15001
15002
const int iscalep = (1 << total_bits) - 1;
15003
const float scalep = (float)iscalep;
15004
15005
float best_err = 1e+9f;
15006
15007
for (int p = 0; p < 2; p++)
15008
{
15009
color_quad_u8 xMinColor, xMaxColor;
15010
for (uint32_t c = 0; c < 4; c++)
15011
{
15012
xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15013
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15014
}
15015
15016
color_quad_u8 scaledLow, scaledHigh;
15017
15018
for (uint32_t i = 0; i < 4; i++)
15019
{
15020
scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
15021
scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
15022
assert(scaledLow.m_c[i] <= 255);
15023
15024
scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
15025
scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
15026
assert(scaledHigh.m_c[i] <= 255);
15027
}
15028
15029
float err = 0;
15030
for (uint32_t i = 0; i < total_comps; i++)
15031
err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]);
15032
15033
if (err < best_err)
15034
{
15035
best_err = err;
15036
best_pbits[0] = p;
15037
best_pbits[1] = p;
15038
for (uint32_t j = 0; j < 4; j++)
15039
{
15040
bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1;
15041
bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1;
15042
}
15043
}
15044
}
15045
}
15046
15047
// Determines the best unique pbits to use to encode xl/xh
15048
static void determine_unique_pbits(
15049
uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
15050
color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
15051
{
15052
const uint32_t total_bits = comp_bits + 1;
15053
const int iscalep = (1 << total_bits) - 1;
15054
const float scalep = (float)iscalep;
15055
15056
float best_err0 = 1e+9f;
15057
float best_err1 = 1e+9f;
15058
15059
for (int p = 0; p < 2; p++)
15060
{
15061
color_quad_u8 xMinColor, xMaxColor;
15062
15063
for (uint32_t c = 0; c < 4; c++)
15064
{
15065
xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15066
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
15067
}
15068
15069
color_quad_u8 scaledLow, scaledHigh;
15070
for (uint32_t i = 0; i < 4; i++)
15071
{
15072
scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
15073
scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
15074
assert(scaledLow.m_c[i] <= 255);
15075
15076
scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
15077
scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
15078
assert(scaledHigh.m_c[i] <= 255);
15079
}
15080
15081
float err0 = 0, err1 = 0;
15082
for (uint32_t i = 0; i < total_comps; i++)
15083
{
15084
err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f);
15085
err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f);
15086
}
15087
15088
if (err0 < best_err0)
15089
{
15090
best_err0 = err0;
15091
best_pbits[0] = p;
15092
15093
bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1;
15094
bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1;
15095
bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1;
15096
bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1;
15097
}
15098
15099
if (err1 < best_err1)
15100
{
15101
best_err1 = err1;
15102
best_pbits[1] = p;
15103
15104
bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1;
15105
bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1;
15106
bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1;
15107
bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1;
15108
}
15109
}
15110
}
15111
15112
bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst)
15113
{
15114
unpacked_uastc_block unpacked_src_blk;
15115
if (!unpack_uastc(src_blk, unpacked_src_blk, true, false))
15116
return false;
15117
15118
bool success = false;
15119
if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
15120
{
15121
pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color);
15122
success = true;
15123
}
15124
else
15125
{
15126
success = pack_astc_block(static_cast<uint32_t*>(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode);
15127
}
15128
15129
return success;
15130
}
15131
15132
bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk)
15133
{
15134
memset(&dst_blk, 0, sizeof(dst_blk));
15135
15136
const uint32_t mode = unpacked_src_blk.m_mode;
15137
15138
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
15139
const uint32_t total_comps = g_uastc_mode_comps[mode];
15140
15141
switch (mode)
15142
{
15143
case 0:
15144
case 5:
15145
case 10:
15146
case 12:
15147
case 14:
15148
case 15:
15149
case 18:
15150
{
15151
// MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB
15152
// MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB
15153
// MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6
15154
// MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6
15155
// MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6
15156
// MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6
15157
// MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6
15158
dst_blk.m_mode = 6;
15159
15160
float xl[4], xh[4];
15161
if (total_comps == 2)
15162
{
15163
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
15164
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
15165
15166
xl[1] = xl[0];
15167
xh[1] = xh[0];
15168
15169
xl[2] = xl[0];
15170
xh[2] = xh[0];
15171
15172
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
15173
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
15174
}
15175
else
15176
{
15177
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
15178
xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
15179
xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f;
15180
15181
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
15182
xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
15183
xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f;
15184
15185
if (total_comps == 4)
15186
{
15187
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f;
15188
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f;
15189
}
15190
else
15191
{
15192
xl[3] = 1.0f;
15193
xh[3] = 1.0f;
15194
}
15195
}
15196
15197
uint32_t best_pbits[2];
15198
color_quad_u8 bestMinColor, bestMaxColor;
15199
determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15200
15201
dst_blk.m_low[0] = bestMinColor;
15202
dst_blk.m_high[0] = bestMaxColor;
15203
15204
if (total_comps == 3)
15205
{
15206
dst_blk.m_low[0].m_c[3] = 127;
15207
dst_blk.m_high[0].m_c[3] = 127;
15208
}
15209
15210
dst_blk.m_pbits[0][0] = best_pbits[0];
15211
dst_blk.m_pbits[0][1] = best_pbits[1];
15212
15213
if (mode == 18)
15214
{
15215
const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 };
15216
for (uint32_t i = 0; i < 16; i++)
15217
dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15218
}
15219
else if (mode == 14)
15220
{
15221
const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 };
15222
for (uint32_t i = 0; i < 16; i++)
15223
dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15224
}
15225
else if ((mode == 5) || (mode == 12))
15226
{
15227
const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 };
15228
for (uint32_t i = 0; i < 16; i++)
15229
dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]];
15230
}
15231
else
15232
{
15233
for (uint32_t i = 0; i < 16; i++)
15234
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15235
}
15236
15237
break;
15238
}
15239
case 1:
15240
{
15241
// DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3
15242
// Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables.
15243
dst_blk.m_mode = 3;
15244
15245
float xl[4], xh[4];
15246
xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f;
15247
xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f;
15248
xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f;
15249
xl[3] = 1.0f;
15250
15251
xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f;
15252
xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f;
15253
xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f;
15254
xh[3] = 1.0f;
15255
15256
uint32_t best_pbits[2];
15257
color_quad_u8 bestMinColor, bestMaxColor;
15258
memset(&bestMinColor, 0, sizeof(bestMinColor));
15259
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15260
determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15261
15262
for (uint32_t i = 0; i < 3; i++)
15263
{
15264
dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i];
15265
dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i];
15266
dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i];
15267
dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i];
15268
}
15269
dst_blk.m_pbits[0][0] = best_pbits[0];
15270
dst_blk.m_pbits[0][1] = best_pbits[1];
15271
dst_blk.m_pbits[1][0] = best_pbits[0];
15272
dst_blk.m_pbits[1][1] = best_pbits[1];
15273
15274
for (uint32_t i = 0; i < 16; i++)
15275
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15276
15277
break;
15278
}
15279
case 2:
15280
{
15281
// 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1
15282
dst_blk.m_mode = 1;
15283
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15284
15285
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15286
15287
float xl[4], xh[4];
15288
xl[3] = 1.0f;
15289
xh[3] = 1.0f;
15290
15291
for (uint32_t subset = 0; subset < 2; subset++)
15292
{
15293
for (uint32_t i = 0; i < 3; i++)
15294
{
15295
uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6];
15296
v = (v << 4) | v;
15297
xl[i] = v / 255.0f;
15298
15299
v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1];
15300
v = (v << 4) | v;
15301
xh[i] = v / 255.0f;
15302
}
15303
15304
uint32_t best_pbits[2] = { 0, 0 };
15305
color_quad_u8 bestMinColor, bestMaxColor;
15306
memset(&bestMinColor, 0, sizeof(bestMinColor));
15307
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15308
determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15309
15310
const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
15311
15312
for (uint32_t i = 0; i < 3; i++)
15313
{
15314
dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
15315
dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
15316
}
15317
15318
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15319
} // subset
15320
15321
for (uint32_t i = 0; i < 16; i++)
15322
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15323
15324
break;
15325
}
15326
case 3:
15327
{
15328
// DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2
15329
dst_blk.m_mode = 2;
15330
dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7;
15331
15332
const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm;
15333
15334
for (uint32_t subset = 0; subset < 3; subset++)
15335
{
15336
for (uint32_t comp = 0; comp < 3; comp++)
15337
{
15338
uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant;
15339
uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant;
15340
15341
// TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion.
15342
lo = (lo * 31 + 127) / 255;
15343
hi = (hi * 31 + 127) / 255;
15344
15345
const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset];
15346
15347
dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo;
15348
dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi;
15349
}
15350
}
15351
15352
for (uint32_t i = 0; i < 16; i++)
15353
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15354
15355
break;
15356
}
15357
case 4:
15358
{
15359
// 4. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3
15360
dst_blk.m_mode = 3;
15361
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15362
15363
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15364
15365
float xl[4], xh[4];
15366
xl[3] = 1.0f;
15367
xh[3] = 1.0f;
15368
15369
for (uint32_t subset = 0; subset < 2; subset++)
15370
{
15371
for (uint32_t i = 0; i < 3; i++)
15372
{
15373
xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f;
15374
xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f;
15375
}
15376
15377
uint32_t best_pbits[2] = { 0, 0 };
15378
color_quad_u8 bestMinColor, bestMaxColor;
15379
memset(&bestMinColor, 0, sizeof(bestMinColor));
15380
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15381
determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15382
15383
const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;
15384
15385
for (uint32_t i = 0; i < 3; i++)
15386
{
15387
dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
15388
dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
15389
}
15390
dst_blk.m_low[bc7_subset_index].m_c[3] = 127;
15391
dst_blk.m_high[bc7_subset_index].m_c[3] = 127;
15392
15393
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15394
dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
15395
15396
} // subset
15397
15398
for (uint32_t i = 0; i < 16; i++)
15399
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15400
15401
break;
15402
}
15403
case 6:
15404
case 11:
15405
case 13:
15406
case 17:
15407
{
15408
// MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB
15409
// MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5
15410
// MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5
15411
// MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5
15412
dst_blk.m_mode = 5;
15413
dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3;
15414
15415
if (total_comps == 2)
15416
{
15417
assert(unpacked_src_blk.m_astc.m_ccs == 3);
15418
15419
dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255);
15420
dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255);
15421
15422
dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0];
15423
dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0];
15424
15425
dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0];
15426
dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0];
15427
15428
dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant);
15429
dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant);
15430
}
15431
else
15432
{
15433
for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++)
15434
{
15435
uint32_t bc7_comp = astc_comp;
15436
// ASTC and BC7 handle dual plane component rotations differently:
15437
// ASTC: 2nd plane separately interpolates the CCS channel.
15438
// BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel.
15439
if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs)
15440
bc7_comp = 3;
15441
else if (astc_comp == 3)
15442
bc7_comp = unpacked_src_blk.m_astc.m_ccs;
15443
15444
uint32_t l = 255, h = 255;
15445
if (astc_comp < total_comps)
15446
{
15447
l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant;
15448
h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant;
15449
}
15450
15451
if (bc7_comp < 3)
15452
{
15453
l = (l * 127 + 127) / 255;
15454
h = (h * 127 + 127) / 255;
15455
}
15456
15457
dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l;
15458
dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h;
15459
}
15460
}
15461
15462
if (mode == 13)
15463
{
15464
for (uint32_t i = 0; i < 16; i++)
15465
{
15466
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0;
15467
dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 3 : 0;
15468
}
15469
}
15470
else
15471
{
15472
for (uint32_t i = 0; i < 16; i++)
15473
{
15474
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2];
15475
dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1];
15476
}
15477
}
15478
15479
break;
15480
}
15481
case 7:
15482
{
15483
// DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2
15484
dst_blk.m_mode = 2;
15485
dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73;
15486
15487
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k;
15488
15489
for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
15490
{
15491
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k);
15492
15493
for (uint32_t c = 0; c < 3; c++)
15494
{
15495
dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255;
15496
dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255;
15497
}
15498
}
15499
15500
for (uint32_t i = 0; i < 16; i++)
15501
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15502
15503
break;
15504
}
15505
case UASTC_MODE_INDEX_SOLID_COLOR:
15506
{
15507
// Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6)
15508
const color32& solid_color = unpacked_src_blk.m_solid_color;
15509
15510
uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error +
15511
g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error;
15512
15513
uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error +
15514
g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error;
15515
15516
if (best_err0 > 0 && best_err1 > 0)
15517
{
15518
dst_blk.m_mode = 5;
15519
15520
for (uint32_t c = 0; c < 3; c++)
15521
{
15522
dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo;
15523
dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi;
15524
}
15525
15526
memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16);
15527
15528
dst_blk.m_low[0].m_c[3] = solid_color.c[3];
15529
dst_blk.m_high[0].m_c[3] = solid_color.c[3];
15530
15531
//memset(dst_blk.m_alpha_selectors, 0, 16);
15532
}
15533
else
15534
{
15535
dst_blk.m_mode = 6;
15536
15537
uint32_t best_p = 0;
15538
if (best_err1 < best_err0)
15539
best_p = 1;
15540
15541
for (uint32_t c = 0; c < 4; c++)
15542
{
15543
dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo;
15544
dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi;
15545
}
15546
15547
dst_blk.m_pbits[0][0] = best_p;
15548
dst_blk.m_pbits[0][1] = best_p;
15549
memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16);
15550
}
15551
15552
break;
15553
}
15554
case 9:
15555
case 16:
15556
{
15557
// 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7
15558
// 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7
15559
15560
dst_blk.m_mode = 7;
15561
dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;
15562
15563
const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;
15564
15565
for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++)
15566
{
15567
float xl[4], xh[4];
15568
15569
if (total_comps == 2)
15570
{
15571
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f;
15572
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f;
15573
15574
xl[1] = xl[0];
15575
xh[1] = xh[0];
15576
15577
xl[2] = xl[0];
15578
xh[2] = xh[0];
15579
15580
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f;
15581
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f;
15582
}
15583
else
15584
{
15585
xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f;
15586
xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f;
15587
xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f;
15588
xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f;
15589
15590
xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f;
15591
xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f;
15592
xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f;
15593
xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f;
15594
}
15595
15596
uint32_t best_pbits[2] = { 0, 0 };
15597
color_quad_u8 bestMinColor, bestMaxColor;
15598
memset(&bestMinColor, 0, sizeof(bestMinColor));
15599
memset(&bestMaxColor, 0, sizeof(bestMaxColor));
15600
determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits);
15601
15602
const uint32_t bc7_subset_index = invert_partition ? (1 - astc_subset) : astc_subset;
15603
15604
dst_blk.m_low[bc7_subset_index] = bestMinColor;
15605
dst_blk.m_high[bc7_subset_index] = bestMaxColor;
15606
15607
dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
15608
dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
15609
} // astc_subset
15610
15611
for (uint32_t i = 0; i < 16; i++)
15612
dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
15613
15614
break;
15615
}
15616
default:
15617
return false;
15618
}
15619
15620
return true;
15621
}
15622
15623
bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk)
15624
{
15625
unpacked_uastc_block unpacked_src_blk;
15626
if (!unpack_uastc(src_blk, unpacked_src_blk, false, false))
15627
return false;
15628
15629
return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk);
15630
}
15631
15632
bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst)
15633
{
15634
bc7_optimization_results temp;
15635
if (!transcode_uastc_to_bc7(src_blk, temp))
15636
return false;
15637
15638
encode_bc7_block(pDst, &temp);
15639
return true;
15640
}
15641
15642
// Applies an ETC1 "bias" code to a quantized base color and returns the adjusted color.
//
// block_color: base color whose first three channels lie in [0, limit].
// bias:        code selecting a per-channel delta; the explicitly listed codes adjust
//              one channel (or all channels) on one particular subblock, every other
//              code is decoded as three base-3 digits (one per channel) mapped to -1/0/+1.
// limit:       largest valid channel value (the visible caller passes 31 or 15).
// subblock:    zero for the first subblock, nonzero for the second.
//
// Only channels 0..2 are biased. Channel 3 is copied from block_color so the returned
// color never carries an unassigned channel (previously it was left unwritten).
color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock)
{
	// Base-3 digit divisors used by the generic (default) bias decoding.
	static const int s_divs[3] = { 1, 3, 9 };

	color32 result;
	result[3] = block_color[3];

	for (uint32_t c = 0; c < 3; c++)
	{
		int delta = 0;

		switch (bias)
		{
		// First subblock only: -1 on a single channel.
		case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break;
		case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break;
		case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break;

		// First subblock only: +1 on a single channel.
		case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break;
		case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break;
		case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break;

		// Second subblock only: -1 on a single channel.
		case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break;
		case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break;
		case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break;

		// Second subblock only: +1 on a single channel.
		case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break;
		case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break;
		case 8: delta = subblock ? ((c == 2) ? 1 : 0) : 0; break;

		// All channels, both subblocks.
		case 10: delta = -2; break;

		// All channels, delta depends on the subblock.
		case 27: delta = subblock ? 0 : -1; break;
		case 28: delta = subblock ? -1 : 1; break;
		case 29: delta = subblock ? 1 : 0; break;
		case 30: delta = subblock ? -1 : 0; break;
		case 31: delta = subblock ? 0 : 1; break;

		default:
			// Generic code: channel c uses the c'th base-3 digit, mapped from 0..2 to -1..+1.
			delta = ((bias / s_divs[c]) % 3) - 1;
			break;
		}

		int v = block_color[c];
		if (v == 0)
		{
			// At the lower bound the delta is shifted upwards so the result cannot go negative.
			if (delta == -2)
				v += 3;
			else
				v += delta + 1;
		}
		else if (v == (int)limit)
		{
			// At the upper bound the delta is shifted downwards so the result cannot exceed limit.
			v += (delta - 1);
		}
		else
		{
			v += delta;
			// If the step left the valid range, apply it in the opposite direction instead.
			if ((v < 0) || (v > (int)limit))
				v = (v - delta) - delta;
		}

		assert(v >= 0);
		assert(v <= (int)limit);

		result[c] = (uint8_t)v;
	}

	return result;
}
15710
15711
// Picks an ETC1 selector for every pixel of subblocks [first_subblock, last_subblock)
// and writes the two 16-bit selector planes into dst_blk.m_bytes[4..7].
//
// pSource_pixels holds the 4x4 block in row-major order (index = x + y * 4). For each
// subblock the four block colors are fetched from dst_blk, reduced to a weighted RGB
// sum, and each pixel is classified by comparing twice its own weighted sum against
// the sums of adjacent block-color pairs (i.e. against the midpoints).
// Selector bits for pixel (x, y) live at bit position x * 4 + y of each plane.
// Bits belonging to subblocks outside the given range are left as zero.
static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock)
{
	// Maps "number of thresholds the pixel is below" to the stored selector value.
	static const uint8_t s_tran[4] = { 1, 0, 2, 3 };

	uint16_t l_bitmask = 0;
	uint16_t h_bitmask = 0;

	for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++)
	{
		color32 block_colors[4];
		dst_blk.get_block_colors(block_colors, subblock);

		uint32_t block_y[4];
		for (uint32_t i = 0; i < 4; i++)
			block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19;

		// Sums of neighbouring block colors; pixel values below use doubled weights to match.
		const uint32_t block_y01 = block_y[0] + block_y[1];
		const uint32_t block_y12 = block_y[1] + block_y[2];
		const uint32_t block_y23 = block_y[2] + block_y[3];

		// X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3
		// Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3

		const bool flipped = dst_blk.get_flip_bit();

		for (uint32_t i = 0; i < 8; i++)
		{
			// Flipped subblocks are 4x2 (two rows); unflipped ones are 2x4 (two columns).
			uint32_t px, py;
			if (flipped)
			{
				px = i & 3;
				py = subblock * 2 + (i >> 2);
			}
			else
			{
				px = subblock * 2 + (i >> 2);
				py = i & 3;
			}

			const color32& c = pSource_pixels[px + py * 4];
			const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38;

			const uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)];

			const uint32_t ofs = px * 4 + py;
			assert(ofs < 16);

			l_bitmask = (uint16_t)(l_bitmask | ((t & 1) << ofs));
			h_bitmask = (uint16_t)(h_bitmask | ((t >> 1) << ofs));
		}
	}

	// Each plane is stored with its high byte first.
	dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
	dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
	dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
	dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
}
15782
15783
// Precomputed selector bytes for solid-color ETC1 blocks, one row per selector value.
// The row picked by m_etc1_selector is copied verbatim into block bytes 4..7.
static const uint8_t s_etc1_solid_selectors[4][4] =
{
	{ 255, 255, 255, 255 },
	{ 255, 255,   0,   0 },
	{   0,   0,   0,   0 },
	{   0,   0, 255, 255 }
};
15784
15785
// A pixel position inside a 4x4 block.
struct etc_coord2
{
	uint8_t m_x; // column, 0..3
	uint8_t m_y; // row, 0..3
};

// [flip][subblock][pixel_index]
// Lists the 8 pixel coordinates covered by each ETC1 subblock.
// flip == 0: subblocks are the left/right 2x4 halves, walked column by column.
// flip == 1: subblocks are the top/bottom 4x2 halves, walked row by row.
const etc_coord2 g_etc1_pixel_coords[2][2][8] =
{
	// flip = 0
	{
		// subblock 0: columns 0-1
		{
			{ 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
			{ 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
		},
		// subblock 1: columns 2-3
		{
			{ 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
			{ 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
		}
	},
	// flip = 1
	{
		// subblock 0: rows 0-1
		{
			{ 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
			{ 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
		},
		// subblock 1: rows 2-3
		{
			{ 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
			{ 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
		}
	}
};
15814
15815
// Transcodes an unpacked UASTC block to an 8-byte ETC1 block, driven by the ETC1 hints
// (flip, diff, intensity tables, bias, solid-color fields) carried in the unpacked block.
//
// unpacked_src_blk: unpacked source block; its m_mode and m_etc1_* fields are read.
// block_pixels:     the block's decoded texels, indexed [y][x]. Not read for solid color blocks.
// pDst:             output, written as a decoder_etc_block.
void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
{
	decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);

	if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
	{
		// Solid blocks carry a complete precomputed ETC1 encoding: both subblocks use inten0 and
		// every pixel uses the same selector. The flip bit is left clear.
		dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2));

		if (unpacked_src_blk.m_etc1_diff)
		{
			// Differential mode: 5-bit base color with a zero delta.
			dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3);
			dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3);
			dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3);
		}
		else
		{
			// Individual mode: the same 4-bit color in both nibbles.
			dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4));
			dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4));
			dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4));
		}

		memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4);

		return;
	}

	const bool flip = unpacked_src_blk.m_etc1_flip != 0;
	const bool diff = unpacked_src_blk.m_etc1_diff != 0;

	dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2));

	// Largest base color component value: 5 bits in differential mode, 4 bits in individual mode.
	const uint32_t limit = diff ? 31 : 15;

	const bool has_bias = g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode] != 0;

	color32 block_colors[2];

	for (uint32_t subset = 0; subset < 2; subset++)
	{
		uint32_t avg_color[3] = { 0, 0, 0 };

		for (uint32_t j = 0; j < 8; j++)
		{
			const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j];

			avg_color[0] += block_pixels[c.m_y][c.m_x].r;
			avg_color[1] += block_pixels[c.m_y][c.m_x].g;
			avg_color[2] += block_pixels[c.m_y][c.m_x].b;
		} // j

		// Scale the 8-pixel sum from 0..8*255 to 0..limit with rounding (1020 is half of 8*255).
		block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255));
		block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255));
		block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255));
		block_colors[subset][3] = 0;

		if (has_bias)
		{
			block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset);
		}

	} // subset

	if (diff)
	{
		int dr = block_colors[1].r - block_colors[0].r;
		int dg = block_colors[1].g - block_colors[0].g;
		int db = block_colors[1].b - block_colors[0].b;

		dr = basisu::clamp<int>(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
		dg = basisu::clamp<int>(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
		db = basisu::clamp<int>(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax);

		// Deltas are stored as 3-bit two's complement.
		if (dr < 0) dr += 8;
		if (dg < 0) dg += 8;
		if (db < 0) db += 8;

		dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr);
		dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg);
		dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db);
	}
	else
	{
		dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4));
		dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4));
		dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4));
	}

	// NOTE(review): the trailing (0, 2) arguments are presumably the subblock range to process - confirm against etc1_determine_selectors().
	etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2);
}
bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst)
15905
{
15906
unpacked_uastc_block unpacked_src_blk;
15907
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15908
return false;
15909
15910
color32 block_pixels[4][4];
15911
if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
15912
{
15913
const bool unpack_srgb = false;
15914
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15915
return false;
15916
}
15917
15918
transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst);
15919
15920
return true;
15921
}
15922
15923
// Returns the squared difference between the 8-bit sample c and the reference value y.
static inline int gray_distance2(const uint8_t c, int y)
{
	const int gray_dist = (int)c - y;
	return gray_dist * gray_dist;
}
// Estimates which ETC1 subblock orientation suits a 4x4 single-channel block better.
//
// pSrc_pixels: 16 samples in row-major order (index = x + y * 4).
// upper_avg, lower_avg: receive the rounded averages of rows 0-1 and rows 2-3.
// left_avg, right_avg:  receive the rounded averages of columns 0-1 and columns 2-3.
//
// Returns true (use the flipped, top/bottom split) when the summed squared deviation of the
// pixels from their upper/lower half average is smaller than that from their left/right half average.
static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels,
	int& upper_avg, int& lower_avg, int& left_avg, int& right_avg)
{
	// Sum of each 2x2 quadrant, indexed [x half][y half].
	int sums[2][2] = { { 0, 0 }, { 0, 0 } };
	for (uint32_t y = 0; y < 4; y++)
		for (uint32_t x = 0; x < 4; x++)
			sums[x >> 1][y >> 1] += pSrc_pixels[x + y * 4];

	// Each half holds 8 pixels; + 4 rounds to nearest.
	upper_avg = (sums[0][0] + sums[1][0] + 4) / 8;
	lower_avg = (sums[0][1] + sums[1][1] + 4) / 8;
	left_avg = (sums[0][0] + sums[0][1] + 4) / 8;
	right_avg = (sums[1][0] + sums[1][1] + 4) / 8;

	int upper_lower_sum = 0;
	int left_right_sum = 0;
	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			const int v = pSrc_pixels[x + y * 4];

			const int dv = v - ((y < 2) ? upper_avg : lower_avg);
			const int dh = v - ((x < 2) ? left_avg : right_avg);

			upper_lower_sum += dv * dv;
			left_right_sum += dh * dh;
		}
	}

	return upper_lower_sum < left_right_sum;
}
// Precomputed ETC1 configuration for a solid single-channel block, indexed by the 8-bit value.
// Bit layout of each entry (least significant bits first):
//   bits 0-4: 5-bit base color, bits 5-6: selector index, bits 7-9: intensity table.
// The selector index picks a row of s_etc1_solid_selectors.
static const uint16_t g_etc1_y_solid_block_configs[256] =
{
	0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167,
	355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361,
	204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210,
	51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57,
	155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126
};
// Precomputed ETC1 individual-mode (4-bit base) subblock configuration for a subblock whose values
// span low .. low + 3, indexed by low.
// Bit layout of each entry (least significant bits first):
//   bits 0-2: intensity table, bits 3-7: base color,
//   bits 8-9, 10-11, 12-13, 14-15: selector index for the values low + 0, + 1, + 2, + 3.
static const uint16_t g_etc1_y_solid_block_4i_configs[256] =
{
	0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511,
	0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15,
	0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928,
	0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33,
	0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40,
	0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B,
	0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560,
	0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D,
	0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278
};
// Precomputed ETC1 individual-mode subblock configuration for a subblock whose values span
// low .. low + 1, indexed by low.
// Bit layout of each entry (least significant bits first):
//   bits 0-2: intensity table, bits 3-7: base color,
//   bits 8-9 and 10-11: selector index for the values low + 0 and low + 1.
static const uint16_t g_etc1_y_solid_block_2i_configs[256] =
{
	0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09,
	0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19,
	0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29,
	0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39,
	0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49,
	0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59,
	0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69,
	0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278
};
// Precomputed ETC1 individual-mode subblock configuration for a subblock holding a single value,
// indexed by that value.
// Bit layout of each entry (least significant bits first):
//   bits 0-2: intensity table, bits 3-7: base color, bits 8-9: selector index.
static const uint16_t g_etc1_y_solid_block_1i_configs[256] =
{
	0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309,
	0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C,
	0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230,
	0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240,
	0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B,
	0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261,
	0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271,
	0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278
};
// We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch.
16020
bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel)
16021
{
16022
unpacked_uastc_block unpacked_src_blk;
16023
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16024
return false;
16025
16026
#if 0
16027
for (uint32_t individ = 0; individ < 2; individ++)
16028
{
16029
uint32_t overall_error = 0;
16030
16031
for (uint32_t c = 0; c < 256; c++)
16032
{
16033
uint32_t best_err = UINT32_MAX;
16034
uint32_t best_individ = 0;
16035
uint32_t best_base = 0;
16036
uint32_t best_sels[4] = { 0,0,0,0 };
16037
uint32_t best_table = 0;
16038
16039
const uint32_t limit = individ ? 16 : 32;
16040
16041
for (uint32_t table = 0; table < 8; table++)
16042
{
16043
for (uint32_t base = 0; base < limit; base++)
16044
{
16045
uint32_t total_e = 0;
16046
uint32_t sels[4] = { 0,0,0,0 };
16047
16048
const uint32_t N = 4;
16049
for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
16050
{
16051
uint32_t best_sel_e = UINT32_MAX;
16052
uint32_t best_sel = 0;
16053
16054
for (uint32_t sel = 0; sel < 4; sel++)
16055
{
16056
int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
16057
val = clamp255(val + g_etc1_inten_tables[table][sel]);
16058
16059
int e = iabs(val - clamp255(c + i));
16060
if (e < best_sel_e)
16061
{
16062
best_sel_e = e;
16063
best_sel = sel;
16064
}
16065
16066
} // sel
16067
16068
sels[i] = best_sel;
16069
total_e += best_sel_e * best_sel_e;
16070
16071
} // i
16072
16073
if (total_e < best_err)
16074
{
16075
best_err = total_e;
16076
best_individ = individ;
16077
best_base = base;
16078
memcpy(best_sels, sels, sizeof(best_sels));
16079
best_table = table;
16080
}
16081
16082
} // base
16083
} // table
16084
16085
//printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
16086
16087
uint32_t encoded = best_table | (best_base << 3) |
16088
(best_sels[0] << 8) |
16089
(best_sels[1] << 10) |
16090
(best_sels[2] << 12) |
16091
(best_sels[3] << 14);
16092
16093
printf("0x%X,", encoded);
16094
16095
overall_error += best_err;
16096
} // c
16097
16098
printf("\n");
16099
printf("Overall error: %u\n", overall_error);
16100
16101
} // individ
16102
16103
exit(0);
16104
#endif
16105
16106
#if 0
16107
for (uint32_t individ = 0; individ < 2; individ++)
16108
{
16109
uint32_t overall_error = 0;
16110
16111
for (uint32_t c = 0; c < 256; c++)
16112
{
16113
uint32_t best_err = UINT32_MAX;
16114
uint32_t best_individ = 0;
16115
uint32_t best_base = 0;
16116
uint32_t best_sels[4] = { 0,0,0,0 };
16117
uint32_t best_table = 0;
16118
16119
const uint32_t limit = individ ? 16 : 32;
16120
16121
for (uint32_t table = 0; table < 8; table++)
16122
{
16123
for (uint32_t base = 0; base < limit; base++)
16124
{
16125
uint32_t total_e = 0;
16126
uint32_t sels[4] = { 0,0,0,0 };
16127
16128
const uint32_t N = 1;
16129
for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
16130
{
16131
uint32_t best_sel_e = UINT32_MAX;
16132
uint32_t best_sel = 0;
16133
16134
for (uint32_t sel = 0; sel < 4; sel++)
16135
{
16136
int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
16137
val = clamp255(val + g_etc1_inten_tables[table][sel]);
16138
16139
int e = iabs(val - clamp255(c + i));
16140
if (e < best_sel_e)
16141
{
16142
best_sel_e = e;
16143
best_sel = sel;
16144
}
16145
16146
} // sel
16147
16148
sels[i] = best_sel;
16149
total_e += best_sel_e * best_sel_e;
16150
16151
} // i
16152
16153
if (total_e < best_err)
16154
{
16155
best_err = total_e;
16156
best_individ = individ;
16157
best_base = base;
16158
memcpy(best_sels, sels, sizeof(best_sels));
16159
best_table = table;
16160
}
16161
16162
} // base
16163
} // table
16164
16165
//printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
16166
16167
uint32_t encoded = best_table | (best_base << 3) |
16168
(best_sels[0] << 8) |
16169
(best_sels[1] << 10) |
16170
(best_sels[2] << 12) |
16171
(best_sels[3] << 14);
16172
16173
printf("0x%X,", encoded);
16174
16175
overall_error += best_err;
16176
} // c
16177
16178
printf("\n");
16179
printf("Overall error: %u\n", overall_error);
16180
16181
} // individ
16182
16183
exit(0);
16184
#endif
16185
16186
decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
16187
16188
if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
16189
{
16190
const uint32_t y = unpacked_src_blk.m_solid_color[channel];
16191
const uint32_t encoded_config = g_etc1_y_solid_block_configs[y];
16192
16193
const uint32_t base = encoded_config & 31;
16194
const uint32_t sel = (encoded_config >> 5) & 3;
16195
const uint32_t table = encoded_config >> 7;
16196
16197
dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2));
16198
16199
dst_blk.m_bytes[0] = (uint8_t)(base << 3);
16200
dst_blk.m_bytes[1] = (uint8_t)(base << 3);
16201
dst_blk.m_bytes[2] = (uint8_t)(base << 3);
16202
16203
memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4);
16204
return true;
16205
}
16206
16207
color32 block_pixels[4][4];
16208
const bool unpack_srgb = false;
16209
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16210
return false;
16211
16212
uint8_t block_y[4][4];
16213
for (uint32_t i = 0; i < 16; i++)
16214
((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel];
16215
16216
int upper_avg, lower_avg, left_avg, right_avg;
16217
bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg);
16218
16219
// non-flipped: | |
16220
// vs.
16221
// flipped: --
16222
// --
16223
16224
uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 };
16225
16226
if (flip)
16227
{
16228
for (uint32_t y = 0; y < 2; y++)
16229
{
16230
for (uint32_t x = 0; x < 4; x++)
16231
{
16232
const uint32_t v = block_y[y][x];
16233
low[0] = basisu::minimum(low[0], v);
16234
high[0] = basisu::maximum(high[0], v);
16235
}
16236
}
16237
for (uint32_t y = 2; y < 4; y++)
16238
{
16239
for (uint32_t x = 0; x < 4; x++)
16240
{
16241
const uint32_t v = block_y[y][x];
16242
low[1] = basisu::minimum(low[1], v);
16243
high[1] = basisu::maximum(high[1], v);
16244
}
16245
}
16246
}
16247
else
16248
{
16249
for (uint32_t y = 0; y < 4; y++)
16250
{
16251
for (uint32_t x = 0; x < 2; x++)
16252
{
16253
const uint32_t v = block_y[y][x];
16254
low[0] = basisu::minimum(low[0], v);
16255
high[0] = basisu::maximum(high[0], v);
16256
}
16257
}
16258
for (uint32_t y = 0; y < 4; y++)
16259
{
16260
for (uint32_t x = 2; x < 4; x++)
16261
{
16262
const uint32_t v = block_y[y][x];
16263
low[1] = basisu::minimum(low[1], v);
16264
high[1] = basisu::maximum(high[1], v);
16265
}
16266
}
16267
}
16268
16269
const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] };
16270
16271
dst_blk.m_bytes[3] = (uint8_t)((int)flip);
16272
16273
if ((range[0] <= 3) && (range[1] <= 3))
16274
{
16275
// This is primarily for better gradients.
16276
dst_blk.m_bytes[0] = 0;
16277
dst_blk.m_bytes[1] = 0;
16278
dst_blk.m_bytes[2] = 0;
16279
16280
uint16_t l_bitmask = 0, h_bitmask = 0;
16281
16282
for (uint32_t subblock = 0; subblock < 2; subblock++)
16283
{
16284
const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
16285
16286
const uint32_t table = encoded & 7;
16287
const uint32_t base = (encoded >> 3) & 31;
16288
assert(base <= 15);
16289
const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
16290
16291
dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
16292
16293
const uint32_t sv = base << (subblock ? 0 : 4);
16294
dst_blk.m_bytes[0] |= (uint8_t)(sv);
16295
dst_blk.m_bytes[1] |= (uint8_t)(sv);
16296
dst_blk.m_bytes[2] |= (uint8_t)(sv);
16297
16298
if (flip)
16299
{
16300
uint32_t ofs = subblock * 2;
16301
for (uint32_t y = 0; y < 2; y++)
16302
{
16303
for (uint32_t x = 0; x < 4; x++)
16304
{
16305
uint32_t t = block_y[y + subblock * 2][x];
16306
assert(t >= low[subblock] && t <= high[subblock]);
16307
t -= low[subblock];
16308
assert(t <= 3);
16309
16310
t = g_selector_index_to_etc1[sels[t]];
16311
16312
assert(ofs < 16);
16313
l_bitmask |= ((t & 1) << ofs);
16314
h_bitmask |= ((t >> 1) << ofs);
16315
ofs += 4;
16316
}
16317
16318
ofs = (int)ofs + 1 - 4 * 4;
16319
}
16320
}
16321
else
16322
{
16323
uint32_t ofs = (subblock * 2) * 4;
16324
for (uint32_t x = 0; x < 2; x++)
16325
{
16326
for (uint32_t y = 0; y < 4; y++)
16327
{
16328
uint32_t t = block_y[y][x + subblock * 2];
16329
assert(t >= low[subblock] && t <= high[subblock]);
16330
t -= low[subblock];
16331
assert(t <= 3);
16332
16333
t = g_selector_index_to_etc1[sels[t]];
16334
16335
assert(ofs < 16);
16336
l_bitmask |= ((t & 1) << ofs);
16337
h_bitmask |= ((t >> 1) << ofs);
16338
++ofs;
16339
}
16340
}
16341
}
16342
} // subblock
16343
16344
dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
16345
dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
16346
dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
16347
dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
16348
16349
return true;
16350
}
16351
16352
uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255;
16353
uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255;
16354
16355
bool diff = true;
16356
16357
int dy = y1 - y0;
16358
16359
if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax))
16360
{
16361
diff = false;
16362
16363
y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255;
16364
y1 = ((flip ? lower_avg : right_avg) * 15 + 127) / 255;
16365
16366
dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4));
16367
dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4));
16368
dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4));
16369
}
16370
else
16371
{
16372
dy = basisu::clamp<int>(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
16373
16374
y1 = y0 + dy;
16375
16376
if (dy < 0) dy += 8;
16377
16378
dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy);
16379
dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy);
16380
dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy);
16381
16382
dst_blk.m_bytes[3] |= 2;
16383
}
16384
16385
const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) };
16386
16387
uint32_t enc_range[2];
16388
for (uint32_t subset = 0; subset < 2; subset++)
16389
{
16390
const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]);
16391
const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]);
16392
16393
enc_range[subset] = basisu::maximum(pos, neg);
16394
}
16395
16396
uint16_t l_bitmask = 0, h_bitmask = 0;
16397
for (uint32_t subblock = 0; subblock < 2; subblock++)
16398
{
16399
if ((!diff) && (range[subblock] <= 3))
16400
{
16401
const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
16402
16403
const uint32_t table = encoded & 7;
16404
const uint32_t base = (encoded >> 3) & 31;
16405
assert(base <= 15);
16406
const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
16407
16408
dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
16409
16410
const uint32_t mask = ~(0xF << (subblock ? 0 : 4));
16411
16412
dst_blk.m_bytes[0] &= mask;
16413
dst_blk.m_bytes[1] &= mask;
16414
dst_blk.m_bytes[2] &= mask;
16415
16416
const uint32_t sv = base << (subblock ? 0 : 4);
16417
dst_blk.m_bytes[0] |= (uint8_t)(sv);
16418
dst_blk.m_bytes[1] |= (uint8_t)(sv);
16419
dst_blk.m_bytes[2] |= (uint8_t)(sv);
16420
16421
if (flip)
16422
{
16423
uint32_t ofs = subblock * 2;
16424
for (uint32_t y = 0; y < 2; y++)
16425
{
16426
for (uint32_t x = 0; x < 4; x++)
16427
{
16428
uint32_t t = block_y[y + subblock * 2][x];
16429
assert(t >= low[subblock] && t <= high[subblock]);
16430
t -= low[subblock];
16431
assert(t <= 3);
16432
16433
t = g_selector_index_to_etc1[sels[t]];
16434
16435
assert(ofs < 16);
16436
l_bitmask |= ((t & 1) << ofs);
16437
h_bitmask |= ((t >> 1) << ofs);
16438
ofs += 4;
16439
}
16440
16441
ofs = (int)ofs + 1 - 4 * 4;
16442
}
16443
}
16444
else
16445
{
16446
uint32_t ofs = (subblock * 2) * 4;
16447
for (uint32_t x = 0; x < 2; x++)
16448
{
16449
for (uint32_t y = 0; y < 4; y++)
16450
{
16451
uint32_t t = block_y[y][x + subblock * 2];
16452
assert(t >= low[subblock] && t <= high[subblock]);
16453
t -= low[subblock];
16454
assert(t <= 3);
16455
16456
t = g_selector_index_to_etc1[sels[t]];
16457
16458
assert(ofs < 16);
16459
l_bitmask |= ((t & 1) << ofs);
16460
h_bitmask |= ((t >> 1) << ofs);
16461
++ofs;
16462
}
16463
}
16464
}
16465
16466
continue;
16467
} // if
16468
16469
uint32_t best_err = UINT32_MAX;
16470
uint8_t best_sels[8];
16471
uint32_t best_inten = 0;
16472
16473
const int base = base_y[subblock];
16474
16475
const int low_limit = -base;
16476
const int high_limit = 255 - base;
16477
16478
assert(low_limit <= 0 && high_limit >= 0);
16479
16480
uint32_t inten_table_mask = 0xFF;
16481
const uint32_t er = enc_range[subblock];
16482
// Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful.
16483
if (er <= 51)
16484
{
16485
inten_table_mask = 0xF;
16486
16487
if (er > 22)
16488
inten_table_mask &= ~(1 << 0);
16489
16490
if ((er < 4) || (er > 39))
16491
inten_table_mask &= ~(1 << 1);
16492
16493
if (er < 9)
16494
inten_table_mask &= ~(1 << 2);
16495
16496
if (er < 12)
16497
inten_table_mask &= ~(1 << 3);
16498
}
16499
else
16500
{
16501
inten_table_mask &= ~((1 << 0) | (1 << 1));
16502
16503
if (er > 60)
16504
inten_table_mask &= ~(1 << 2);
16505
16506
if (er > 89)
16507
inten_table_mask &= ~(1 << 3);
16508
16509
if (er > 120)
16510
inten_table_mask &= ~(1 << 4);
16511
16512
if (er > 136)
16513
inten_table_mask &= ~(1 << 5);
16514
16515
if (er > 174)
16516
inten_table_mask &= ~(1 << 6);
16517
}
16518
16519
for (uint32_t inten = 0; inten < 8; inten++)
16520
{
16521
if ((inten_table_mask & (1 << inten)) == 0)
16522
continue;
16523
16524
const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]);
16525
const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]);
16526
const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]);
16527
const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]);
16528
assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3));
16529
16530
const int tv[4] = { t2, t3, t1, t0 };
16531
16532
const int thresh01 = t0 + t1;
16533
const int thresh12 = t1 + t2;
16534
const int thresh23 = t2 + t3;
16535
16536
assert(thresh01 <= thresh12 && thresh12 <= thresh23);
16537
16538
static const uint8_t s_table[4] = { 1, 0, 2, 3 };
16539
16540
uint32_t total_err = 0;
16541
uint8_t sels[8];
16542
16543
if (flip)
16544
{
16545
if (((int)high[subblock] - base) * 2 < thresh01)
16546
{
16547
memset(sels, 3, 8);
16548
16549
for (uint32_t y = 0; y < 2; y++)
16550
{
16551
for (uint32_t x = 0; x < 4; x++)
16552
{
16553
const int delta = (int)block_y[y + subblock * 2][x] - base;
16554
16555
const uint32_t c = 3;
16556
16557
uint32_t e = basisu::iabs(tv[c] - delta);
16558
total_err += e * e;
16559
}
16560
if (total_err >= best_err)
16561
break;
16562
}
16563
}
16564
else if (((int)low[subblock] - base) * 2 >= thresh23)
16565
{
16566
memset(sels, 1, 8);
16567
16568
for (uint32_t y = 0; y < 2; y++)
16569
{
16570
for (uint32_t x = 0; x < 4; x++)
16571
{
16572
const int delta = (int)block_y[y + subblock * 2][x] - base;
16573
16574
const uint32_t c = 1;
16575
16576
uint32_t e = basisu::iabs(tv[c] - delta);
16577
total_err += e * e;
16578
}
16579
if (total_err >= best_err)
16580
break;
16581
}
16582
}
16583
else
16584
{
16585
for (uint32_t y = 0; y < 2; y++)
16586
{
16587
for (uint32_t x = 0; x < 4; x++)
16588
{
16589
const int delta = (int)block_y[y + subblock * 2][x] - base;
16590
const int delta2 = delta * 2;
16591
16592
uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
16593
sels[y * 4 + x] = (uint8_t)c;
16594
16595
uint32_t e = basisu::iabs(tv[c] - delta);
16596
total_err += e * e;
16597
}
16598
if (total_err >= best_err)
16599
break;
16600
}
16601
}
16602
}
16603
else
16604
{
16605
if (((int)high[subblock] - base) * 2 < thresh01)
16606
{
16607
memset(sels, 3, 8);
16608
16609
for (uint32_t y = 0; y < 4; y++)
16610
{
16611
for (uint32_t x = 0; x < 2; x++)
16612
{
16613
const int delta = (int)block_y[y][x + subblock * 2] - base;
16614
16615
const uint32_t c = 3;
16616
16617
uint32_t e = basisu::iabs(tv[c] - delta);
16618
total_err += e * e;
16619
}
16620
if (total_err >= best_err)
16621
break;
16622
}
16623
}
16624
else if (((int)low[subblock] - base) * 2 >= thresh23)
16625
{
16626
memset(sels, 1, 8);
16627
16628
for (uint32_t y = 0; y < 4; y++)
16629
{
16630
for (uint32_t x = 0; x < 2; x++)
16631
{
16632
const int delta = (int)block_y[y][x + subblock * 2] - base;
16633
16634
const uint32_t c = 1;
16635
16636
uint32_t e = basisu::iabs(tv[c] - delta);
16637
total_err += e * e;
16638
}
16639
if (total_err >= best_err)
16640
break;
16641
}
16642
}
16643
else
16644
{
16645
for (uint32_t y = 0; y < 4; y++)
16646
{
16647
for (uint32_t x = 0; x < 2; x++)
16648
{
16649
const int delta = (int)block_y[y][x + subblock * 2] - base;
16650
const int delta2 = delta * 2;
16651
16652
uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
16653
sels[y * 2 + x] = (uint8_t)c;
16654
16655
uint32_t e = basisu::iabs(tv[c] - delta);
16656
total_err += e * e;
16657
}
16658
if (total_err >= best_err)
16659
break;
16660
}
16661
}
16662
}
16663
16664
if (total_err < best_err)
16665
{
16666
best_err = total_err;
16667
best_inten = inten;
16668
memcpy(best_sels, sels, 8);
16669
}
16670
16671
} // inten
16672
16673
//g_inten_hist[best_inten][enc_range[subblock]]++;
16674
16675
dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 2 : 5));
16676
16677
if (flip)
16678
{
16679
uint32_t ofs = subblock * 2;
16680
for (uint32_t y = 0; y < 2; y++)
16681
{
16682
for (uint32_t x = 0; x < 4; x++)
16683
{
16684
uint32_t t = best_sels[y * 4 + x];
16685
16686
assert(ofs < 16);
16687
l_bitmask |= ((t & 1) << ofs);
16688
h_bitmask |= ((t >> 1) << ofs);
16689
ofs += 4;
16690
}
16691
16692
ofs = (int)ofs + 1 - 4 * 4;
16693
}
16694
}
16695
else
16696
{
16697
uint32_t ofs = (subblock * 2) * 4;
16698
for (uint32_t x = 0; x < 2; x++)
16699
{
16700
for (uint32_t y = 0; y < 4; y++)
16701
{
16702
uint32_t t = best_sels[y * 2 + x];
16703
16704
assert(ofs < 16);
16705
l_bitmask |= ((t & 1) << ofs);
16706
h_bitmask |= ((t >> 1) << ofs);
16707
++ofs;
16708
}
16709
}
16710
}
16711
16712
} // subblock
16713
16714
dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
16715
dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
16716
dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
16717
dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
16718
16719
return true;
16720
}
16721
16722
// Indices into a g_eac_modifier_table row, used to read the table's minimum and maximum modifier.
const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3;
const uint32_t ETC2_EAC_MAX_VALUE_SELECTOR = 7;
// Encodes the alpha channel of an unpacked UASTC block as an ETC2 EAC A8 block.
//
// unpacked_src_blk: unpacked source block; m_mode, m_solid_color and m_etc2_hints are read.
// block_pixels:     the block's decoded texels, indexed [y][x]. Not read for solid color blocks
//                   or for modes without alpha.
// pDst:             output, written as an eac_block.
void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
{
	eac_block& dst = *static_cast<eac_block*>(pDst);
	const color32* pSrc_pixels = &block_pixels[0][0];

	// Determine the alpha range. Solid blocks use their stored alpha, modes without alpha are
	// fully opaque (255), and everything else is scanned.
	uint32_t min_a = 255, max_a = 255;
	if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
	{
		min_a = max_a = unpacked_src_blk.m_solid_color[3];
	}
	else if (g_uastc_mode_has_alpha[unpacked_src_blk.m_mode])
	{
		min_a = 255;
		max_a = 0;
		for (uint32_t i = 0; i < 16; i++)
		{
			min_a = basisu::minimum<uint32_t>(min_a, pSrc_pixels[i].a);
			max_a = basisu::maximum<uint32_t>(max_a, pSrc_pixels[i].a);
		}
	}

	if (min_a == max_a)
	{
		// Constant alpha: fixed table/multiplier with the precomputed g_etc2_eac_a8_sel4 selectors.
		dst.m_base = min_a;
		dst.m_table = 13;
		dst.m_multiplier = 1;

		memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));

		return;
	}

	// The UASTC block carries the EAC table (low 4 bits) and multiplier (remaining bits) as hints.
	const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF;
	const int multiplier = unpacked_src_blk.m_etc2_hints >> 4;

	assert(multiplier >= 1);

	dst.m_multiplier = multiplier;
	dst.m_table = table;

	// Place the base between min_a and max_a at the position where the modifier value 0 falls
	// between the table's minimum and maximum modifiers.
	const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]);
	const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));

	dst.m_base = center;

	const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0];

	// The 8 alpha values this base/table/multiplier combination can represent.
	uint32_t vals[8];
	for (uint32_t j = 0; j < 8; j++)
		vals[j] = clamp255(center + (pTable[j] * multiplier));

	uint64_t sels = 0;
	for (uint32_t i = 0; i < 16; i++)
	{
		// Selectors are emitted column by column, so pixel i is (x = i >> 2, y = i & 3).
		const uint32_t a = block_pixels[i & 3][i >> 2].a;

		// Pack (error << 3) | selector so a single minimum yields the best selector; ties go to
		// the lowest selector index.
		uint32_t min_err = UINT32_MAX;
		for (uint32_t j = 0; j < 8; j++)
		{
			const uint32_t err = (basisu::iabs((int)vals[j] - (int)a) << 3) | j;
			min_err = basisu::minimum(min_err, err);
		}

		const uint64_t best_index = min_err & 7;
		sels |= (best_index << (45 - i * 3));
	}

	dst.set_selector_bits(sels);
}
bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst)
16802
{
16803
eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst);
16804
decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1];
16805
16806
unpacked_uastc_block unpacked_src_blk;
16807
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16808
return false;
16809
16810
color32 block_pixels[4][4];
16811
if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
16812
{
16813
const bool unpack_srgb = false;
16814
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16815
return false;
16816
}
16817
16818
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk);
16819
16820
transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk);
16821
16822
return true;
16823
}
16824
16825
// 5-bit UASTC weight index -> BC1 selector (one row per output selector value).
static const uint8_t s_uastc5_to_bc1[32] =
{
	0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	1, 1, 1, 1, 1, 1
};
16826
// 4-bit UASTC weight index -> BC1 selector (one row per output selector value).
static const uint8_t s_uastc4_to_bc1[16] =
{
	0, 0, 0,
	2, 2, 2, 2, 2,
	3, 3, 3, 3, 3,
	1, 1, 1
};
16827
// 3-bit UASTC weight index -> BC1 selector.
static const uint8_t s_uastc3_to_bc1[8] =
{
	0, 0,
	2, 2,
	3, 3,
	1, 1
};
16828
// 2-bit UASTC weight index -> BC1 selector.
static const uint8_t s_uastc2_to_bc1[4] =
{
	0,
	2,
	3,
	1
};
16829
// 1-bit UASTC weight index -> BC1 selector.
static const uint8_t s_uastc1_to_bc1[2] =
{
	0,
	1
};
16830
// Weight-to-BC1-selector tables indexed by the number of weight bits (1..5).
// Slot 0 has no table and is left null.
const uint8_t* s_uastc_to_bc1_weights[6] =
{
	nullptr,
	s_uastc1_to_bc1,
	s_uastc2_to_bc1,
	s_uastc3_to_bc1,
	s_uastc4_to_bc1,
	s_uastc5_to_bc1
};
16831
16832
// Encodes 16 single-channel texels as one 8-byte BC4 block.
//   pDst    - receives 8 bytes: max value, min value, then 48 bits of 3-bit selectors
//             (texel 0 in the lowest bits).
//   pPixels - first texel; texel i is read from pPixels[i * stride].
//   stride  - distance in bytes between consecutive texels (e.g. 1 for packed bytes).
// A block whose texels are all equal is written with all-zero selector bytes.
void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride)
{
	// Pass 1: value range of the block.
	uint32_t min_v = pPixels[0];
	uint32_t max_v = pPixels[0];
	for (uint32_t i = 1; i < 16; i++)
	{
		const uint32_t v = pPixels[i * stride];
		if (v < min_v)
			min_v = v;
		if (v > max_v)
			max_v = v;
	}

	uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst);
	pDst_bytes[0] = (uint8_t)max_v;
	pDst_bytes[1] = (uint8_t)min_v;

	if (max_v == min_v)
	{
		memset(pDst_bytes + 2, 0, 6);
		return;
	}

	const int delta = (int)(max_v - min_v);

	// Decision thresholds between the 8 representable values, in units of 1/14th of the range:
	// delta*1, delta*3, ..., delta*13. It's x14 because we're adding two x7 scale factors.
	int thresholds[7];
	for (int k = 0; k < 7; k++)
		thresholds[k] = delta * (2 * k + 1);

	// BC4 floors in its divisions, which we compensate for with the 4 bias.
	// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
	// The -min_v*14 term rebases every texel so that min_v maps to 0.
	const int bias = 4 - (int)min_v * 14;

	// Maps "number of thresholds reached" (0 = nearest min_v, 7 = nearest max_v) to the BC4 selector.
	// Endpoint 0 holds max_v (selector 0) and endpoint 1 holds min_v (selector 1).
	static const uint8_t s_rank_to_sel[8] = { 1, 7, 6, 5, 4, 3, 2, 0 };

	// Pass 2: classify each texel and pack its 3-bit selector at bit position i*3.
	uint64_t sel_bits = 0;
	for (uint32_t i = 0; i < 16; i++)
	{
		const int v = (int)pPixels[i * stride] * 14 + bias;

		uint32_t rank = 0;
		for (uint32_t k = 0; k < 7; k++)
			rank += (v >= thresholds[k]) ? 1u : 0u;

		sel_bits |= ((uint64_t)s_rank_to_sel[rank]) << (i * 3U);
	}

	// Selector bits are stored little-endian in bytes 2..7.
	for (uint32_t i = 0; i < 6; i++)
		pDst_bytes[2 + i] = (uint8_t)(sel_bits >> (i * 8U));
}
16951
16952
static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
16953
{
16954
uint32_t block_r[4], block_g[4], block_b[4];
16955
16956
block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
16957
block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
16958
block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
16959
block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
16960
16961
int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
16962
16963
int dots[4];
16964
for (uint32_t i = 0; i < 4; i++)
16965
dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
16966
16967
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
16968
16969
ar *= 2; ag *= 2; ab *= 2;
16970
16971
for (uint32_t i = 0; i < 16; i++)
16972
{
16973
const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab;
16974
static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
16975
16976
// Rounding matters here!
16977
// d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality.
16978
sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)];
16979
}
16980
}
16981
16982
static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
16983
{
16984
uint32_t block_r[4], block_g[4], block_b[4];
16985
16986
block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
16987
block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
16988
block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
16989
block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
16990
16991
int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
16992
16993
int dots[4];
16994
for (uint32_t i = 0; i < 4; i++)
16995
dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
16996
16997
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
16998
16999
ar *= 2; ag *= 2; ab *= 2;
17000
17001
static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
17002
17003
for (uint32_t i = 0; i < 16; i += 4)
17004
{
17005
const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab;
17006
const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab;
17007
const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab;
17008
const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab;
17009
17010
sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
17011
sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
17012
sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)];
17013
sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)];
17014
}
17015
}
17016
17017
static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh)
17018
{
17019
// Derived from bc7enc16's LS function.
17020
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
17021
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
17022
uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0;
17023
17024
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
17025
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
17026
static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
17027
17028
uint32_t weight_accum = 0;
17029
for (uint32_t i = 0; i < 16; i++)
17030
{
17031
const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
17032
const uint32_t sel = pSelectors[i];
17033
ut_r += r;
17034
ut_g += g;
17035
ut_b += b;
17036
weight_accum += s_weight_vals[sel];
17037
uq00_r += sel * r;
17038
uq00_g += sel * g;
17039
uq00_b += sel * b;
17040
}
17041
17042
float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r;
17043
float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g;
17044
float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b;
17045
17046
q10_r = t_r * 3.0f - q00_r;
17047
q10_g = t_g * 3.0f - q00_g;
17048
q10_b = t_b * 3.0f - q00_b;
17049
17050
float z00 = (float)((weight_accum >> 16) & 0xFF);
17051
float z10 = (float)((weight_accum >> 8) & 0xFF);
17052
float z11 = (float)(weight_accum & 0xFF);
17053
float z01 = z10;
17054
17055
float det = z00 * z11 - z01 * z10;
17056
if (fabs(det) < 1e-8f)
17057
return false;
17058
17059
det = 3.0f / det;
17060
17061
float iz00, iz01, iz10, iz11;
17062
iz00 = z11 * det;
17063
iz01 = -z01 * det;
17064
iz10 = -z10 * det;
17065
iz11 = z00 * det;
17066
17067
pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r;
17068
pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g;
17069
pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b;
17070
17071
// Check and fix channel singularities - might not be needed, but is in UASTC's encoder.
17072
for (uint32_t c = 0; c < 3; c++)
17073
{
17074
if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f))
17075
{
17076
uint32_t lo_v = UINT32_MAX, hi_v = 0;
17077
for (uint32_t i = 0; i < 16; i++)
17078
{
17079
lo_v = basisu::minimumu(lo_v, pColors[i].c[c]);
17080
hi_v = basisu::maximumu(hi_v, pColors[i].c[c]);
17081
}
17082
17083
if (lo_v == hi_v)
17084
{
17085
pXl->c[c] = (float)lo_v;
17086
pXh->c[c] = (float)hi_v;
17087
}
17088
}
17089
}
17090
17091
return true;
17092
}
17093
17094
void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb)
17095
{
17096
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17097
17098
uint32_t mask = 0xAA;
17099
uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi;
17100
uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo;
17101
17102
if (min16 == max16)
17103
{
17104
// Always forbid 3 color blocks
17105
// This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
17106
mask = 0;
17107
17108
// Make l > h
17109
if (min16 > 0)
17110
min16--;
17111
else
17112
{
17113
// l = h = 0
17114
assert(min16 == max16 && max16 == 0);
17115
17116
max16 = 1;
17117
min16 = 0;
17118
mask = 0x55;
17119
}
17120
17121
assert(max16 > min16);
17122
}
17123
17124
if (max16 < min16)
17125
{
17126
std::swap(max16, min16);
17127
mask ^= 0x55;
17128
}
17129
17130
pDst_block->set_low_color(static_cast<uint16_t>(max16));
17131
pDst_block->set_high_color(static_cast<uint16_t>(min16));
17132
pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
17133
pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
17134
pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
17135
pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
17136
}
17137
17138
// Rescales an 8-bit value (0..255) to 5 bits (0..31) with rounding.
// (x + (x >> 8)) >> 8 stands in for the division by 255.
static inline uint8_t to_5(uint32_t v)
{
	const uint32_t scaled = v * 31 + 128;
	const uint32_t quotient = (scaled + (scaled >> 8)) >> 8;
	return (uint8_t)quotient;
}
17139
// Rescales an 8-bit value (0..255) to 6 bits (0..63) with rounding.
// (x + (x >> 8)) >> 8 stands in for the division by 255.
static inline uint8_t to_6(uint32_t v)
{
	const uint32_t scaled = v * 63 + 128;
	const uint32_t quotient = (scaled + (scaled >> 8)) >> 8;
	return (uint8_t)quotient;
}
17140
17141
// Good references: squish library, stb_dxt.
17142
void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags)
17143
{
17144
const color32* pSrc_pixels = (const color32*)pPixels;
17145
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17146
17147
int avg_r = -1, avg_g = 0, avg_b = 0;
17148
int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
17149
uint8_t sels[16];
17150
17151
const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
17152
if (use_sels)
17153
{
17154
// Caller is jamming in their own selectors for us to try.
17155
const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
17156
17157
static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
17158
17159
for (uint32_t i = 0; i < 16; i++)
17160
sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
17161
}
17162
else
17163
{
17164
const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
17165
17166
uint32_t j;
17167
for (j = 1; j < 16; j++)
17168
if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
17169
break;
17170
17171
if (j == 16)
17172
{
17173
encode_bc1_solid_block(pDst, fr, fg, fb);
17174
return;
17175
}
17176
17177
// Select 2 colors along the principle axis. (There must be a faster/simpler way.)
17178
int total_r = fr, total_g = fg, total_b = fb;
17179
int max_r = fr, max_g = fg, max_b = fb;
17180
int min_r = fr, min_g = fg, min_b = fb;
17181
for (uint32_t i = 1; i < 16; i++)
17182
{
17183
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
17184
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
17185
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
17186
total_r += r; total_g += g; total_b += b;
17187
}
17188
17189
avg_r = (total_r + 8) >> 4;
17190
avg_g = (total_g + 8) >> 4;
17191
avg_b = (total_b + 8) >> 4;
17192
17193
int icov[6] = { 0, 0, 0, 0, 0, 0 };
17194
for (uint32_t i = 0; i < 16; i++)
17195
{
17196
int r = (int)pSrc_pixels[i].r - avg_r;
17197
int g = (int)pSrc_pixels[i].g - avg_g;
17198
int b = (int)pSrc_pixels[i].b - avg_b;
17199
icov[0] += r * r;
17200
icov[1] += r * g;
17201
icov[2] += r * b;
17202
icov[3] += g * g;
17203
icov[4] += g * b;
17204
icov[5] += b * b;
17205
}
17206
17207
float cov[6];
17208
for (uint32_t i = 0; i < 6; i++)
17209
cov[i] = static_cast<float>(icov[i])* (1.0f / 255.0f);
17210
17211
#if 0
17212
// Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference).
17213
// TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta
17214
int saxis_r = max_r - min_r;
17215
int saxis_g = max_g - min_g;
17216
int saxis_b = max_b - min_b;
17217
#else
17218
float xr = (float)(max_r - min_r);
17219
float xg = (float)(max_g - min_g);
17220
float xb = (float)(max_b - min_b);
17221
//float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's
17222
//float xg = (float)(max_g - avg_g);
17223
//float xb = (float)(max_b - avg_b);
17224
for (uint32_t power_iter = 0; power_iter < 4; power_iter++)
17225
{
17226
float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
17227
float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
17228
float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
17229
xr = r; xg = g; xb = b;
17230
}
17231
17232
float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb));
17233
int saxis_r = 306, saxis_g = 601, saxis_b = 117;
17234
if (k >= 2)
17235
{
17236
float m = 1024.0f / k;
17237
saxis_r = (int)(xr * m);
17238
saxis_g = (int)(xg * m);
17239
saxis_b = (int)(xb * m);
17240
}
17241
#endif
17242
17243
int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0;
17244
for (uint32_t i = 0; i < 16; i++)
17245
{
17246
int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b;
17247
if (dot < low_dot)
17248
{
17249
low_dot = dot;
17250
low_c = i;
17251
}
17252
if (dot > high_dot)
17253
{
17254
high_dot = dot;
17255
high_c = i;
17256
}
17257
}
17258
17259
lr = to_5(pSrc_pixels[low_c].r);
17260
lg = to_6(pSrc_pixels[low_c].g);
17261
lb = to_5(pSrc_pixels[low_c].b);
17262
17263
hr = to_5(pSrc_pixels[high_c].r);
17264
hg = to_6(pSrc_pixels[high_c].g);
17265
hb = to_5(pSrc_pixels[high_c].b);
17266
17267
bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17268
} // if (use_sels)
17269
17270
const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
17271
for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
17272
{
17273
// This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
17274
vec3F xl, xh;
17275
if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
17276
{
17277
if (avg_r < 0)
17278
{
17279
int total_r = 0, total_g = 0, total_b = 0;
17280
for (uint32_t i = 0; i < 16; i++)
17281
{
17282
total_r += pSrc_pixels[i].r;
17283
total_g += pSrc_pixels[i].g;
17284
total_b += pSrc_pixels[i].b;
17285
}
17286
17287
avg_r = (total_r + 8) >> 4;
17288
avg_g = (total_g + 8) >> 4;
17289
avg_b = (total_b + 8) >> 4;
17290
}
17291
17292
// All selectors equal - treat it as a solid block which should always be equal or better.
17293
lr = g_bc1_match5_equals_1[avg_r].m_hi;
17294
lg = g_bc1_match6_equals_1[avg_g].m_hi;
17295
lb = g_bc1_match5_equals_1[avg_b].m_hi;
17296
17297
hr = g_bc1_match5_equals_1[avg_r].m_lo;
17298
hg = g_bc1_match6_equals_1[avg_g].m_lo;
17299
hb = g_bc1_match5_equals_1[avg_b].m_lo;
17300
17301
// In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
17302
}
17303
else
17304
{
17305
lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17306
lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17307
lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17308
17309
hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17310
hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17311
hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17312
}
17313
17314
bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17315
}
17316
17317
uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
17318
uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
17319
17320
// Always forbid 3 color blocks
17321
if (lc16 == hc16)
17322
{
17323
uint8_t mask = 0;
17324
17325
// Make l > h
17326
if (hc16 > 0)
17327
hc16--;
17328
else
17329
{
17330
// lc16 = hc16 = 0
17331
assert(lc16 == hc16 && hc16 == 0);
17332
17333
hc16 = 0;
17334
lc16 = 1;
17335
mask = 0x55; // select hc16
17336
}
17337
17338
assert(lc16 > hc16);
17339
pDst_block->set_low_color(static_cast<uint16_t>(lc16));
17340
pDst_block->set_high_color(static_cast<uint16_t>(hc16));
17341
17342
pDst_block->m_selectors[0] = mask;
17343
pDst_block->m_selectors[1] = mask;
17344
pDst_block->m_selectors[2] = mask;
17345
pDst_block->m_selectors[3] = mask;
17346
}
17347
else
17348
{
17349
uint8_t invert_mask = 0;
17350
if (lc16 < hc16)
17351
{
17352
std::swap(lc16, hc16);
17353
invert_mask = 0x55;
17354
}
17355
17356
assert(lc16 > hc16);
17357
pDst_block->set_low_color((uint16_t)lc16);
17358
pDst_block->set_high_color((uint16_t)hc16);
17359
17360
uint32_t packed_sels = 0;
17361
static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
17362
for (uint32_t i = 0; i < 16; i++)
17363
packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
17364
17365
pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
17366
pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
17367
pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
17368
pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
17369
}
17370
}
17371
17372
void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags)
17373
{
17374
const color32* pSrc_pixels = (const color32*)pPixels;
17375
dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
17376
17377
int avg_r = -1, avg_g = 0, avg_b = 0;
17378
int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
17379
uint8_t sels[16];
17380
17381
const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
17382
if (use_sels)
17383
{
17384
// Caller is jamming in their own selectors for us to try.
17385
const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
17386
17387
static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
17388
17389
for (uint32_t i = 0; i < 16; i++)
17390
sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
17391
}
17392
else
17393
{
17394
const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
17395
17396
uint32_t j;
17397
for (j = 1; j < 16; j++)
17398
if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
17399
break;
17400
17401
if (j == 16)
17402
{
17403
encode_bc1_solid_block(pDst, fr, fg, fb);
17404
return;
17405
}
17406
17407
// Select 2 colors along the principle axis. (There must be a faster/simpler way.)
17408
int total_r = fr, total_g = fg, total_b = fb;
17409
int max_r = fr, max_g = fg, max_b = fb;
17410
int min_r = fr, min_g = fg, min_b = fb;
17411
uint32_t grayscale_flag = (fr == fg) && (fr == fb);
17412
for (uint32_t i = 1; i < 16; i++)
17413
{
17414
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
17415
grayscale_flag &= ((r == g) && (r == b));
17416
max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
17417
min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
17418
total_r += r; total_g += g; total_b += b;
17419
}
17420
17421
if (grayscale_flag)
17422
{
17423
// Grayscale blocks are a common enough case to specialize.
17424
if ((max_r - min_r) < 2)
17425
{
17426
lr = lb = hr = hb = to_5(fr);
17427
lg = hg = to_6(fr);
17428
}
17429
else
17430
{
17431
lr = lb = to_5(min_r);
17432
lg = to_6(min_r);
17433
17434
hr = hb = to_5(max_r);
17435
hg = to_6(max_r);
17436
}
17437
}
17438
else
17439
{
17440
avg_r = (total_r + 8) >> 4;
17441
avg_g = (total_g + 8) >> 4;
17442
avg_b = (total_b + 8) >> 4;
17443
17444
// Find the shortest vector from a AABB corner to the block's average color.
17445
// This is to help avoid outliers.
17446
17447
uint32_t dist[3][2];
17448
dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3;
17449
dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3;
17450
dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3;
17451
17452
uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]);
17453
uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4;
17454
min_d0 = basisu::minimum(min_d0, d4);
17455
17456
uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1;
17457
uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5;
17458
min_d1 = basisu::minimum(min_d1, d5);
17459
17460
uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2;
17461
min_d0 = basisu::minimum(min_d0, d2);
17462
17463
uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3;
17464
min_d1 = basisu::minimum(min_d1, d3);
17465
17466
uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6;
17467
min_d0 = basisu::minimum(min_d0, d6);
17468
17469
uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7;
17470
min_d1 = basisu::minimum(min_d1, d7);
17471
17472
uint32_t min_d = basisu::minimum(min_d0, min_d1);
17473
uint32_t best_i = min_d & 7;
17474
17475
int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r);
17476
int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g);
17477
int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b);
17478
17479
// Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in.
17480
uint32_t low_c = 0, high_c = 0;
17481
if ((delta_r | delta_g | delta_b) != 0)
17482
{
17483
// Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB.
17484
// Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection.
17485
// One of these vectors will be a decent approximation of the block's PCA.
17486
const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b;
17487
17488
int low_dot0 = INT_MAX, high_dot0 = INT_MIN;
17489
int low_dot1 = INT_MAX, high_dot1 = INT_MIN;
17490
int low_dot2 = INT_MAX, high_dot2 = INT_MIN;
17491
int low_dot3 = INT_MAX, high_dot3 = INT_MIN;
17492
17493
//int low_c0, low_c1, low_c2, low_c3;
17494
//int high_c0, high_c1, high_c2, high_c3;
17495
17496
for (uint32_t i = 0; i < 16; i++)
17497
{
17498
const int dotx = pSrc_pixels[i].r * saxis0_r;
17499
const int doty = pSrc_pixels[i].g * saxis0_g;
17500
const int dotz = pSrc_pixels[i].b * saxis0_b;
17501
17502
const int dot0 = ((dotz + dotx + doty) << 4) + i;
17503
const int dot1 = ((dotz - dotx - doty) << 4) + i;
17504
const int dot2 = ((dotz - dotx + doty) << 4) + i;
17505
const int dot3 = ((dotz + dotx - doty) << 4) + i;
17506
17507
if (dot0 < low_dot0)
17508
{
17509
low_dot0 = dot0;
17510
//low_c0 = i;
17511
}
17512
if ((dot0 ^ 15) > high_dot0)
17513
{
17514
high_dot0 = dot0 ^ 15;
17515
//high_c0 = i;
17516
}
17517
17518
if (dot1 < low_dot1)
17519
{
17520
low_dot1 = dot1;
17521
//low_c1 = i;
17522
}
17523
if ((dot1 ^ 15) > high_dot1)
17524
{
17525
high_dot1 = dot1 ^ 15;
17526
//high_c1 = i;
17527
}
17528
17529
if (dot2 < low_dot2)
17530
{
17531
low_dot2 = dot2;
17532
//low_c2 = i;
17533
}
17534
if ((dot2 ^ 15) > high_dot2)
17535
{
17536
high_dot2 = dot2 ^ 15;
17537
//high_c2 = i;
17538
}
17539
17540
if (dot3 < low_dot3)
17541
{
17542
low_dot3 = dot3;
17543
//low_c3 = i;
17544
}
17545
if ((dot3 ^ 15) > high_dot3)
17546
{
17547
high_dot3 = dot3 ^ 15;
17548
//high_c3 = i;
17549
}
17550
}
17551
17552
low_c = low_dot0 & 15;
17553
high_c = ~high_dot0 & 15;
17554
uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15);
17555
17556
uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15);
17557
if (tr > r) {
17558
low_c = low_dot1 & 15;
17559
high_c = ~high_dot1 & 15;
17560
r = tr;
17561
}
17562
17563
tr = (high_dot2 & ~15) - (low_dot2 & ~15);
17564
if (tr > r) {
17565
low_c = low_dot2 & 15;
17566
high_c = ~high_dot2 & 15;
17567
r = tr;
17568
}
17569
17570
tr = (high_dot3 & ~15) - (low_dot3 & ~15);
17571
if (tr > r) {
17572
low_c = low_dot3 & 15;
17573
high_c = ~high_dot3 & 15;
17574
}
17575
}
17576
17577
lr = to_5(pSrc_pixels[low_c].r);
17578
lg = to_6(pSrc_pixels[low_c].g);
17579
lb = to_5(pSrc_pixels[low_c].b);
17580
17581
hr = to_5(pSrc_pixels[high_c].r);
17582
hg = to_6(pSrc_pixels[high_c].g);
17583
hb = to_5(pSrc_pixels[high_c].b);
17584
}
17585
17586
bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17587
} // if (use_sels)
17588
17589
const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
17590
for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
17591
{
17592
int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb;
17593
17594
// This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
17595
vec3F xl, xh;
17596
if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
17597
{
17598
if (avg_r < 0)
17599
{
17600
int total_r = 0, total_g = 0, total_b = 0;
17601
for (uint32_t i = 0; i < 16; i++)
17602
{
17603
total_r += pSrc_pixels[i].r;
17604
total_g += pSrc_pixels[i].g;
17605
total_b += pSrc_pixels[i].b;
17606
}
17607
17608
avg_r = (total_r + 8) >> 4;
17609
avg_g = (total_g + 8) >> 4;
17610
avg_b = (total_b + 8) >> 4;
17611
}
17612
17613
// All selectors equal - treat it as a solid block which should always be equal or better.
17614
lr = g_bc1_match5_equals_1[avg_r].m_hi;
17615
lg = g_bc1_match6_equals_1[avg_g].m_hi;
17616
lb = g_bc1_match5_equals_1[avg_b].m_hi;
17617
17618
hr = g_bc1_match5_equals_1[avg_r].m_lo;
17619
hg = g_bc1_match6_equals_1[avg_g].m_lo;
17620
hb = g_bc1_match5_equals_1[avg_b].m_lo;
17621
17622
// In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
17623
}
17624
else
17625
{
17626
lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17627
lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17628
lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17629
17630
hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
17631
hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
17632
hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
17633
}
17634
17635
if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb))
17636
break;
17637
17638
bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
17639
}
17640
17641
uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
17642
uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
17643
17644
// Always forbid 3 color blocks
17645
if (lc16 == hc16)
17646
{
17647
uint8_t mask = 0;
17648
17649
// Make l > h
17650
if (hc16 > 0)
17651
hc16--;
17652
else
17653
{
17654
// lc16 = hc16 = 0
17655
assert(lc16 == hc16 && hc16 == 0);
17656
17657
hc16 = 0;
17658
lc16 = 1;
17659
mask = 0x55; // select hc16
17660
}
17661
17662
assert(lc16 > hc16);
17663
pDst_block->set_low_color(static_cast<uint16_t>(lc16));
17664
pDst_block->set_high_color(static_cast<uint16_t>(hc16));
17665
17666
pDst_block->m_selectors[0] = mask;
17667
pDst_block->m_selectors[1] = mask;
17668
pDst_block->m_selectors[2] = mask;
17669
pDst_block->m_selectors[3] = mask;
17670
}
17671
else
17672
{
17673
uint8_t invert_mask = 0;
17674
if (lc16 < hc16)
17675
{
17676
std::swap(lc16, hc16);
17677
invert_mask = 0x55;
17678
}
17679
17680
assert(lc16 > hc16);
17681
pDst_block->set_low_color((uint16_t)lc16);
17682
pDst_block->set_high_color((uint16_t)hc16);
17683
17684
uint32_t packed_sels = 0;
17685
static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
17686
for (uint32_t i = 0; i < 16; i++)
17687
packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
17688
17689
pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
17690
pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
17691
pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
17692
pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
17693
}
17694
}
17695
17696
// Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest.
17697
void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst)
17698
{
17699
const uint32_t mode = unpacked_src_blk.m_mode;
17700
const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
17701
17702
dxt1_block& b = *static_cast<dxt1_block*>(pDst);
17703
17704
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
17705
17706
const uint32_t total_comps = g_uastc_mode_comps[mode];
17707
17708
if (total_comps == 2)
17709
{
17710
const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant;
17711
const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant;
17712
17713
b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127));
17714
b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127));
17715
}
17716
else
17717
{
17718
b.set_low_color(dxt1_block::pack_color(
17719
color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant,
17720
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant,
17721
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant,
17722
255), true, 127)
17723
);
17724
17725
b.set_high_color(dxt1_block::pack_color(
17726
color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant,
17727
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant,
17728
g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant,
17729
255), true, 127)
17730
);
17731
}
17732
17733
if (b.get_low_color() == b.get_high_color())
17734
{
17735
// Always forbid 3 color blocks
17736
uint16_t lc16 = (uint16_t)b.get_low_color();
17737
uint16_t hc16 = (uint16_t)b.get_high_color();
17738
17739
uint8_t mask = 0;
17740
17741
// Make l > h
17742
if (hc16 > 0)
17743
hc16--;
17744
else
17745
{
17746
// lc16 = hc16 = 0
17747
assert(lc16 == hc16 && hc16 == 0);
17748
17749
hc16 = 0;
17750
lc16 = 1;
17751
mask = 0x55; // select hc16
17752
}
17753
17754
assert(lc16 > hc16);
17755
b.set_low_color(static_cast<uint16_t>(lc16));
17756
b.set_high_color(static_cast<uint16_t>(hc16));
17757
17758
b.m_selectors[0] = mask;
17759
b.m_selectors[1] = mask;
17760
b.m_selectors[2] = mask;
17761
b.m_selectors[3] = mask;
17762
}
17763
else
17764
{
17765
bool invert = false;
17766
if (b.get_low_color() < b.get_high_color())
17767
{
17768
std::swap(b.m_low_color[0], b.m_high_color[0]);
17769
std::swap(b.m_low_color[1], b.m_high_color[1]);
17770
invert = true;
17771
}
17772
17773
const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
17774
17775
const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
17776
17777
uint32_t sels = 0;
17778
for (int i = 15; i >= 0; --i)
17779
{
17780
uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]];
17781
17782
if (invert)
17783
s ^= 1;
17784
17785
sels = (sels << 2) | s;
17786
}
17787
b.m_selectors[0] = sels & 0xFF;
17788
b.m_selectors[1] = (sels >> 8) & 0xFF;
17789
b.m_selectors[2] = (sels >> 16) & 0xFF;
17790
b.m_selectors[3] = (sels >> 24) & 0xFF;
17791
}
17792
}
17793
17794
// Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed.
17795
void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality)
17796
{
17797
const uint32_t mode = unpacked_src_blk.m_mode;
17798
17799
const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
17800
17801
dxt1_block& b = *static_cast<dxt1_block*>(pDst);
17802
17803
b.set_low_color(1);
17804
b.set_high_color(0);
17805
17806
const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
17807
17808
const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
17809
17810
uint32_t sels = 0;
17811
for (int i = 15; i >= 0; --i)
17812
{
17813
sels <<= 2;
17814
sels |= pTran[astc_blk.m_weights[i << plane_shift]];
17815
}
17816
17817
b.m_selectors[0] = sels & 0xFF;
17818
b.m_selectors[1] = (sels >> 8) & 0xFF;
17819
b.m_selectors[2] = (sels >> 16) & 0xFF;
17820
b.m_selectors[3] = (sels >> 24) & 0xFF;
17821
17822
encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors);
17823
}
17824
17825
bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality)
17826
{
17827
unpacked_uastc_block unpacked_src_blk;
17828
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17829
return false;
17830
17831
const uint32_t mode = unpacked_src_blk.m_mode;
17832
17833
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17834
{
17835
encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
17836
return true;
17837
}
17838
17839
if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
17840
transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst);
17841
else
17842
{
17843
color32 block_pixels[4][4];
17844
const bool unpack_srgb = false;
17845
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17846
return false;
17847
17848
if (unpacked_src_blk.m_bc1_hint1)
17849
transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality);
17850
else
17851
encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
17852
}
17853
17854
return true;
17855
}
17856
17857
// Writes an 8 byte BC4 block to pDst whose two endpoint bytes are both the low 8 bits of 'a' and whose
// remaining 6 (selector) bytes are all zero.
static void write_bc4_solid_block(uint8_t* pDst, uint32_t a)
{
	const uint8_t endpoint = static_cast<uint8_t>(a);

	pDst[0] = endpoint;
	pDst[1] = endpoint;

	for (uint32_t ofs = 2; ofs < 8; ofs++)
		pDst[ofs] = 0;
}
17863
17864
bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality)
17865
{
17866
unpacked_uastc_block unpacked_src_blk;
17867
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17868
return false;
17869
17870
const uint32_t mode = unpacked_src_blk.m_mode;
17871
17872
void* pBC4_block = pDst;
17873
dxt1_block* pBC1_block = &static_cast<dxt1_block*>(pDst)[1];
17874
17875
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17876
{
17877
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.a);
17878
encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
17879
return true;
17880
}
17881
17882
color32 block_pixels[4][4];
17883
const bool unpack_srgb = false;
17884
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17885
return false;
17886
17887
basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32));
17888
17889
if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
17890
transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block);
17891
else
17892
{
17893
if (unpacked_src_blk.m_bc1_hint1)
17894
transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality);
17895
else
17896
encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
17897
}
17898
17899
return true;
17900
}
17901
17902
bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
17903
{
17904
BASISU_NOTE_UNUSED(high_quality);
17905
17906
unpacked_uastc_block unpacked_src_blk;
17907
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17908
return false;
17909
17910
const uint32_t mode = unpacked_src_blk.m_mode;
17911
17912
void* pBC4_block = pDst;
17913
17914
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17915
{
17916
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]);
17917
return true;
17918
}
17919
17920
color32 block_pixels[4][4];
17921
const bool unpack_srgb = false;
17922
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17923
return false;
17924
17925
basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32));
17926
17927
return true;
17928
}
17929
17930
bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
17931
{
17932
BASISU_NOTE_UNUSED(high_quality);
17933
17934
unpacked_uastc_block unpacked_src_blk;
17935
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
17936
return false;
17937
17938
const uint32_t mode = unpacked_src_blk.m_mode;
17939
17940
void* pBC4_block0 = pDst;
17941
void* pBC4_block1 = (uint8_t*)pDst + 8;
17942
17943
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
17944
{
17945
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]);
17946
write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]);
17947
return true;
17948
}
17949
17950
color32 block_pixels[4][4];
17951
const bool unpack_srgb = false;
17952
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
17953
return false;
17954
17955
basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32));
17956
basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32));
17957
17958
return true;
17959
}
17960
17961
// Bit offset, within the packed 64-bit selector word handed to eac_block::set_selector_bits(), of each pixel's 3-bit selector.
// Indexed by the pixel's position i in the source block (the pixel read from pPixels[i * stride] by pack_eac() and pack_eac_high_quality()).
// NOTE(review): the ordering looks like a row-major -> column-major remap of the 4x4 block - confirm against the EAC bit layout.
static const uint8_t s_etc2_eac_bit_ofs[16] = { 45, 33, 21, 9, 42, 30, 18, 6, 39, 27, 15, 3, 36, 24, 12, 0 };
17962
17963
// Fills blk so it encodes a single constant value: base = low 8 bits of 'a', table 13, multiplier 0,
// and the selector bytes copied from g_etc2_eac_a8_sel4.
static void pack_eac_solid_block(eac_block& blk, uint32_t a)
{
	const uint8_t solid_value = static_cast<uint8_t>(a);

	blk.m_base = solid_value;
	blk.m_multiplier = 0;
	blk.m_table = 13;

	memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
}
17973
17974
// Only checks 4 tables.
17975
static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
17976
{
17977
uint32_t min_alpha = 255, max_alpha = 0;
17978
for (uint32_t i = 0; i < 16; i++)
17979
{
17980
const uint32_t a = pPixels[i * stride];
17981
if (a < min_alpha) min_alpha = a;
17982
if (a > max_alpha) max_alpha = a;
17983
}
17984
17985
if (min_alpha == max_alpha)
17986
{
17987
pack_eac_solid_block(blk, min_alpha);
17988
return;
17989
}
17990
17991
const uint32_t alpha_range = max_alpha - min_alpha;
17992
17993
const uint32_t SINGLE_TABLE_THRESH = 5;
17994
if (alpha_range <= SINGLE_TABLE_THRESH)
17995
{
17996
// If alpha_range <= 5 table 13 is lossless
17997
int base = clamp255((int)max_alpha - 2);
17998
17999
blk.m_base = base;
18000
blk.m_multiplier = 1;
18001
blk.m_table = 13;
18002
18003
base -= 3;
18004
18005
uint64_t packed_sels = 0;
18006
for (uint32_t i = 0; i < 16; i++)
18007
{
18008
const int a = pPixels[i * stride];
18009
18010
static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
18011
18012
int sel = a - base;
18013
assert(sel >= 0 && sel <= 5);
18014
18015
packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
18016
}
18017
18018
blk.set_selector_bits(packed_sels);
18019
18020
return;
18021
}
18022
18023
const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13;
18024
static const uint8_t s_tables[4] = { T0, T1, T2, T3 };
18025
18026
int base[4], mul[4];
18027
uint32_t mul_or = 0;
18028
for (uint32_t i = 0; i < 4; i++)
18029
{
18030
const uint32_t table = s_tables[i];
18031
18032
const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
18033
18034
base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
18035
mul[i] = clampi((int)roundf(alpha_range / range), 1, 15);
18036
mul_or |= mul[i];
18037
}
18038
18039
uint32_t total_err[4] = { 0, 0, 0, 0 };
18040
uint8_t sels[4][16];
18041
18042
for (uint32_t i = 0; i < 16; i++)
18043
{
18044
const int a = pPixels[i * stride];
18045
18046
uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX;
18047
18048
if ((a < 7) || (a > (255 - 7)))
18049
{
18050
for (uint32_t s = 0; s < 8; s++)
18051
{
18052
const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]);
18053
const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]);
18054
const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]);
18055
const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]);
18056
18057
l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s);
18058
l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s);
18059
l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s);
18060
l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s);
18061
}
18062
}
18063
else if (mul_or == 1)
18064
{
18065
const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
18066
18067
for (uint32_t s = 0; s < 8; s++)
18068
{
18069
const int v0 = g_eac_modifier_table[T0][s] + a0;
18070
const int v1 = g_eac_modifier_table[T1][s] + a1;
18071
const int v2 = g_eac_modifier_table[T2][s] + a2;
18072
const int v3 = g_eac_modifier_table[T3][s] + a3;
18073
18074
l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
18075
l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
18076
l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
18077
l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
18078
}
18079
}
18080
else
18081
{
18082
const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
18083
18084
for (uint32_t s = 0; s < 8; s++)
18085
{
18086
const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0;
18087
const int v1 = mul[1] * g_eac_modifier_table[T1][s] + a1;
18088
const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2;
18089
const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3;
18090
18091
l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
18092
l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
18093
l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
18094
l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
18095
}
18096
}
18097
18098
sels[0][i] = l0 & 7;
18099
sels[1][i] = l1 & 7;
18100
sels[2][i] = l2 & 7;
18101
sels[3][i] = l3 & 7;
18102
18103
total_err[0] += basisu::square<uint32_t>(l0 >> 3);
18104
total_err[1] += basisu::square<uint32_t>(l1 >> 3);
18105
total_err[2] += basisu::square<uint32_t>(l2 >> 3);
18106
total_err[3] += basisu::square<uint32_t>(l3 >> 3);
18107
}
18108
18109
uint32_t min_err = total_err[0], min_index = 0;
18110
for (uint32_t i = 1; i < 4; i++)
18111
{
18112
if (total_err[i] < min_err)
18113
{
18114
min_err = total_err[i];
18115
min_index = i;
18116
}
18117
}
18118
18119
blk.m_base = base[min_index];
18120
blk.m_multiplier = mul[min_index];
18121
blk.m_table = s_tables[min_index];
18122
18123
uint64_t packed_sels = 0;
18124
const uint8_t* pSels = &sels[min_index][0];
18125
for (uint32_t i = 0; i < 16; i++)
18126
packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
18127
18128
blk.set_selector_bits(packed_sels);
18129
}
18130
18131
// Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal.
18132
static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
18133
{
18134
uint32_t min_alpha = 255, max_alpha = 0;
18135
for (uint32_t i = 0; i < 16; i++)
18136
{
18137
const uint32_t a = pPixels[i * stride];
18138
if (a < min_alpha) min_alpha = a;
18139
if (a > max_alpha) max_alpha = a;
18140
}
18141
18142
if (min_alpha == max_alpha)
18143
{
18144
pack_eac_solid_block(blk, min_alpha);
18145
return;
18146
}
18147
18148
const uint32_t alpha_range = max_alpha - min_alpha;
18149
18150
const uint32_t SINGLE_TABLE_THRESH = 5;
18151
if (alpha_range <= SINGLE_TABLE_THRESH)
18152
{
18153
// If alpha_range <= 5 table 13 is lossless
18154
int base = clamp255((int)max_alpha - 2);
18155
18156
blk.m_base = base;
18157
blk.m_multiplier = 1;
18158
blk.m_table = 13;
18159
18160
base -= 3;
18161
18162
uint64_t packed_sels = 0;
18163
for (uint32_t i = 0; i < 16; i++)
18164
{
18165
const int a = pPixels[i * stride];
18166
18167
static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
18168
18169
int sel = a - base;
18170
assert(sel >= 0 && sel <= 5);
18171
18172
packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
18173
}
18174
18175
blk.set_selector_bits(packed_sels);
18176
18177
return;
18178
}
18179
18180
int base[16], mul[16];
18181
for (uint32_t table = 0; table < 16; table++)
18182
{
18183
const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
18184
18185
base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
18186
mul[table] = clampi((int)roundf(alpha_range / range), 1, 15);
18187
}
18188
18189
uint32_t total_err[16];
18190
memset(total_err, 0, sizeof(total_err));
18191
18192
uint8_t sels[16][16];
18193
18194
for (uint32_t table = 0; table < 16; table++)
18195
{
18196
const int8_t* pTable = &g_eac_modifier_table[table][0];
18197
const int m = mul[table], b = base[table];
18198
18199
uint32_t prev_l = 0, prev_a = UINT32_MAX;
18200
18201
for (uint32_t i = 0; i < 16; i++)
18202
{
18203
const int a = pPixels[i * stride];
18204
18205
if ((uint32_t)a == prev_a)
18206
{
18207
sels[table][i] = prev_l & 7;
18208
total_err[table] += basisu::square<uint32_t>(prev_l >> 3);
18209
}
18210
else
18211
{
18212
uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3;
18213
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1);
18214
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2);
18215
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3);
18216
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4);
18217
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5);
18218
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6);
18219
l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7);
18220
18221
sels[table][i] = l & 7;
18222
total_err[table] += basisu::square<uint32_t>(l >> 3);
18223
18224
prev_l = l;
18225
prev_a = a;
18226
}
18227
}
18228
}
18229
18230
uint32_t min_err = total_err[0], min_index = 0;
18231
for (uint32_t i = 1; i < 16; i++)
18232
{
18233
if (total_err[i] < min_err)
18234
{
18235
min_err = total_err[i];
18236
min_index = i;
18237
}
18238
}
18239
18240
blk.m_base = base[min_index];
18241
blk.m_multiplier = mul[min_index];
18242
blk.m_table = min_index;
18243
18244
uint64_t packed_sels = 0;
18245
const uint8_t* pSels = &sels[min_index][0];
18246
for (uint32_t i = 0; i < 16; i++)
18247
packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
18248
18249
blk.set_selector_bits(packed_sels);
18250
}
18251
18252
bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
18253
{
18254
unpacked_uastc_block unpacked_src_blk;
18255
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
18256
return false;
18257
18258
const uint32_t mode = unpacked_src_blk.m_mode;
18259
18260
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
18261
{
18262
pack_eac_solid_block(*static_cast<eac_block*>(pDst), unpacked_src_blk.m_solid_color.c[chan0]);
18263
return true;
18264
}
18265
18266
color32 block_pixels[4][4];
18267
const bool unpack_srgb = false;
18268
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
18269
return false;
18270
18271
if (chan0 == 3)
18272
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst);
18273
else
18274
(high_quality ? pack_eac_high_quality : pack_eac)(*static_cast<eac_block*>(pDst), &block_pixels[0][0].c[chan0], sizeof(color32));
18275
18276
return true;
18277
}
18278
18279
bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
18280
{
18281
unpacked_uastc_block unpacked_src_blk;
18282
if (!unpack_uastc(src_blk, unpacked_src_blk, false))
18283
return false;
18284
18285
const uint32_t mode = unpacked_src_blk.m_mode;
18286
18287
if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
18288
{
18289
pack_eac_solid_block(static_cast<eac_block*>(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]);
18290
pack_eac_solid_block(static_cast<eac_block*>(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]);
18291
return true;
18292
}
18293
18294
color32 block_pixels[4][4];
18295
const bool unpack_srgb = false;
18296
if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
18297
return false;
18298
18299
if (chan0 == 3)
18300
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[0]);
18301
else
18302
(high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32));
18303
18304
if (chan1 == 3)
18305
transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[1]);
18306
else
18307
(high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32));
18308
return true;
18309
}
18310
18311
// PVRTC1
18312
// Second PVRTC1 4bpp pass for opaque output: for every block, copies its endpoint word from pPVRTC_endpoints into the
// destination block and computes the 32-bit modulation word (2 bits per pixel) from the unpacked UASTC pixels.
// pSrc_blocks: num_blocks_x * num_blocks_y UASTC blocks, read in raster order.
// pPVRTC_endpoints: one packed endpoint word per block, in raster order.
// pDst_blocks: array of pvrtc4_block's, written at the swizzled index computed below.
// from_alpha: when true each pixel's RGB is replaced by its alpha before the modulation is computed.
// NOTE(review): neighbour coordinates are wrapped with '& (num_blocks - 1)', which assumes power-of-two block counts - confirm with the callers.
static void fixup_pvrtc1_4_modulation_rgb(
18313
const uastc_block* pSrc_blocks,
18314
const uint32_t* pPVRTC_endpoints,
18315
void* pDst_blocks,
18316
uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha)
18317
{
18318
const uint32_t x_mask = num_blocks_x - 1;
18319
const uint32_t y_mask = num_blocks_y - 1;
18320
const uint32_t x_bits = basisu::total_bits(x_mask);
18321
const uint32_t y_bits = basisu::total_bits(y_mask);
18322
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
18323
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
18324
// Keeps only the low min_bits*2 bits of the combined x/y swizzle; used below for non-square images.
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
18325
18326
uint32_t block_index = 0;
18327
18328
// really 3x3
// e0/e1 hold the values derived from each endpoint word by get_opaque_endpoint_l0()/l1(), rescaled by 255/31,
// for the 3x3 block neighbourhood around the current block. Indexed [ex][ey] (column, row).
18329
int e0[4][4], e1[4][4];
18330
18331
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
18332
{
18333
const uint32_t* pE_rows[3];
18334
18335
// Remember the endpoint rows y-1, y, y+1 (wrapped) and preload the window with columns -1, 0, +1 (wrapped).
for (int ey = 0; ey < 3; ey++)
18336
{
18337
int by = y + ey - 1;
18338
18339
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
18340
18341
pE_rows[ey] = pE;
18342
18343
for (int ex = 0; ex < 3; ex++)
18344
{
18345
int bx = 0 + ex - 1;
18346
18347
const uint32_t e = pE[bx & x_mask];
18348
18349
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
18350
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
18351
}
18352
}
18353
18354
// NOTE(review): presumably g_pvrtc_swizzle_table spreads the bits of a byte apart for Morton-order addressing - confirm.
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
18355
18356
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
18357
{
18358
const uastc_block& src_block = pSrc_blocks[block_index];
18359
18360
color32 block_pixels[4][4];
18361
// NOTE(review): the return value of unpack_uastc() is ignored here. If it can fail without writing the pixels,
// block_pixels is read uninitialized below - confirm.
unpack_uastc(src_block, &block_pixels[0][0], false);
18362
if (from_alpha)
18363
{
18364
// Just set RGB to alpha to avoid adding complexity below.
18365
for (uint32_t i = 0; i < 16; i++)
18366
{
18367
const uint8_t a = ((color32*)block_pixels)[i].a;
18368
((color32*)block_pixels)[i].set(a, a, a, 255);
18369
}
18370
}
18371
18372
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
18373
18374
// Destination index: x bits and y bits interleaved. For non-square images only the low min_bits of each
// coordinate are interleaved and the remaining high bits of the longer axis are placed above them.
uint32_t swizzled = x_swizzle | y_swizzle;
18375
if (num_blocks_x != num_blocks_y)
18376
{
18377
swizzled &= swizzle_mask;
18378
18379
if (num_blocks_x > num_blocks_y)
18380
swizzled |= ((x >> min_bits) << (min_bits * 2));
18381
else
18382
swizzled |= ((y >> min_bits) << (min_bits * 2));
18383
}
18384
18385
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
18386
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
18387
18388
// Load the window's right-hand column (block x+1, wrapped) for all three rows.
{
18389
const uint32_t ex = 2;
18390
int bx = x + ex - 1;
18391
bx &= x_mask;
18392
18393
#define DO_ROW(ey) \
18394
{ \
18395
const uint32_t e = pE_rows[ey][bx]; \
18396
e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
18397
e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
18398
}
18399
18400
DO_ROW(0);
18401
DO_ROW(1);
18402
DO_ROW(2);
18403
#undef DO_ROW
18404
}
18405
18406
uint32_t mod = 0;
18407
18408
// DO_PIX(lx, ly, w0..w3): blends the four surrounding blocks' e0 values (a0..a3) and e1 values (b0..b3) with the
// given weights (each weight set sums to 16), compares them against 16 * (r + g + b) of pixel (lx, ly), and picks the
// 2-bit modulation value 0..3 using thresholds at 3/16, 8/16 and 13/16 of the way from the blended e0 to the blended e1.
// The result is OR'd into 'mod' at bit ly * 8 + lx * 2. a0..a3/b0..b3 must be in scope at the point of use.
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
18409
{ \
18410
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
18411
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
18412
int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \
18413
int dl = cb_l - ca_l; \
18414
int vl = cl - ca_l; \
18415
int p = vl * 16; \
18416
if (ca_l > cb_l) { p = -p; dl = -dl; } \
18417
uint32_t m = 0; \
18418
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
18419
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
18420
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
18421
mod |= m; \
18422
}
18423
18424
// Top-left 2x2 pixels: blended from window blocks (0,0), (1,0), (0,1), (1,1).
{
18425
const uint32_t ex = 0, ey = 0;
18426
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18427
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18428
DO_PIX(0, 0, 4, 4, 4, 4);
18429
DO_PIX(1, 0, 2, 6, 2, 6);
18430
DO_PIX(0, 1, 2, 2, 6, 6);
18431
DO_PIX(1, 1, 1, 3, 3, 9);
18432
}
18433
18434
// Top-right 2x2 pixels: blended from window blocks (1,0), (2,0), (1,1), (2,1).
{
18435
const uint32_t ex = 1, ey = 0;
18436
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18437
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18438
DO_PIX(2, 0, 8, 0, 8, 0);
18439
DO_PIX(3, 0, 6, 2, 6, 2);
18440
DO_PIX(2, 1, 4, 0, 12, 0);
18441
DO_PIX(3, 1, 3, 1, 9, 3);
18442
}
18443
18444
// Bottom-left 2x2 pixels: blended from window blocks (0,1), (1,1), (0,2), (1,2).
{
18445
const uint32_t ex = 0, ey = 1;
18446
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18447
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18448
DO_PIX(0, 2, 8, 8, 0, 0);
18449
DO_PIX(1, 2, 4, 12, 0, 0);
18450
DO_PIX(0, 3, 6, 6, 2, 2);
18451
DO_PIX(1, 3, 3, 9, 1, 3);
18452
}
18453
18454
// Bottom-right 2x2 pixels: blended from window blocks (1,1), (2,1), (1,2), (2,2).
{
18455
const uint32_t ex = 1, ey = 1;
18456
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18457
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18458
DO_PIX(2, 2, 16, 0, 0, 0);
18459
DO_PIX(3, 2, 12, 4, 0, 0);
18460
DO_PIX(2, 3, 12, 0, 4, 0);
18461
DO_PIX(3, 3, 9, 3, 3, 1);
18462
}
18463
#undef DO_PIX
18464
18465
pDst_block->m_modulation = mod;
18466
18467
// Slide the window one block to the right: column 1 becomes column 0, column 2 becomes column 1.
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
18468
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
18469
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
18470
18471
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
18472
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
18473
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
18474
18475
} // x
18476
} // y
18477
}
18478
18479
static void fixup_pvrtc1_4_modulation_rgba(
18480
const uastc_block* pSrc_blocks,
18481
const uint32_t* pPVRTC_endpoints,
18482
void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
18483
{
18484
const uint32_t x_mask = num_blocks_x - 1;
18485
const uint32_t y_mask = num_blocks_y - 1;
18486
const uint32_t x_bits = basisu::total_bits(x_mask);
18487
const uint32_t y_bits = basisu::total_bits(y_mask);
18488
const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
18489
//const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
18490
const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
18491
18492
uint32_t block_index = 0;
18493
18494
// really 3x3
18495
int e0[4][4], e1[4][4];
18496
18497
for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
18498
{
18499
const uint32_t* pE_rows[3];
18500
18501
for (int ey = 0; ey < 3; ey++)
18502
{
18503
int by = y + ey - 1;
18504
18505
const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
18506
18507
pE_rows[ey] = pE;
18508
18509
for (int ex = 0; ex < 3; ex++)
18510
{
18511
int bx = 0 + ex - 1;
18512
18513
const uint32_t e = pE[bx & x_mask];
18514
18515
e0[ex][ey] = get_endpoint_l8(e, 0);
18516
e1[ex][ey] = get_endpoint_l8(e, 1);
18517
}
18518
}
18519
18520
const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
18521
18522
for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
18523
{
18524
const uastc_block& src_block = pSrc_blocks[block_index];
18525
18526
color32 block_pixels[4][4];
18527
unpack_uastc(src_block, &block_pixels[0][0], false);
18528
18529
const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
18530
18531
uint32_t swizzled = x_swizzle | y_swizzle;
18532
if (num_blocks_x != num_blocks_y)
18533
{
18534
swizzled &= swizzle_mask;
18535
18536
if (num_blocks_x > num_blocks_y)
18537
swizzled |= ((x >> min_bits) << (min_bits * 2));
18538
else
18539
swizzled |= ((y >> min_bits) << (min_bits * 2));
18540
}
18541
18542
pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
18543
pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
18544
18545
{
18546
const uint32_t ex = 2;
18547
int bx = x + ex - 1;
18548
bx &= x_mask;
18549
18550
#define DO_ROW(ey) \
18551
{ \
18552
const uint32_t e = pE_rows[ey][bx]; \
18553
e0[ex][ey] = get_endpoint_l8(e, 0); \
18554
e1[ex][ey] = get_endpoint_l8(e, 1); \
18555
}
18556
18557
DO_ROW(0);
18558
DO_ROW(1);
18559
DO_ROW(2);
18560
#undef DO_ROW
18561
}
18562
18563
uint32_t mod = 0;
18564
18565
#define DO_PIX(lx, ly, w0, w1, w2, w3) \
18566
{ \
18567
int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
18568
int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
18569
int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \
18570
int dl = cb_l - ca_l; \
18571
int vl = cl - ca_l; \
18572
int p = vl * 16; \
18573
if (ca_l > cb_l) { p = -p; dl = -dl; } \
18574
uint32_t m = 0; \
18575
if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
18576
if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
18577
if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
18578
mod |= m; \
18579
}
18580
18581
{
18582
const uint32_t ex = 0, ey = 0;
18583
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18584
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18585
DO_PIX(0, 0, 4, 4, 4, 4);
18586
DO_PIX(1, 0, 2, 6, 2, 6);
18587
DO_PIX(0, 1, 2, 2, 6, 6);
18588
DO_PIX(1, 1, 1, 3, 3, 9);
18589
}
18590
18591
{
18592
const uint32_t ex = 1, ey = 0;
18593
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18594
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18595
DO_PIX(2, 0, 8, 0, 8, 0);
18596
DO_PIX(3, 0, 6, 2, 6, 2);
18597
DO_PIX(2, 1, 4, 0, 12, 0);
18598
DO_PIX(3, 1, 3, 1, 9, 3);
18599
}
18600
18601
{
18602
const uint32_t ex = 0, ey = 1;
18603
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18604
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18605
DO_PIX(0, 2, 8, 8, 0, 0);
18606
DO_PIX(1, 2, 4, 12, 0, 0);
18607
DO_PIX(0, 3, 6, 6, 2, 2);
18608
DO_PIX(1, 3, 3, 9, 1, 3);
18609
}
18610
18611
{
18612
const uint32_t ex = 1, ey = 1;
18613
const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
18614
const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
18615
DO_PIX(2, 2, 16, 0, 0, 0);
18616
DO_PIX(3, 2, 12, 4, 0, 0);
18617
DO_PIX(2, 3, 12, 0, 4, 0);
18618
DO_PIX(3, 3, 9, 3, 3, 1);
18619
}
18620
#undef DO_PIX
18621
18622
pDst_block->m_modulation = mod;
18623
18624
e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
18625
e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
18626
e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
18627
18628
e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
18629
e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
18630
e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
18631
18632
} // x
18633
} // y
18634
}
18635
18636
bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha)
18637
{
18638
BASISU_NOTE_UNUSED(high_quality);
18639
18640
if ((!num_blocks_x) || (!num_blocks_y))
18641
return false;
18642
18643
const uint32_t width = num_blocks_x * 4;
18644
const uint32_t height = num_blocks_y * 4;
18645
if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
18646
return false;
18647
18648
basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
18649
18650
for (uint32_t y = 0; y < num_blocks_y; y++)
18651
{
18652
for (uint32_t x = 0; x < num_blocks_x; x++)
18653
{
18654
color32 block_pixels[16];
18655
if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
18656
return false;
18657
18658
// Get block's RGB bounding box
18659
color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
18660
18661
if (from_alpha)
18662
{
18663
uint32_t low_a = 255, high_a = 0;
18664
for (uint32_t i = 0; i < 16; i++)
18665
{
18666
low_a = basisu::minimum<uint32_t>(low_a, block_pixels[i].a);
18667
high_a = basisu::maximum<uint32_t>(high_a, block_pixels[i].a);
18668
}
18669
low_color.set(low_a, low_a, low_a, 255);
18670
high_color.set(high_a, high_a, high_a, 255);
18671
}
18672
else
18673
{
18674
for (uint32_t i = 0; i < 16; i++)
18675
{
18676
low_color = color32::comp_min(low_color, block_pixels[i]);
18677
high_color = color32::comp_max(high_color, block_pixels[i]);
18678
}
18679
}
18680
18681
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
18682
pvrtc4_block temp;
18683
temp.set_opaque_endpoint_floor(0, low_color);
18684
temp.set_opaque_endpoint_ceil(1, high_color);
18685
18686
temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
18687
}
18688
}
18689
18690
fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha);
18691
18692
return true;
18693
}
18694
18695
bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality)
18696
{
18697
BASISU_NOTE_UNUSED(high_quality);
18698
18699
if ((!num_blocks_x) || (!num_blocks_y))
18700
return false;
18701
18702
const uint32_t width = num_blocks_x * 4;
18703
const uint32_t height = num_blocks_y * 4;
18704
if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
18705
return false;
18706
18707
basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
18708
18709
for (uint32_t y = 0; y < num_blocks_y; y++)
18710
{
18711
for (uint32_t x = 0; x < num_blocks_x; x++)
18712
{
18713
color32 block_pixels[16];
18714
if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
18715
return false;
18716
18717
// Get block's RGBA bounding box
18718
color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
18719
18720
for (uint32_t i = 0; i < 16; i++)
18721
{
18722
low_color = color32::comp_min(low_color, block_pixels[i]);
18723
high_color = color32::comp_max(high_color, block_pixels[i]);
18724
}
18725
18726
// Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
18727
pvrtc4_block temp;
18728
temp.set_endpoint_floor(0, low_color);
18729
temp.set_endpoint_ceil(1, high_color);
18730
18731
temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
18732
}
18733
}
18734
18735
fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y);
18736
18737
return true;
18738
}
18739
18740
void uastc_init()
18741
{
18742
for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++)
18743
{
18744
if (!astc_is_valid_endpoint_range(range))
18745
continue;
18746
18747
const uint32_t levels = astc_get_levels(range);
18748
18749
uint32_t vals[256];
18750
for (uint32_t i = 0; i < levels; i++)
18751
vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i;
18752
18753
std::sort(vals, vals + levels);
18754
18755
for (uint32_t i = 0; i < levels; i++)
18756
{
18757
const uint32_t order = vals[i] & 0xFF;
18758
const uint32_t unq = vals[i] >> 8;
18759
18760
g_astc_unquant[range][order].m_unquant = (uint8_t)unq;
18761
g_astc_unquant[range][order].m_index = (uint8_t)i;
18762
18763
} // i
18764
}
18765
18766
// TODO: Precompute?
18767
// BC7 777.1
18768
for (int c = 0; c < 256; c++)
18769
{
18770
for (uint32_t lp = 0; lp < 2; lp++)
18771
{
18772
endpoint_err best;
18773
best.m_error = (uint16_t)UINT16_MAX;
18774
18775
for (uint32_t l = 0; l < 128; l++)
18776
{
18777
const uint32_t low = (l << 1) | lp;
18778
18779
for (uint32_t h = 0; h < 128; h++)
18780
{
18781
const uint32_t high = (h << 1) | lp;
18782
18783
const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6;
18784
18785
const int err = (k - c) * (k - c);
18786
if (err < best.m_error)
18787
{
18788
best.m_error = (uint16_t)err;
18789
best.m_lo = (uint8_t)l;
18790
best.m_hi = (uint8_t)h;
18791
}
18792
} // h
18793
} // l
18794
18795
g_bc7_mode_6_optimal_endpoints[c][lp] = best;
18796
} // lp
18797
18798
} // c
18799
18800
// BC7 777
18801
for (int c = 0; c < 256; c++)
18802
{
18803
endpoint_err best;
18804
best.m_error = (uint16_t)UINT16_MAX;
18805
18806
for (uint32_t l = 0; l < 128; l++)
18807
{
18808
const uint32_t low = (l << 1) | (l >> 6);
18809
18810
for (uint32_t h = 0; h < 128; h++)
18811
{
18812
const uint32_t high = (h << 1) | (h >> 6);
18813
18814
const int k = (low * (64 - g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6;
18815
18816
const int err = (k - c) * (k - c);
18817
if (err < best.m_error)
18818
{
18819
best.m_error = (uint16_t)err;
18820
best.m_lo = (uint8_t)l;
18821
best.m_hi = (uint8_t)h;
18822
}
18823
} // h
18824
} // l
18825
18826
g_bc7_mode_5_optimal_endpoints[c] = best;
18827
18828
} // c
18829
}
18830
18831
#endif // #if BASISD_SUPPORT_UASTC
18832
18833
// ------------------------------------------------------------------------------------------------------
18834
// KTX2
18835
// ------------------------------------------------------------------------------------------------------
18836
18837
#if BASISD_SUPPORT_KTX2
18838
// The 12-byte identifier expected at the very start of a KTX2 file: 0xAB, the ASCII text "KTX 20", 0xBB,
// then the bytes CR LF 0x1A LF. ktx2_transcoder::init() rejects any input that does not begin with it.
const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
18839
18840
// Creates a transcoder with no file attached; every member is put into its cleared state via clear().
ktx2_transcoder::ktx2_transcoder()
	: m_etc1s_transcoder()
{
	clear();
}
18845
18846
void ktx2_transcoder::clear()
18847
{
18848
m_pData = nullptr;
18849
m_data_size = 0;
18850
18851
memset((void *)&m_header, 0, sizeof(m_header));
18852
m_levels.clear();
18853
m_dfd.clear();
18854
m_key_values.clear();
18855
memset((void *)&m_etc1s_header, 0, sizeof(m_etc1s_header));
18856
m_etc1s_image_descs.clear();
18857
m_astc_6x6_intermediate_image_descs.clear();
18858
18859
m_format = basist::basis_tex_format::cETC1S;
18860
18861
m_dfd_color_model = 0;
18862
m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED;
18863
m_dfd_transfer_func = 0;
18864
m_dfd_flags = 0;
18865
m_dfd_samples = 0;
18866
m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB;
18867
m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB;
18868
18869
m_etc1s_transcoder.clear();
18870
18871
m_def_transcoder_state.clear();
18872
18873
m_has_alpha = false;
18874
m_is_video = false;
18875
m_ldr_hdr_upconversion_nit_multiplier = 0.0f;
18876
}
18877
18878
bool ktx2_transcoder::init(const void* pData, uint32_t data_size)
18879
{
18880
clear();
18881
18882
if (!pData)
18883
{
18884
BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n");
18885
assert(0);
18886
return false;
18887
}
18888
18889
if (data_size <= sizeof(ktx2_header))
18890
{
18891
BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n");
18892
return false;
18893
}
18894
18895
if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0)
18896
{
18897
BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n");
18898
return false;
18899
}
18900
18901
m_pData = static_cast<const uint8_t *>(pData);
18902
m_data_size = data_size;
18903
18904
memcpy((void *)&m_header, pData, sizeof(m_header));
18905
18906
// Check for supported VK formats. We may also need to parse the DFD.
18907
if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) &&
18908
(m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK) &&
18909
(m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK))
18910
{
18911
BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
18912
return false;
18913
}
18914
18915
// 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1."
18916
if (m_header.m_type_size != 1)
18917
{
18918
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n");
18919
return false;
18920
}
18921
18922
// We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case.
18923
// The BasisU library does not support 1D or 3D textures at all.
18924
if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0))
18925
{
18926
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n");
18927
return false;
18928
}
18929
18930
// Face count must be 1 or 6
18931
if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6))
18932
{
18933
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n");
18934
return false;
18935
}
18936
18937
if (m_header.m_face_count > 1)
18938
{
18939
// 3.4: Make sure cubemaps are square.
18940
if (m_header.m_pixel_width != m_header.m_pixel_height)
18941
{
18942
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n");
18943
return false;
18944
}
18945
}
18946
18947
// 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats"
18948
if (m_header.m_level_count < 1)
18949
{
18950
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n");
18951
return false;
18952
}
18953
18954
// Sanity check the level count.
18955
if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT)
18956
{
18957
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n");
18958
return false;
18959
}
18960
18961
if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD)
18962
{
18963
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n");
18964
return false;
18965
}
18966
18967
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
18968
{
18969
#if 0
18970
if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header))
18971
{
18972
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n");
18973
return false;
18974
}
18975
#endif
18976
18977
if (m_header.m_sgd_byte_offset.get_uint64() < sizeof(ktx2_header))
18978
{
18979
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n");
18980
return false;
18981
}
18982
18983
if (m_header.m_sgd_byte_offset.get_uint64() + m_header.m_sgd_byte_length.get_uint64() > m_data_size)
18984
{
18985
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n");
18986
return false;
18987
}
18988
}
18989
18990
if (!m_levels.try_resize(m_header.m_level_count))
18991
{
18992
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
18993
return false;
18994
}
18995
18996
const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index);
18997
18998
if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size)
18999
{
19000
BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n");
19001
return false;
19002
}
19003
19004
memcpy((void *)&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes);
19005
19006
// Sanity check the level offsets and byte sizes
19007
for (uint32_t i = 0; i < m_levels.size(); i++)
19008
{
19009
if (m_levels[i].m_byte_offset.get_uint64() < sizeof(ktx2_header))
19010
{
19011
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n");
19012
return false;
19013
}
19014
19015
if (!m_levels[i].m_byte_length.get_uint64())
19016
{
19017
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n");
19018
}
19019
19020
if ((m_levels[i].m_byte_offset.get_uint64() + m_levels[i].m_byte_length.get_uint64()) > m_data_size)
19021
{
19022
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n");
19023
return false;
19024
}
19025
19026
const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL;
19027
19028
if (m_levels[i].m_uncompressed_byte_length.get_uint64() >= MAX_SANE_LEVEL_UNCOMP_SIZE)
19029
{
19030
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n");
19031
return false;
19032
}
19033
19034
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
19035
{
19036
if (m_levels[i].m_uncompressed_byte_length.get_uint64())
19037
{
19038
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n");
19039
return false;
19040
}
19041
}
19042
else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD)
19043
{
19044
if (!m_levels[i].m_uncompressed_byte_length.get_uint64())
19045
{
19046
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n");
19047
return false;
19048
}
19049
}
19050
}
19051
19052
const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60;
19053
if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE))
19054
{
19055
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n");
19056
return false;
19057
}
19058
19059
if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header)))
19060
{
19061
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n");
19062
return false;
19063
}
19064
19065
const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset;
19066
19067
if (!m_dfd.try_resize(m_header.m_dfd_byte_length))
19068
{
19069
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
19070
return false;
19071
}
19072
19073
memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length);
19074
19075
// This is all hard coded for only ETC1S and UASTC.
19076
uint32_t dfd_total_size = basisu::read_le_dword(pDFD);
19077
19078
// 3.10.3: Sanity check
19079
if (dfd_total_size != m_header.m_dfd_byte_length)
19080
{
19081
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n");
19082
return false;
19083
}
19084
19085
// 3.10.3: More sanity checking
19086
if (m_header.m_kvd_byte_length)
19087
{
19088
if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset)
19089
{
19090
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n");
19091
return false;
19092
}
19093
}
19094
19095
const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t));
19096
const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t));
19097
19098
m_dfd_color_model = dfd_bits & 255;
19099
m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255);
19100
m_dfd_transfer_func = (dfd_bits >> 16) & 255;
19101
m_dfd_flags = (dfd_bits >> 24) & 255;
19102
19103
// See 3.10.1.Restrictions
19104
if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB))
19105
{
19106
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n");
19107
return false;
19108
}
19109
19110
if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S)
19111
{
19112
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19113
{
19114
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19115
return false;
19116
}
19117
19118
m_format = basist::basis_tex_format::cETC1S;
19119
19120
// 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count."
19121
// If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that.
19122
m_has_alpha = (m_header.m_dfd_byte_length == 60);
19123
19124
m_dfd_samples = m_has_alpha ? 2 : 1;
19125
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19126
19127
if (m_has_alpha)
19128
{
19129
const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t));
19130
m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15);
19131
}
19132
}
19133
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_LDR_4X4)
19134
{
19135
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19136
{
19137
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19138
return false;
19139
}
19140
19141
m_format = basist::basis_tex_format::cUASTC4x4;
19142
19143
m_dfd_samples = 1;
19144
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19145
19146
// We're assuming "DATA" means RGBA so it has alpha.
19147
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
19148
}
19149
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR_4X4)
19150
{
19151
// UASTC HDR 4x4 is standard ASTC HDR 4x4 texture data. Check the header's vkFormat.
19152
if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK)
19153
{
19154
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19155
return false;
19156
}
19157
19158
m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
19159
19160
m_dfd_samples = 1;
19161
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19162
19163
// We're assuming "DATA" means RGBA so it has alpha.
19164
// [11/26/2024] - changed to always false for now
19165
m_has_alpha = false;// (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
19166
}
19167
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC)
19168
{
19169
// The DFD indicates plain ASTC texture data. We only support ASTC HDR 6x6 - check the header's vkFormat.
19170
if (m_header.m_vk_format != basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK)
19171
{
19172
BASISU_DEVEL_ERROR("ktx2_transcoder::init: DVD color model is ASTC, but the header's vkFormat isn't KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK\n");
19173
return false;
19174
}
19175
19176
m_format = basist::basis_tex_format::cASTC_HDR_6x6;
19177
19178
m_dfd_samples = 1;
19179
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19180
19181
m_has_alpha = false;
19182
}
19183
else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE)
19184
{
19185
// Custom variable block size ASTC HDR 6x6 texture data.
19186
if (m_header.m_vk_format != basist::KTX2_VK_FORMAT_UNDEFINED)
19187
{
19188
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid header vkFormat\n");
19189
return false;
19190
}
19191
19192
m_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE;
19193
19194
m_dfd_samples = 1;
19195
m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
19196
19197
m_has_alpha = false;
19198
}
19199
else
19200
{
19201
// Unsupported DFD color model.
19202
BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n");
19203
return false;
19204
}
19205
19206
if (!read_key_values())
19207
{
19208
BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n");
19209
return false;
19210
}
19211
19212
// Check for a KTXanimData key
19213
for (uint32_t i = 0; i < m_key_values.size(); i++)
19214
{
19215
if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0)
19216
{
19217
m_is_video = true;
19218
break;
19219
}
19220
}
19221
19222
m_ldr_hdr_upconversion_nit_multiplier = 0.0f;
19223
19224
for (uint32_t i = 0; i < m_key_values.size(); i++)
19225
{
19226
if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "LDRUpconversionMultiplier") == 0)
19227
{
19228
m_ldr_hdr_upconversion_nit_multiplier = (float)atof(reinterpret_cast<const char*>(m_key_values[i].m_value.data()));
19229
19230
if (std::isnan(m_ldr_hdr_upconversion_nit_multiplier) || std::isinf(m_ldr_hdr_upconversion_nit_multiplier) || (m_ldr_hdr_upconversion_nit_multiplier < 0.0f))
19231
m_ldr_hdr_upconversion_nit_multiplier = 0;
19232
19233
break;
19234
}
19235
}
19236
19237
return true;
19238
}
19239
19240
uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
19241
{
19242
const uint32_t etc1s_image_index =
19243
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19244
layer_index * m_header.m_face_count +
19245
face_index;
19246
19247
if (etc1s_image_index >= get_etc1s_image_descs().size())
19248
{
19249
assert(0);
19250
return 0;
19251
}
19252
19253
return get_etc1s_image_descs()[etc1s_image_index].m_image_flags;
19254
}
19255
19256
// Looks up a key/value entry by key name (exact, case-sensitive strcmp match).
// Returns a pointer to the first matching entry's value bytes, or nullptr if no entry has that key.
// The returned pointer refers to storage inside m_key_values.
const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const
{
	const char* const pWanted = key_name.c_str();

	for (uint32_t kv_index = 0; kv_index < m_key_values.size(); kv_index++)
	{
		const char* pKey = reinterpret_cast<const char*>(m_key_values[kv_index].m_key.data());
		if (strcmp(pKey, pWanted) != 0)
			continue;

		return &m_key_values[kv_index].m_value;
	}

	return nullptr;
}
19264
19265
bool ktx2_transcoder::start_transcoding()
19266
{
19267
if (!m_pData)
19268
{
19269
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n");
19270
return false;
19271
}
19272
19273
if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
19274
{
19275
if (m_format == basis_tex_format::cETC1S)
19276
{
19277
// Check if we've already decompressed the ETC1S global data. If so don't unpack it again.
19278
if (!m_etc1s_transcoder.get_endpoints().empty())
19279
return true;
19280
19281
if (!decompress_etc1s_global_data())
19282
{
19283
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n");
19284
return false;
19285
}
19286
19287
if (!m_is_video)
19288
{
19289
// See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key.
19290
// Video cannot be a cubemap, and it must be a texture array.
19291
if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1))
19292
{
19293
for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++)
19294
{
19295
if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME)
19296
{
19297
m_is_video = true;
19298
break;
19299
}
19300
}
19301
}
19302
}
19303
}
19304
else if (m_format == basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
19305
{
19306
if (m_astc_6x6_intermediate_image_descs.size())
19307
return true;
19308
19309
if (!read_astc_6x6_hdr_intermediate_global_data())
19310
{
19311
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: read_astc_6x6_hdr_intermediate_global_data() failed\n");
19312
return false;
19313
}
19314
}
19315
else
19316
{
19317
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Invalid supercompression scheme and/or format\n");
19318
return false;
19319
}
19320
}
19321
else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19322
{
19323
#if !BASISD_SUPPORT_KTX2_ZSTD
19324
BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n");
19325
return false;
19326
#endif
19327
}
19328
19329
return true;
19330
}
19331
19332
// Fills level_info with the dimensions and flags of one image (mip level / array layer / cubemap face).
//
//   level_index  Must be < m_levels.size().
//   layer_index  Must be < max(header layer count, 1).
//   face_index   Must be < 6 when the header's face count is > 1, otherwise must be 0.
//
// Returns false (after reporting an error) if any index is out of range; level_info is untouched in that case.
//
// m_orig_width/height are the header dimensions shifted right by level_index (minimum 1); m_width/height are
// those rounded up to whole blocks. m_iframe_flag is false unless ETC1S image descriptors are present, in
// which case it is true when the image's KTX2_IMAGE_IS_P_FRAME flag is clear.
bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
{
	if (level_index >= m_levels.size())
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n");
		return false;
	}

	const bool has_multiple_faces = (m_header.m_face_count > 1);
	if (has_multiple_faces && (face_index >= 6))
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n");
		return false;
	}
	if ((!has_multiple_faces) && (face_index != 0))
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n");
		return false;
	}

	const uint32_t num_layers = basisu::maximum<uint32_t>(m_header.m_layer_count, 1);
	if (layer_index >= num_layers)
	{
		BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
		return false;
	}

	// Mip dimensions in pixels, then in blocks (rounded up).
	const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
	const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);

	const uint32_t block_width = get_block_width();
	const uint32_t block_height = get_block_height();

	const uint32_t num_blocks_x = (level_width + block_width - 1) / block_width;
	const uint32_t num_blocks_y = (level_height + block_height - 1) / block_height;

	level_info.m_level_index = level_index;
	level_info.m_layer_index = layer_index;
	level_info.m_face_index = face_index;

	level_info.m_orig_width = level_width;
	level_info.m_orig_height = level_height;

	level_info.m_width = num_blocks_x * block_width;
	level_info.m_height = num_blocks_y * block_height;

	level_info.m_block_width = block_width;
	level_info.m_block_height = block_height;

	level_info.m_num_blocks_x = num_blocks_x;
	level_info.m_num_blocks_y = num_blocks_y;
	level_info.m_total_blocks = num_blocks_x * num_blocks_y;

	level_info.m_alpha_flag = m_has_alpha;
	level_info.m_iframe_flag = false;

	if (m_etc1s_image_descs.size())
	{
		// Descriptors are stored level-major, then layer, then face.
		const uint32_t etc1s_image_index =
			(level_index * num_layers * m_header.m_face_count) +
			layer_index * m_header.m_face_count +
			face_index;

		const uint32_t image_flags = m_etc1s_image_descs[etc1s_image_index].m_image_flags;
		level_info.m_iframe_flag = (image_flags & KTX2_IMAGE_IS_P_FRAME) == 0;
	}

	return true;
}
19396
19397
bool ktx2_transcoder::transcode_image_level(
19398
uint32_t level_index, uint32_t layer_index, uint32_t face_index,
19399
void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
19400
basist::transcoder_texture_format fmt,
19401
uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1,
19402
ktx2_transcoder_state* pState)
19403
{
19404
if (!m_pData)
19405
{
19406
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n");
19407
return false;
19408
}
19409
19410
if (!pState)
19411
pState = &m_def_transcoder_state;
19412
19413
if (level_index >= m_levels.size())
19414
{
19415
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n");
19416
return false;
19417
}
19418
19419
if (m_header.m_face_count > 1)
19420
{
19421
if (face_index >= 6)
19422
{
19423
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n");
19424
return false;
19425
}
19426
}
19427
else if (face_index != 0)
19428
{
19429
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n");
19430
return false;
19431
}
19432
19433
if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
19434
{
19435
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
19436
return false;
19437
}
19438
19439
const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset.get_uint64();
19440
uint64_t comp_level_data_size = m_levels[level_index].m_byte_length.get_uint64();
19441
19442
const uint8_t* pUncomp_level_data = pComp_level_data;
19443
uint64_t uncomp_level_data_size = comp_level_data_size;
19444
19445
if (uncomp_level_data_size > UINT32_MAX)
19446
{
19447
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n");
19448
return false;
19449
}
19450
19451
if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19452
{
19453
// Check if we've already decompressed this level's supercompressed data.
19454
if ((int)level_index != pState->m_uncomp_data_level_index)
19455
{
19456
// Uncompress the entire level's supercompressed data.
19457
if (!decompress_level_data(level_index, pState->m_level_uncomp_data))
19458
{
19459
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n");
19460
return false;
19461
}
19462
pState->m_uncomp_data_level_index = level_index;
19463
}
19464
19465
pUncomp_level_data = pState->m_level_uncomp_data.data();
19466
uncomp_level_data_size = pState->m_level_uncomp_data.size();
19467
}
19468
19469
const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
19470
const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
19471
const uint32_t num_blocks4_x = (level_width + 3) >> 2;
19472
const uint32_t num_blocks4_y = (level_height + 3) >> 2;
19473
19474
if (m_format == basist::basis_tex_format::cETC1S)
19475
{
19476
// Ensure start_transcoding() was called.
19477
if (m_etc1s_transcoder.get_endpoints().empty())
19478
{
19479
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
19480
return false;
19481
}
19482
19483
const uint32_t etc1s_image_index =
19484
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19485
layer_index * m_header.m_face_count +
19486
face_index;
19487
19488
// Sanity check
19489
if (etc1s_image_index >= m_etc1s_image_descs.size())
19490
{
19491
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n");
19492
assert(0);
19493
return false;
19494
}
19495
19496
const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index];
19497
19498
if (!m_etc1s_transcoder.transcode_image(fmt,
19499
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, m_data_size,
19500
num_blocks4_x, num_blocks4_y, level_width, level_height,
19501
level_index,
19502
m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
19503
image_desc.m_alpha_slice_byte_length ? (m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length,
19504
decode_flags, m_has_alpha,
19505
m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels))
19506
{
19507
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19508
return false;
19509
}
19510
}
19511
else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
19512
{
19513
if (!m_astc_6x6_intermediate_image_descs.size())
19514
{
19515
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
19516
return false;
19517
}
19518
19519
const uint32_t num_blocks6_x = (level_width + 5) / 6;
19520
const uint32_t num_blocks6_y = (level_height + 5) / 6;
19521
19522
const uint32_t image_index =
19523
(level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
19524
layer_index * m_header.m_face_count +
19525
face_index;
19526
19527
// Sanity check
19528
if (image_index >= m_astc_6x6_intermediate_image_descs.size())
19529
{
19530
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Invalid image_index\n");
19531
assert(0);
19532
return false;
19533
}
19534
19535
const ktx2_astc_hdr_6x6_intermediate_image_desc& image_desc = m_astc_6x6_intermediate_image_descs[image_index];
19536
19537
if (!m_astc_hdr_6x6_intermediate_transcoder.transcode_image(fmt,
19538
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19539
m_pData, m_data_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index,
19540
m_levels[level_index].m_byte_offset.get_uint64() + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
19541
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19542
{
19543
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19544
return false;
19545
}
19546
}
19547
else if (m_format == basist::basis_tex_format::cASTC_HDR_6x6)
19548
{
19549
const uint32_t num_blocks6_x = (level_width + 5) / 6;
19550
const uint32_t num_blocks6_y = (level_height + 5) / 6;
19551
19552
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
19553
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64());
19554
const uint32_t total_2D_image_size = num_blocks6_x * num_blocks6_y * sizeof(astc_helpers::astc_block);
19555
19556
const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
19557
19558
// Sanity checks
19559
if (uncomp_ofs >= uncomp_level_data_size)
19560
{
19561
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
19562
return false;
19563
}
19564
19565
if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
19566
{
19567
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
19568
return false;
19569
}
19570
19571
if (!m_astc_hdr_6x6_transcoder.transcode_image(fmt,
19572
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19573
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks6_x, num_blocks6_y, level_width, level_height, level_index,
19574
0, (uint32_t)total_2D_image_size,
19575
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19576
{
19577
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ASTC 6x6 HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19578
return false;
19579
}
19580
}
19581
else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
19582
(m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
19583
{
19584
// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
19585
assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length.get_uint64());
19586
const uint32_t total_2D_image_size = num_blocks4_x * num_blocks4_y * KTX2_UASTC_BLOCK_SIZE;
19587
19588
const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
19589
19590
// Sanity checks
19591
if (uncomp_ofs >= uncomp_level_data_size)
19592
{
19593
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
19594
return false;
19595
}
19596
19597
if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
19598
{
19599
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
19600
return false;
19601
}
19602
19603
if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
19604
{
19605
if (!m_uastc_hdr_transcoder.transcode_image(fmt,
19606
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19607
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index,
19608
0, (uint32_t)total_2D_image_size,
19609
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19610
{
19611
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19612
return false;
19613
}
19614
}
19615
else
19616
{
19617
if (!m_uastc_transcoder.transcode_image(fmt,
19618
pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
19619
(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks4_x, num_blocks4_y, level_width, level_height, level_index,
19620
0, (uint32_t)total_2D_image_size,
19621
decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
19622
{
19623
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
19624
return false;
19625
}
19626
}
19627
}
19628
else
19629
{
19630
// Shouldn't get here.
19631
BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n");
19632
assert(0);
19633
return false;
19634
}
19635
19636
return true;
19637
}
19638
19639
bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
19640
{
19641
const uint8_t* pComp_data = m_levels[level_index].m_byte_offset.get_uint64() + m_pData;
19642
const uint64_t comp_size = m_levels[level_index].m_byte_length.get_uint64();
19643
19644
const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length.get_uint64();
19645
19646
if (((size_t)comp_size) != comp_size)
19647
{
19648
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n");
19649
return false;
19650
}
19651
if (((size_t)uncomp_size) != uncomp_size)
19652
{
19653
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n");
19654
return false;
19655
}
19656
19657
if (!uncomp_data.try_resize((size_t)uncomp_size))
19658
{
19659
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n");
19660
return false;
19661
}
19662
19663
if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
19664
{
19665
#if BASISD_SUPPORT_KTX2_ZSTD
19666
size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
19667
if (ZSTD_isError(actualUncompSize))
19668
{
19669
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n");
19670
return false;
19671
}
19672
if (actualUncompSize != uncomp_size)
19673
{
19674
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n");
19675
return false;
19676
}
19677
#else
19678
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n");
19679
return false;
19680
#endif
19681
}
19682
19683
return true;
19684
}
19685
19686
bool ktx2_transcoder::read_astc_6x6_hdr_intermediate_global_data()
19687
{
19688
const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
19689
assert(image_count);
19690
19691
const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset.get_uint64();
19692
19693
if (m_header.m_sgd_byte_length.get_uint64() != image_count * sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc))
19694
{
19695
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: Invalid global data length\n");
19696
return false;
19697
}
19698
19699
m_astc_6x6_intermediate_image_descs.resize(image_count);
19700
19701
memcpy((void *)m_astc_6x6_intermediate_image_descs.data(), pSrc, sizeof(ktx2_astc_hdr_6x6_intermediate_image_desc) * image_count);
19702
19703
// Sanity check the image descs
19704
for (uint32_t i = 0; i < image_count; i++)
19705
{
19706
// transcode_image() will validate the slice offsets/lengths before transcoding.
19707
19708
if (!m_astc_6x6_intermediate_image_descs[i].m_rgb_slice_byte_length)
19709
{
19710
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_astc_6x6_hdr_intermediate_global_data: image descs sanity check failed (1)\n");
19711
return false;
19712
}
19713
}
19714
19715
return true;
19716
}
19717
19718
bool ktx2_transcoder::decompress_etc1s_global_data()
19719
{
19720
// Note: we don't actually support 3D textures in here yet
19721
//uint32_t layer_pixel_depth = basisu::maximum<uint32_t>(m_header.m_pixel_depth, 1);
19722
//for (uint32_t i = 1; i < m_header.m_level_count; i++)
19723
// layer_pixel_depth += basisu::maximum<uint32_t>(m_header.m_pixel_depth >> i, 1);
19724
19725
const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
19726
assert(image_count);
19727
19728
const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset.get_uint64();
19729
19730
memcpy((void *)&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header));
19731
pSrc += sizeof(ktx2_etc1s_global_data_header);
19732
19733
if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length))
19734
{
19735
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n");
19736
return false;
19737
}
19738
19739
if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count))
19740
{
19741
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n");
19742
return false;
19743
}
19744
19745
// Sanity check the ETC1S header.
19746
if ((sizeof(ktx2_etc1s_global_data_header) +
19747
sizeof(ktx2_etc1s_image_desc) * image_count +
19748
m_etc1s_header.m_endpoints_byte_length +
19749
m_etc1s_header.m_selectors_byte_length +
19750
m_etc1s_header.m_tables_byte_length +
19751
m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length.get_uint64())
19752
{
19753
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n");
19754
return false;
19755
}
19756
19757
if (!m_etc1s_image_descs.try_resize(image_count))
19758
{
19759
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n");
19760
return false;
19761
}
19762
19763
memcpy((void *)m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count);
19764
pSrc += sizeof(ktx2_etc1s_image_desc) * image_count;
19765
19766
// Sanity check the ETC1S image descs
19767
for (uint32_t i = 0; i < image_count; i++)
19768
{
19769
// m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding.
19770
19771
if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length)
19772
{
19773
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n");
19774
return false;
19775
}
19776
19777
if (m_has_alpha)
19778
{
19779
if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length)
19780
{
19781
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n");
19782
return false;
19783
}
19784
}
19785
}
19786
19787
const uint8_t* pEndpoint_data = pSrc;
19788
const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length;
19789
const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length;
19790
19791
if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length))
19792
{
19793
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n");
19794
return false;
19795
}
19796
19797
if (!m_etc1s_transcoder.decode_palettes(
19798
m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length,
19799
m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length))
19800
{
19801
BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n");
19802
return false;
19803
}
19804
19805
return true;
19806
}
19807
19808
bool ktx2_transcoder::read_key_values()
19809
{
19810
if (!m_header.m_kvd_byte_length)
19811
{
19812
if (m_header.m_kvd_byte_offset)
19813
{
19814
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n");
19815
return false;
19816
}
19817
19818
return true;
19819
}
19820
19821
if (m_header.m_kvd_byte_offset < sizeof(ktx2_header))
19822
{
19823
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n");
19824
return false;
19825
}
19826
19827
if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size)
19828
{
19829
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n");
19830
return false;
19831
}
19832
19833
const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset;
19834
uint32_t src_left = m_header.m_kvd_byte_length;
19835
19836
if (!m_key_values.try_reserve(8))
19837
{
19838
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19839
return false;
19840
}
19841
19842
while (src_left > sizeof(uint32_t))
19843
{
19844
uint32_t l = basisu::read_le_dword(pSrc);
19845
19846
pSrc += sizeof(uint32_t);
19847
src_left -= sizeof(uint32_t);
19848
19849
if (l < 2)
19850
{
19851
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n");
19852
return false;
19853
}
19854
19855
if (src_left < l)
19856
{
19857
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n");
19858
return false;
19859
}
19860
19861
if (!m_key_values.try_resize(m_key_values.size() + 1))
19862
{
19863
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19864
return false;
19865
}
19866
19867
basisu::uint8_vec& key_data = m_key_values.back().m_key;
19868
basisu::uint8_vec& value_data = m_key_values.back().m_value;
19869
19870
do
19871
{
19872
if (!l)
19873
{
19874
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n");
19875
return false;
19876
}
19877
19878
if (!key_data.try_push_back(*pSrc++))
19879
{
19880
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19881
return false;
19882
}
19883
19884
src_left--;
19885
l--;
19886
19887
} while (key_data.back());
19888
19889
// Ensure key and value are definitely 0 terminated
19890
if (!key_data.try_push_back('\0'))
19891
{
19892
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19893
return false;
19894
}
19895
19896
if (!value_data.try_resize(l))
19897
{
19898
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19899
return false;
19900
}
19901
19902
if (l)
19903
{
19904
memcpy(value_data.data(), pSrc, l);
19905
pSrc += l;
19906
src_left -= l;
19907
}
19908
19909
// Ensure key and value are definitely 0 terminated
19910
if (!value_data.try_push_back('\0'))
19911
{
19912
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
19913
return false;
19914
}
19915
19916
uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3;
19917
uint32_t alignment_bytes = (4 - ofs) & 3;
19918
19919
if (src_left < alignment_bytes)
19920
{
19921
BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n");
19922
return false;
19923
}
19924
19925
pSrc += alignment_bytes;
19926
src_left -= alignment_bytes;
19927
}
19928
19929
return true;
19930
}
19931
19932
#endif // BASISD_SUPPORT_KTX2
19933
19934
// Reports whether this build was compiled with KTX2 container support (BASISD_SUPPORT_KTX2 non-zero).
bool basisu_transcoder_supports_ktx2()
{
#if BASISD_SUPPORT_KTX2
	const bool ktx2_compiled_in = true;
#else
	const bool ktx2_compiled_in = false;
#endif
	return ktx2_compiled_in;
}
19942
19943
// Reports whether this build can decode Zstandard-supercompressed KTX2 levels (BASISD_SUPPORT_KTX2_ZSTD non-zero).
bool basisu_transcoder_supports_ktx2_zstd()
{
#if BASISD_SUPPORT_KTX2_ZSTD
	const bool zstd_compiled_in = true;
#else
	const bool zstd_compiled_in = false;
#endif
	return zstd_compiled_in;
}
19951
19952
//-------------------------------
19953
19954
#if BASISD_SUPPORT_UASTC_HDR
19955
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
19956
// Converts a 32-bit float to a 16-bit half float (1 sign, 5 exponent, 10 mantissa bits).
// - Inf/NaN map to exponent 31 (NaN gets mantissa 1).
// - Float zero/denormals map to signed zero.
// - Magnitudes too large for half become infinity; magnitudes below the smallest normal half become
//   half denormals.
// Mantissas are rounded with lrintf(), so the result follows the current floating point rounding mode.
basist::half_float float_to_half(float val)
{
	union { float f; int32_t i; uint32_t u; } bits = { val };

	const int src_mantissa = static_cast<int>(bits.u & 0x7FFFFF);
	const int src_exponent = static_cast<int>((bits.u >> 23) & 0xFF);
	const int sign_bit = static_cast<int>((bits.u >> 31) & 0x1);

	int half_exp = 0;
	int half_mant = 0;

	if (src_exponent == 0xff)
	{
		// inf/NaN
		half_exp = 31;
		half_mant = (src_mantissa != 0) ? 1 : 0;
	}
	else if (src_exponent != 0)
	{
		// not zero or denormal
		const int unbiased_exp = src_exponent - 127;

		if (unbiased_exp > 15)
		{
			// Too large for half: becomes infinity.
			half_exp = 31;
		}
		else if (unbiased_exp >= -14)
		{
			// Normal half: rebias the exponent and drop the low 13 mantissa bits with rounding.
			half_exp = unbiased_exp + 15;
			half_mant = lrintf(static_cast<float>(src_mantissa) * (1.0f / 8192.0f));
		}
		else
		{
			// Half denormal: the value is expressed in units of 2^-24.
			half_mant = lrintf(16777216.0f * fabsf(val));
		}
	}

	// Rounding can carry out of the 10-bit mantissa; propagate the carry into the exponent.
	assert((0 <= half_mant) && (half_mant <= 1024));
	if (half_mant == 1024)
	{
		half_exp++;
		half_mant = 0;
	}

	assert((sign_bit >= 0) && (sign_bit <= 1));
	assert((half_exp >= 0) && (half_exp <= 31));
	assert((half_mant >= 0) && (half_mant <= 1023));

	return (basist::half_float)((sign_bit << 15) | (half_exp << 10) | half_mant);
}
19998
19999
//------------------------------------------------------------------------------------------------
20000
// HDR support
20001
//
20002
// Originally from bc6h_enc.cpp
20003
// BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
20004
20005
// Per-mode endpoint bit counts, indexed [mode][column].
// Column 0 is the bit count of the base (first) endpoint value; columns 1..3 are the bit counts of the
// remaining endpoint values for R, G and B. pack_bc6h_block() uses these as the upper bounds in its
// endpoint sanity asserts.
const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b
20006
{
20007
// 2 subsets
20008
{ 10, 5, 5, 5, }, // 0, mode 1 in MS/D3D docs
20009
{ 7, 6, 6, 6, }, // 1
20010
{ 11, 5, 4, 4, }, // 2
20011
{ 11, 4, 5, 4, }, // 3
20012
{ 11, 4, 4, 5, }, // 4
20013
{ 9, 5, 5, 5, }, // 5
20014
{ 8, 6, 5, 5, }, // 6
20015
{ 8, 5, 6, 5, }, // 7
20016
{ 8, 5, 5, 6, }, // 8
20017
{ 6, 6, 6, 6, }, // 9, endpoints not delta encoded, mode 10 in MS/D3D docs
20018
// 1 subset
20019
{ 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
20020
{ 11, 9, 9, 9, }, // 11
20021
{ 12, 8, 8, 8, }, // 12
20022
{ 16, 4, 4, 4, } // 13, also useful for solid blocks
20023
};
20024
20025
// Maps a 5-bit mode field value to the internal mode index (0..13); this is the inverse of the
// s_mode_bits table in pack_bc6h_block(). Modes 0 and 1 only use the low 2 bits, so every value ending in
// 0b00 / 0b01 maps to them. -1 marks the 5-bit values (19, 23, 27, 31) that no mode uses.
const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 };
20026
20027
// Per-mode list of bit fields, in the order pack_bc6h_block() walks them after the mode bits.
// Each entry is { comp, index, last_bit, first_bit }:
//   comp      - 0..2 selects an endpoint component, 3 selects the partition pattern, -1 terminates the list.
//   index     - which endpoint value of that component (subset*2 + low/high index); -1 for the partition entry.
//   last_bit / first_bit - the bit range of the source value to emit. first_bit == -1 means a single bit
//               (bit number last_bit) is written.
// NOTE(review): entries with first_bit > last_bit (e.g. { 0, 0, 10, 11 }) presumably describe a range written
// in reversed bit order - confirm against the remainder of pack_bc6h_block().
const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
20028
{
20029
// comp_index, subset*2+lh_index, last_bit, first_bit
20030
//------------------------ mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta
20031
{ { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
20032
{ 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
20033
{ 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20034
//------------------------ mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
20035
{ { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
20036
{ 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
20037
{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
20038
{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20039
//------------------------ mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
20040
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
20041
{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
20042
{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20043
//------------------------ mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
20044
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
20045
{ 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
20046
{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20047
//------------------------ mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
20048
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
20049
{ 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
20050
{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20051
//------------------------ mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
20052
{ { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
20053
{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
20054
{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20055
//------------------------ mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
20056
{ { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
20057
{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
20058
{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20059
//------------------------ mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
20060
{ { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
20061
{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
20062
{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20063
//------------------------ mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
20064
{ { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
20065
{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
20066
{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20067
//------------------------ mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
20068
{ { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
20069
{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
20070
{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
20071
//------------------------ mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
20072
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
20073
//------------------------ mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
20074
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
20075
//------------------------ mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
20076
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
20077
//------------------------ mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
20078
{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
20079
};
20080
20081
// The same as the first 32 2-subset patterns in BC7.
20082
// Bit 7 is a flag indicating that the weight uses 1 less bit than usual.
20083
// 2-subset partition table, indexed [pattern][y][x] over the 4x4 texels of a block.
// NOTE(review): the low bit of each entry looks like the texel's subset (0 or 1), and entries with
// bit 7 set (0x80 / 0x81) appear exactly once per subset - presumably the texels whose weight is stored
// with one bit less; confirm against the code that reads this table.
const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
20084
{
20085
{ {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
20086
{ {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
20087
{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
20088
{ {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
20089
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
20090
{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
20091
{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
20092
{ {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
20093
{ {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
20094
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
20095
{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
20096
{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, { 0, 0, 1, 1}, {0, 0, 0, 0x81} },
20097
{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
20098
{ {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
20099
{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
20100
{ {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
20101
};
20102
20103
// Weight tables with 8 and 16 entries, each running from 0 to 64.
// NOTE(review): presumably the interpolation weights selected by 3-bit and 4-bit per-texel indices - confirm
// against the code that reads them.
const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
20104
const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
20105
20106
// ORs the low `num_bits` bits of `val` into a 128-bit value held as two 64-bit halves (`l` = bits 0..63,
// `h` = bits 64..127), starting at bit `bit_pos`, then advances `bit_pos` by `num_bits`.
// The destination bits are expected to be zero beforehand since the value is OR'ed in.
// Preconditions (asserted): 1 <= num_bits < 64, bit_pos < 128, val fits in num_bits bits, and the write
// doesn't run past bit 128.
static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
{
	assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
	assert(val < (1ULL << num_bits));

	const uint32_t first_bit = bit_pos;
	const uint32_t end_bit = first_bit + num_bits;

	if (first_bit >= 64)
	{
		// Entirely inside the high half.
		h |= (val << (first_bit - 64));
	}
	else
	{
		l |= (val << first_bit);

		// The field straddles the 64-bit boundary: spill the upper bits into the high half.
		if (end_bit > 64)
			h |= (val >> (64 - first_bit));
	}

	bit_pos = end_bit;
	assert(bit_pos <= 128);
}
20126
20127
static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
20128
{
20129
assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
20130
assert(val < (1ULL << num_bits));
20131
20132
for (uint32_t i = 0; i < num_bits; i++)
20133
write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h);
20134
}
20135
20136
// Serializes a logical BC6H block into its 128-bit physical form.
//
// dst_blk: receives the 16 packed bytes, written as four little-endian dwords.
// log_blk: supplies the mode index, partition pattern, endpoints and 16 weights. It is only read here.
//
// Bits are emitted in this order: the mode prefix, the endpoint/partition fields in the order listed in
// g_bc6h_bit_layouts[mode] (terminated by an entry with m_comp == -1), then the 16 weight indices.
// Debug builds assert that every field fits its bit budget and that exactly 128 bits were written.
void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
{
	// Mode prefix bits indexed by mode. Modes 0 and 1 have a 2-bit prefix, every other mode a 5-bit prefix.
	// The table is constant, so keep it static instead of rebuilding it on the stack on every call.
	static const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };

	const uint32_t mode = log_blk.m_mode;
	assert(mode < NUM_BC6H_MODES);

	uint64_t l = s_mode_bits[mode], h = 0;
	uint32_t bit_pos = (mode >= 2) ? 5 : 2;

	const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;

	assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
		((num_subsets == 1) && (!log_blk.m_partition_pattern)));

	// Sanity checks: every endpoint value must fit in the number of significant bits the mode gives it.
	for (uint32_t c = 0; c < 3; c++)
	{
		assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0])); // 1st subset l, base bits
		assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
		assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
		assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
	}

	// Walk the mode's field list and emit each field.
	const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0];

	while (pLayout->m_comp != -1)
	{
		// m_comp == 3 selects the partition pattern, anything else selects an endpoint channel.
		uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index];

		if (pLayout->m_first_bit == -1)
		{
			// Single-bit field: m_last_bit is the index of the source bit.
			write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
		}
		else
		{
			const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;

			// Isolate the [min(first,last), max(first,last)] bit range of the source value.
			// The mask is built with unsigned arithmetic so it doesn't mix signed and unsigned operands.
			v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit);
			v &= ((1u << total_bits) - 1u);

			// A descending range (first > last) is stored bit-reversed.
			if (pLayout->m_first_bit > pLayout->m_last_bit)
				write_rev_bits(v, total_bits, bit_pos, l, h);
			else
				write_bits(v, total_bits, bit_pos, l, h);
		}

		pLayout++;
	}

	// Weights: 4 bits each for 1-subset modes, 3 bits each for 2-subset modes.
	const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3;
	const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];

	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t sel = log_blk.m_weights[i];

		uint32_t num_bits = num_mode_sel_bits;
		if (num_subsets == 2)
		{
			// Pattern entries flagged with 0x80 are stored with one bit less.
			const uint32_t subset_index = pPat[i];
			num_bits -= (subset_index >> 7);
		}
		else if (!i)
		{
			// With a single subset only the first pixel is stored with one bit less.
			num_bits--;
		}

		assert(sel < (1u << num_bits));

		write_bits(sel, num_bits, bit_pos, l, h);
	}

	assert(bit_pos == 128);

	basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
	basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
	basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
	basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
}
20216
20217
#if 0
20218
// Expands a bits_per_comp-bit quantized value to the 16-bit scale.
// Values with 15 or more bits pass through unchanged; otherwise 0 maps to 0, the all-ones value maps to 0xFFFF,
// and everything in between is scaled up with rounding.
static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
{
	if (bits_per_comp >= 15)
		return (uint32_t)(int)comp;

	if (comp == 0)
		return 0;

	const uint32_t all_ones = (1u << bits_per_comp) - 1u;
	if (comp == all_ones)
		return 0xFFFFu;

	const int scaled = ((comp << 16u) + 0x8000u) >> bits_per_comp;
	return scaled;
}
20233
#endif
20234
20235
// 6,7,8,9,10,11,12
20236
const uint32_t BC6H_BLOG_TAB_MIN = 6;
20237
const uint32_t BC6H_BLOG_TAB_MAX = 12;
20238
//const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
20239
20240
// Handles 16, or 6-12 bits. Others assert.
20241
static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
20242
{
20243
assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
20244
assert((num_bits == 16) || ((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX)));
20245
20246
return bc6h_half_to_blog(h, num_bits);
20247
#if 0
20248
BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
20249
BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
20250
20251
if (num_bits == 16)
20252
{
20253
return bc6h_half_to_blog(h, 16);
20254
}
20255
else
20256
{
20257
assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
20258
20259
// Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
20260
return bc6h_half_to_blog(h, num_bits);
20261
}
20262
#endif
20263
}
20264
20265
// Set once bc6h_enc_init() has run; the bc6h_enc_block_*() functions assert on it.
bool g_bc6h_enc_initialized;

// One-time BC6H encoder initialization. Calling it again is a no-op.
void bc6h_enc_init()
{
	if (!g_bc6h_enc_initialized)
	{
		g_bc6h_enc_initialized = true;
	}
}
20274
20275
// mode 10, 4-bit weights
20276
void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20277
{
20278
assert(g_bc6h_enc_initialized);
20279
20280
for (uint32_t i = 0; i < 16; i++)
20281
{
20282
assert(pWeights[i] <= 15);
20283
}
20284
20285
bc6h_logical_block log_blk;
20286
log_blk.clear();
20287
20288
// Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
20289
for (uint32_t c = 0; c < 3; c++)
20290
{
20291
log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
20292
log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
20293
}
20294
20295
memcpy(log_blk.m_weights, pWeights, 16);
20296
20297
if (log_blk.m_weights[0] & 8)
20298
{
20299
for (uint32_t i = 0; i < 16; i++)
20300
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
20301
20302
for (uint32_t c = 0; c < 3; c++)
20303
{
20304
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20305
}
20306
}
20307
20308
log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
20309
pack_bc6h_block(*pPacked_block, log_blk);
20310
}
20311
20312
// Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights
20313
void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20314
{
20315
assert(g_bc6h_enc_initialized);
20316
20317
for (uint32_t i = 0; i < 16; i++)
20318
{
20319
assert(pWeights[i] <= 15);
20320
}
20321
20322
bc6h_logical_block log_blk;
20323
log_blk.clear();
20324
20325
for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
20326
{
20327
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
20328
const int base_bitmask = (1 << num_base_bits) - 1;
20329
const int delta_bitmask = (1 << num_delta_bits) - 1;
20330
BASISU_NOTE_UNUSED(base_bitmask);
20331
20332
assert(num_delta_bits < num_base_bits);
20333
assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
20334
20335
uint32_t blog_endpoints[3][2];
20336
20337
// Convert half endpoints to blog 16, 12, or 11
20338
for (uint32_t c = 0; c < 3; c++)
20339
{
20340
blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
20341
assert((int)blog_endpoints[c][0] <= base_bitmask);
20342
20343
blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
20344
assert((int)blog_endpoints[c][1] <= base_bitmask);
20345
}
20346
20347
// Copy weights
20348
memcpy(log_blk.m_weights, pWeights, 16);
20349
20350
// Ensure first weight MSB is 0
20351
if (log_blk.m_weights[0] & 8)
20352
{
20353
// Invert weights
20354
for (uint32_t i = 0; i < 16; i++)
20355
log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
20356
20357
// Swap blog quantized endpoints
20358
for (uint32_t c = 0; c < 3; c++)
20359
{
20360
std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
20361
}
20362
}
20363
20364
const int max_delta = (1 << (num_delta_bits - 1)) - 1;
20365
const int min_delta = -(max_delta + 1);
20366
assert((max_delta - min_delta) == delta_bitmask);
20367
20368
bool failed_flag = false;
20369
for (uint32_t c = 0; c < 3; c++)
20370
{
20371
log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
20372
20373
int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
20374
if ((delta < min_delta) || (delta > max_delta))
20375
{
20376
failed_flag = true;
20377
break;
20378
}
20379
20380
log_blk.m_endpoints[c][1] = delta & delta_bitmask;
20381
}
20382
20383
if (failed_flag)
20384
continue;
20385
20386
log_blk.m_mode = mode;
20387
pack_bc6h_block(*pPacked_block, log_blk);
20388
20389
return;
20390
}
20391
20392
// Worst case fall back to mode 10, which can handle any endpoints
20393
bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
20394
}
20395
20396
// Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset
20397
void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20398
{
20399
assert(g_bc6h_enc_initialized);
20400
20401
for (uint32_t i = 0; i < 16; i++)
20402
{
20403
assert(pWeights[i] <= 7);
20404
}
20405
20406
bc6h_logical_block log_blk;
20407
log_blk.clear();
20408
20409
// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
20410
for (uint32_t c = 0; c < 3; c++)
20411
{
20412
log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
20413
log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0];
20414
20415
log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
20416
log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1];
20417
}
20418
20419
memcpy(log_blk.m_weights, pWeights, 16);
20420
20421
const uint32_t pat_index = 0;
20422
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20423
20424
if (log_blk.m_weights[0] & 4)
20425
{
20426
for (uint32_t c = 0; c < 3; c++)
20427
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20428
20429
for (uint32_t i = 0; i < 16; i++)
20430
if ((pPat[i] & 0x7F) == 0)
20431
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20432
}
20433
20434
if (log_blk.m_weights[15] & 4)
20435
{
20436
for (uint32_t c = 0; c < 3; c++)
20437
std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
20438
20439
for (uint32_t i = 0; i < 16; i++)
20440
if ((pPat[i] & 0x7F) == 1)
20441
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20442
}
20443
20444
log_blk.m_mode = 9;
20445
log_blk.m_partition_pattern = pat_index;
20446
pack_bc6h_block(*pPacked_block, log_blk);
20447
}
20448
20449
// Tries modes 0-8, falls back to mode 9
20450
void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
20451
{
20452
assert(g_bc6h_enc_initialized);
20453
20454
for (uint32_t i = 0; i < 16; i++)
20455
{
20456
assert(pWeights[i] <= 7);
20457
}
20458
20459
bc6h_logical_block log_blk;
20460
log_blk.clear();
20461
20462
for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
20463
{
20464
static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least
20465
const uint32_t mode = s_mode_order[mode_iter];
20466
20467
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
20468
const int base_bitmask = (1 << num_base_bits) - 1;
20469
BASISU_NOTE_UNUSED(base_bitmask);
20470
20471
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
20472
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
20473
20474
uint32_t blog_endpoints[3][4];
20475
20476
// Convert half endpoints to blog 7-11
20477
for (uint32_t c = 0; c < 3; c++)
20478
{
20479
blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
20480
blog_endpoints[c][2] = blog_endpoints[c][0];
20481
assert((int)blog_endpoints[c][0] <= base_bitmask);
20482
20483
blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
20484
blog_endpoints[c][3] = blog_endpoints[c][1];
20485
assert((int)blog_endpoints[c][1] <= base_bitmask);
20486
}
20487
20488
const uint32_t pat_index = 0;
20489
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20490
20491
memcpy(log_blk.m_weights, pWeights, 16);
20492
20493
if (log_blk.m_weights[0] & 4)
20494
{
20495
// Swap part 0's endpoints/weights
20496
for (uint32_t c = 0; c < 3; c++)
20497
std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
20498
20499
for (uint32_t i = 0; i < 16; i++)
20500
if ((pPat[i] & 0x7F) == 0)
20501
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20502
}
20503
20504
if (log_blk.m_weights[15] & 4)
20505
{
20506
// Swap part 1's endpoints/weights
20507
for (uint32_t c = 0; c < 3; c++)
20508
std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
20509
20510
for (uint32_t i = 0; i < 16; i++)
20511
if ((pPat[i] & 0x7F) == 1)
20512
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20513
}
20514
20515
bool failed_flag = false;
20516
20517
for (uint32_t c = 0; c < 3; c++)
20518
{
20519
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
20520
20521
const int min_delta = -(max_delta + 1);
20522
assert((max_delta - min_delta) == delta_bitmasks[c]);
20523
20524
log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
20525
20526
int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
20527
int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
20528
int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
20529
20530
if ((delta0 < min_delta) || (delta0 > max_delta) ||
20531
(delta1 < min_delta) || (delta1 > max_delta) ||
20532
(delta2 < min_delta) || (delta2 > max_delta))
20533
{
20534
failed_flag = true;
20535
break;
20536
}
20537
20538
log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
20539
log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
20540
log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
20541
}
20542
20543
if (failed_flag)
20544
continue;
20545
20546
log_blk.m_mode = mode;
20547
log_blk.m_partition_pattern = pat_index;
20548
pack_bc6h_block(*pPacked_block, log_blk);
20549
20550
return;
20551
20552
} // mode_iter
20553
20554
bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
20555
}
20556
20557
// pEndpoints[subset][comp][lh_index]
20558
void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
20559
{
20560
assert(g_bc6h_enc_initialized);
20561
assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
20562
20563
for (uint32_t i = 0; i < 16; i++)
20564
{
20565
assert(pWeights[i] <= 7);
20566
}
20567
20568
bc6h_logical_block log_blk;
20569
log_blk.clear();
20570
20571
// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
20572
for (uint32_t s = 0; s < 2; s++)
20573
{
20574
for (uint32_t c = 0; c < 3; c++)
20575
{
20576
log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6);
20577
log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6);
20578
}
20579
}
20580
20581
memcpy(log_blk.m_weights, pWeights, 16);
20582
20583
//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
20584
const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
20585
20586
const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
20587
if (invert_flag)
20588
{
20589
for (uint32_t c = 0; c < 3; c++)
20590
{
20591
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]);
20592
std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]);
20593
}
20594
}
20595
20596
const uint32_t pat_index = bc7_pattern;
20597
assert(pat_index < 32);
20598
const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
20599
20600
bool swap_flags[2] = { false, false };
20601
for (uint32_t i = 0; i < 16; i++)
20602
{
20603
if ((pPat[i] & 0x80) == 0)
20604
continue;
20605
20606
if (log_blk.m_weights[i] & 4)
20607
{
20608
const uint32_t p = pPat[i] & 1;
20609
swap_flags[p] = true;
20610
}
20611
}
20612
20613
if (swap_flags[0])
20614
{
20615
for (uint32_t c = 0; c < 3; c++)
20616
std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
20617
20618
for (uint32_t i = 0; i < 16; i++)
20619
if ((pPat[i] & 0x7F) == 0)
20620
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20621
}
20622
20623
if (swap_flags[1])
20624
{
20625
for (uint32_t c = 0; c < 3; c++)
20626
std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
20627
20628
for (uint32_t i = 0; i < 16; i++)
20629
if ((pPat[i] & 0x7F) == 1)
20630
log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
20631
}
20632
20633
log_blk.m_mode = 9;
20634
log_blk.m_partition_pattern = pat_index;
20635
pack_bc6h_block(*pPacked_block, log_blk);
20636
}
20637
20638
// Packs a 2-subset block with 3-bit weights.
// Tries modes 0-8 (delta endpoint encoding) in order of decreasing base precision and uses the first one whose
// endpoint deltas fit. Falls back to bc6h_enc_block_2subset_mode9_3bit_weights() if none do.
// common_part_index indexes basist::g_astc_bc7_common_partitions2.
// pEndpoints[subset][comp][lh_index], pWeights holds 16 weights in [0,7].
void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
{
	assert(g_bc6h_enc_initialized);
	// Same bounds check as the mode 9 variant: the index is used directly on the common partition table below.
	assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);

	for (uint32_t i = 0; i < 16; i++)
	{
		assert(pWeights[i] <= 7);
	}

	bc6h_logical_block log_blk;
	log_blk.clear();

	// Everything up to the mode loop is independent of the mode being tried, so it is done once.

	//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
	const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
	const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;

	const uint32_t pat_index = bc7_pattern;
	assert(pat_index < 32);
	const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];

	memcpy(log_blk.m_weights, pWeights, 16);

	// Look at the pixels flagged with 0x80 in the pattern: if such a pixel's weight has its top bit set,
	// the subset it belongs to must be flipped (weights inverted, endpoints exchanged).
	bool swap_flags[2] = { false, false };
	for (uint32_t i = 0; i < 16; i++)
	{
		if ((pPat[i] & 0x80) == 0)
			continue;

		if (log_blk.m_weights[i] & 4)
		{
			const uint32_t p = pPat[i] & 1;
			swap_flags[p] = true;
		}
	}

	for (uint32_t s = 0; s < 2; s++)
	{
		if (!swap_flags[s])
			continue;

		for (uint32_t i = 0; i < 16; i++)
		{
			if ((uint32_t)(pPat[i] & 0x7F) == s)
				log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
		}
	}

	static const int s_mode_order[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 }; // ordered from largest base bits to least

	for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
	{
		const uint32_t mode = s_mode_order[mode_iter];

		const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
		const int base_bitmask = (1 << num_base_bits) - 1;
		BASISU_NOTE_UNUSED(base_bitmask);

		const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
		const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };

		uint32_t blog_endpoints[3][4];

		// Convert half endpoints to blog 7-11. Subset s occupies slots [s*2] (low) and [s*2+1] (high).
		for (uint32_t s = 0; s < 2; s++)
		{
			for (uint32_t c = 0; c < 3; c++)
			{
				blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
				blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
			}
		}

		// When the table entry is marked inverted, the two subsets trade endpoint pairs.
		if (invert_flag)
		{
			for (uint32_t c = 0; c < 3; c++)
			{
				std::swap(blog_endpoints[c][0], blog_endpoints[c][2]);
				std::swap(blog_endpoints[c][1], blog_endpoints[c][3]);
			}
		}

		// Exchange the endpoints of every subset whose weights were inverted above.
		for (uint32_t s = 0; s < 2; s++)
		{
			if (!swap_flags[s])
				continue;

			for (uint32_t c = 0; c < 3; c++)
				std::swap(blog_endpoints[c][s * 2 + 0], blog_endpoints[c][s * 2 + 1]);
		}

		// Try packing the endpoints: endpoint 0 is stored directly, the other three as signed deltas against it.
		bool failed_flag = false;

		for (uint32_t c = 0; c < 3; c++)
		{
			const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;

			const int min_delta = -(max_delta + 1);
			assert((max_delta - min_delta) == delta_bitmasks[c]);

			log_blk.m_endpoints[c][0] = blog_endpoints[c][0];

			int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
			int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
			int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];

			if ((delta0 < min_delta) || (delta0 > max_delta) ||
				(delta1 < min_delta) || (delta1 > max_delta) ||
				(delta2 < min_delta) || (delta2 > max_delta))
			{
				failed_flag = true;
				break;
			}

			log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
			log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
			log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
		}

		if (failed_flag)
			continue;

		log_blk.m_mode = mode;
		log_blk.m_partition_pattern = pat_index;
		pack_bc6h_block(*pPacked_block, log_blk);

		//half_float blk[16 * 3];
		//unpack_bc6h(pPacked_block, blk, false);

		return;
	}

	bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
}
20770
20771
// Packs a block where all 16 pixels have the same color.
// pColor holds the R, G, B half-float values. Returns false, without writing the block, if any of them has
// bit 15 (0x8000) set; otherwise packs the block and returns true.
bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
{
	assert(g_bc6h_enc_initialized);

	const bool any_high_bit_set = ((pColor[0] | pColor[1] | pColor[2]) & 0x8000) != 0;
	if (any_high_bit_set)
		return false;

	// ASTC block unpacker won't allow Inf/NaN's to come through.
	//if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
	//	return false;

	// Both endpoints are the color itself and every weight is 0.
	half_float endpoints[3][2];
	for (uint32_t c = 0; c < 3; c++)
	{
		endpoints[c][0] = pColor[c];
		endpoints[c][1] = pColor[c];
	}

	uint8_t weights[16] = {};

	bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);

	return true;
}
20799
20800
//--------------------------------------------------------------------------------------------------------------------------
20801
// basisu_astc_hdr_core.cpp
20802
20803
static bool g_astc_hdr_core_initialized;
20804
static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
20805
20806
//--------------------------------------------------------------------------------------------------------------------------
20807
20808
void astc_hdr_core_init()
20809
{
20810
if (g_astc_hdr_core_initialized)
20811
return;
20812
20813
memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
20814
20815
for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
20816
{
20817
const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
20818
//const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
20819
20820
assert(astc_pattern < 1024);
20821
g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
20822
}
20823
20824
g_astc_hdr_core_initialized = true;
20825
}
20826
20827
//--------------------------------------------------------------------------------------------------------------------------
20828
20829
// Sign-extends the low num_src_bits bits of src to a full int.
// Bit (num_src_bits - 1) is treated as the sign bit; any bits of src above the field are ignored.
// num_src_bits must be in [2, 31] (asserted).
static inline int astc_hdr_sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// Build the masks with unsigned arithmetic: for num_src_bits == 31, (1 << 31) - 1 overflows a signed int,
	// while (1u << 31) - 1u is well defined and the result (0x7FFFFFFF) still fits in an int.
	const int field_mask = static_cast<int>((1u << num_src_bits) - 1u);
	const int sign_bit = static_cast<int>(1u << (num_src_bits - 1));

	if (src & sign_bit)
		return src | ~field_mask;

	return src & field_mask;
}
20839
20840
static inline void astc_hdr_pack_bit(
20841
int& dst, int dst_bit,
20842
int src_val, int src_bit = 0)
20843
{
20844
assert(dst_bit >= 0 && dst_bit <= 31);
20845
int bit = basisu::get_bit(src_val, src_bit);
20846
dst |= (bit << dst_bit);
20847
}
20848
20849
//--------------------------------------------------------------------------------------------------------------------------
20850
20851
void decode_mode7_to_qlog12_ise20(
20852
const uint8_t* pEndpoints,
20853
int e[2][3],
20854
int* pScale)
20855
{
20856
assert(g_astc_hdr_core_initialized);
20857
20858
for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
20859
{
20860
assert(pEndpoints[i] <= 255);
20861
}
20862
20863
const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
20864
20865
// Extract mode bits and unpack to major component and mode.
20866
const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
20867
20868
int majcomp, mode;
20869
if ((modeval & 0xC) != 0xC)
20870
{
20871
majcomp = modeval >> 2;
20872
mode = modeval & 3;
20873
}
20874
else if (modeval != 0xF)
20875
{
20876
majcomp = modeval & 3;
20877
mode = 4;
20878
}
20879
else
20880
{
20881
majcomp = 0;
20882
mode = 5;
20883
}
20884
20885
// Extract low-order bits of r, g, b, and s.
20886
int red = v0 & 0x3f;
20887
int green = v1 & 0x1f;
20888
int blue = v2 & 0x1f;
20889
int scale = v3 & 0x1f;
20890
20891
// Extract high-order bits, which may be assigned depending on mode
20892
int x0 = (v1 >> 6) & 1;
20893
int x1 = (v1 >> 5) & 1;
20894
int x2 = (v2 >> 6) & 1;
20895
int x3 = (v2 >> 5) & 1;
20896
int x4 = (v3 >> 7) & 1;
20897
int x5 = (v3 >> 6) & 1;
20898
int x6 = (v3 >> 5) & 1;
20899
20900
// Now move the high-order xs into the right place.
20901
const int ohm = 1 << mode;
20902
if (ohm & 0x30) green |= x0 << 6;
20903
if (ohm & 0x3A) green |= x1 << 5;
20904
if (ohm & 0x30) blue |= x2 << 6;
20905
if (ohm & 0x3A) blue |= x3 << 5;
20906
if (ohm & 0x3D) scale |= x6 << 5;
20907
if (ohm & 0x2D) scale |= x5 << 6;
20908
if (ohm & 0x04) scale |= x4 << 7;
20909
if (ohm & 0x3B) red |= x4 << 6;
20910
if (ohm & 0x04) red |= x3 << 6;
20911
if (ohm & 0x10) red |= x5 << 7;
20912
if (ohm & 0x0F) red |= x2 << 7;
20913
if (ohm & 0x05) red |= x1 << 8;
20914
if (ohm & 0x0A) red |= x0 << 8;
20915
if (ohm & 0x05) red |= x0 << 9;
20916
if (ohm & 0x02) red |= x6 << 9;
20917
if (ohm & 0x01) red |= x3 << 10;
20918
if (ohm & 0x02) red |= x5 << 10;
20919
20920
// Shift the bits to the top of the 12-bit result.
20921
static const int s_shamts[6] = { 1,1,2,3,4,5 };
20922
20923
const int shamt = s_shamts[mode];
20924
red <<= shamt;
20925
green <<= shamt;
20926
blue <<= shamt;
20927
scale <<= shamt;
20928
20929
// Minor components are stored as differences
20930
if (mode != 5)
20931
{
20932
green = red - green;
20933
blue = red - blue;
20934
}
20935
20936
// Swizzle major component into place
20937
if (majcomp == 1)
20938
std::swap(red, green);
20939
20940
if (majcomp == 2)
20941
std::swap(red, blue);
20942
20943
// Clamp output values, set alpha to 1.0
20944
e[1][0] = basisu::clamp(red, 0, 0xFFF);
20945
e[1][1] = basisu::clamp(green, 0, 0xFFF);
20946
e[1][2] = basisu::clamp(blue, 0, 0xFFF);
20947
20948
e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
20949
e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
20950
e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
20951
20952
if (pScale)
20953
*pScale = scale;
20954
}
20955
20956
//--------------------------------------------------------------------------------------------------------------------------
20957
20958
bool decode_mode7_to_qlog12(
20959
const uint8_t* pEndpoints,
20960
int e[2][3],
20961
int* pScale,
20962
uint32_t ise_endpoint_range)
20963
{
20964
assert(g_astc_hdr_core_initialized);
20965
20966
if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
20967
{
20968
decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
20969
}
20970
else
20971
{
20972
uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
20973
20974
for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
20975
dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
20976
20977
decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
20978
}
20979
20980
for (uint32_t i = 0; i < 2; i++)
20981
{
20982
if (e[i][0] > (int)MAX_QLOG12)
20983
return false;
20984
20985
if (e[i][1] > (int)MAX_QLOG12)
20986
return false;
20987
20988
if (e[i][2] > (int)MAX_QLOG12)
20989
return false;
20990
}
20991
20992
return true;
20993
}
20994
20995
//--------------------------------------------------------------------------------------------------------------------------
20996
20997
void decode_mode11_to_qlog12_ise20(
20998
const uint8_t* pEndpoints,
20999
int e[2][3])
21000
{
21001
#ifdef _DEBUG
21002
for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
21003
{
21004
assert(pEndpoints[i] <= 255);
21005
}
21006
#endif
21007
21008
const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
21009
21010
if (maj_comp == 3)
21011
{
21012
// Direct, qlog8 and qlog7
21013
e[0][0] = pEndpoints[0] << 4;
21014
e[1][0] = pEndpoints[1] << 4;
21015
21016
e[0][1] = pEndpoints[2] << 4;
21017
e[1][1] = pEndpoints[3] << 4;
21018
21019
e[0][2] = (pEndpoints[4] & 127) << 5;
21020
e[1][2] = (pEndpoints[5] & 127) << 5;
21021
}
21022
else
21023
{
21024
int v0 = pEndpoints[0];
21025
int v1 = pEndpoints[1];
21026
int v2 = pEndpoints[2];
21027
int v3 = pEndpoints[3];
21028
int v4 = pEndpoints[4];
21029
int v5 = pEndpoints[5];
21030
21031
int mode = 0;
21032
astc_hdr_pack_bit(mode, 0, v1, 7);
21033
astc_hdr_pack_bit(mode, 1, v2, 7);
21034
astc_hdr_pack_bit(mode, 2, v3, 7);
21035
21036
int va = v0;
21037
astc_hdr_pack_bit(va, 8, v1, 6);
21038
21039
int vb0 = v2 & 63;
21040
int vb1 = v3 & 63;
21041
int vc = v1 & 63;
21042
21043
int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
21044
int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
21045
static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
21046
vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
21047
vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
21048
21049
int x0 = basisu::get_bit(v2, 6);
21050
int x1 = basisu::get_bit(v3, 6);
21051
int x2 = basisu::get_bit(v4, 6);
21052
int x3 = basisu::get_bit(v5, 6);
21053
int x4 = basisu::get_bit(v4, 5);
21054
int x5 = basisu::get_bit(v5, 5);
21055
21056
const uint32_t ohm = 1U << mode;
21057
if (ohm & 0xA4) va |= (x0 << 9);
21058
if (ohm & 0x08) va |= (x2 << 9);
21059
if (ohm & 0x50) va |= (x4 << 9);
21060
if (ohm & 0x50) va |= (x5 << 10);
21061
if (ohm & 0xA0) va |= (x1 << 10);
21062
if (ohm & 0xC0) va |= (x2 << 11);
21063
if (ohm & 0x04) vc |= (x1 << 6);
21064
if (ohm & 0xE8) vc |= (x3 << 6);
21065
if (ohm & 0x20) vc |= (x2 << 7);
21066
if (ohm & 0x5B) vb0 |= (x0 << 6);
21067
if (ohm & 0x5B) vb1 |= (x1 << 6);
21068
if (ohm & 0x12) vb0 |= (x2 << 7);
21069
if (ohm & 0x12) vb1 |= (x3 << 7);
21070
21071
const int shamt = (mode >> 1) ^ 3;
21072
21073
va = (uint32_t)va << shamt;
21074
vb0 = (uint32_t)vb0 << shamt;
21075
vb1 = (uint32_t)vb1 << shamt;
21076
vc = (uint32_t)vc << shamt;
21077
vd0 = (uint32_t)vd0 << shamt;
21078
vd1 = (uint32_t)vd1 << shamt;
21079
21080
// qlog12
21081
e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
21082
e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
21083
e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
21084
21085
e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
21086
e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
21087
e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
21088
21089
if (maj_comp)
21090
{
21091
std::swap(e[0][0], e[0][maj_comp]);
21092
std::swap(e[1][0], e[1][maj_comp]);
21093
}
21094
}
21095
}
21096
21097
//--------------------------------------------------------------------------------------------------------------------------
21098
21099
bool decode_mode11_to_qlog12(
21100
const uint8_t* pEndpoints,
21101
int e[2][3],
21102
uint32_t ise_endpoint_range)
21103
{
21104
assert(g_astc_hdr_core_initialized);
21105
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21106
21107
if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
21108
{
21109
decode_mode11_to_qlog12_ise20(pEndpoints, e);
21110
}
21111
else
21112
{
21113
uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
21114
21115
for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
21116
dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
21117
21118
decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
21119
}
21120
21121
for (uint32_t i = 0; i < 2; i++)
21122
{
21123
if (e[i][0] > (int)MAX_QLOG12)
21124
return false;
21125
21126
if (e[i][1] > (int)MAX_QLOG12)
21127
return false;
21128
21129
if (e[i][2] > (int)MAX_QLOG12)
21130
return false;
21131
}
21132
21133
return true;
21134
}
21135
21136
//--------------------------------------------------------------------------------------------------------------------------
21137
21138
// Encodes a single-subset block to BC6H from already-decoded half float endpoints.
//
//   h_e                 - endpoints, passed through unchanged to the BC6H block encoders.
//   best_blk            - source logical ASTC block; only m_weight_ise_range and m_weights are read here.
//   transcoded_bc6h_blk - receives the encoded BC6H block.
//
// Weight ISE range 5 goes to the 3-bit weight encoder with the weights untouched. Ranges 1-4, 6 and 7
// are remapped per weight through a small table into 4-bit BC6H weights, and range 8 is copied as-is.
// Returns false (after asserting) for any other weight ISE range.
bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
{
	assert(g_astc_hdr_core_initialized);
	assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));

	// ASTC weight symbol -> 4-bit BC6H weight, one table per supported weight ISE range.
	// The tables are indexed by the raw ISE symbol, which is why the larger ones are not monotonic.
	static const uint8_t s_remap_3_levels[3] = { 0, 8, 15 };
	static const uint8_t s_remap_4_levels[4] = { 0, 5, 10, 15 };
	static const uint8_t s_remap_5_levels[5] = { 0, 4, 7, 11, 15 };
	static const uint8_t s_remap_6_levels[6] = { 0, 15, 3, 12, 6, 9 };
	static const uint8_t s_remap_10_levels[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };
	static const uint8_t s_remap_12_levels[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };

	// Null means "no remapping needed, copy the weights verbatim".
	const uint8_t* pRemap = nullptr;

	switch (static_cast<uint32_t>(best_blk.m_weight_ise_range))
	{
	case 1: pRemap = s_remap_3_levels; break;	// weight ISE 1: 3 levels
	case 2: pRemap = s_remap_4_levels; break;	// weight ISE 2: 4 levels
	case 3: pRemap = s_remap_5_levels; break;	// weight ISE 3: 5 levels
	case 4: pRemap = s_remap_6_levels; break;	// weight ISE 4: 6 levels
	case 5:
		// Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
		bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
		return true;
	case 6: pRemap = s_remap_10_levels; break;	// weight ISE 6: 10 levels
	case 7: pRemap = s_remap_12_levels; break;	// weight ISE 7: 12 levels
	case 8: break;								// 16 levels
	default:
		assert(0);
		return false;
	}

	uint8_t weights_4bit[16];

	if (pRemap)
	{
		for (uint32_t texel = 0; texel < 16; texel++)
			weights_4bit[texel] = pRemap[best_blk.m_weights[texel]];
	}
	else
	{
		memcpy(weights_4bit, best_blk.m_weights, 16);
	}

	bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, weights_4bit);

	return true;
}
21216
21217
//--------------------------------------------------------------------------------------------------------------------------
21218
21219
// Transcodes a 2-subset UASTC HDR logical block to a 2-subset BC6H block with 3-bit weights.
//
//   common_part_index   - partition pattern index, must be < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2.
//   best_blk            - source logical ASTC block (m_num_partitions must be 2).
//   transcoded_bc6h_blk - receives the encoded BC6H block.
//
// Returns false when the block is not one of the accepted configurations (both CEM's equal and either
// 7 or 11, with one of the specific weight/endpoint ISE range pairs checked below), when endpoint
// decoding fails, or when a decoded endpoint converts to a half float Inf/NaN.
bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
{
	assert(g_astc_hdr_core_initialized);
	assert(best_blk.m_num_partitions == 2);
	assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);

	// UASTC HDR checks
	// Both CEM's must be equal in 2-subset UASTC HDR, and only CEM 7 and CEM 11 are handled.
	const uint32_t cem = best_blk.m_color_endpoint_modes[0];
	if (cem != best_blk.m_color_endpoint_modes[1])
		return false;
	if ((cem != 7) && (cem != 11))
		return false;

	const bool is_mode7 = (cem == 7);
	const uint32_t weight_range = best_blk.m_weight_ise_range;
	const uint32_t endpoint_range = best_blk.m_endpoint_ise_range;

	// Each accepted weight ISE range is paired with exactly one endpoint ISE range.
	bool config_ok = false;
	if (is_mode7)
	{
		switch (weight_range)
		{
		case 1:
		case 2: config_ok = (endpoint_range == 20); break;
		case 3: config_ok = (endpoint_range == 19); break;
		case 4: config_ok = (endpoint_range == 17); break;
		case 5: config_ok = (endpoint_range == 15); break;
		default: break;
		}
	}
	else
	{
		switch (weight_range)
		{
		case 1: config_ok = (endpoint_range == 14); break;
		case 2: config_ok = (endpoint_range == 12); break;
		default: break;
		}
	}

	if (!config_ok)
		return false;

	half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]

	for (uint32_t subset = 0; subset < 2; subset++)
	{
		int e[2][3];

		// The endpoint values of the two subsets are stored back to back in m_endpoints.
		const bool decoded = is_mode7 ?
			decode_mode7_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range) :
			decode_mode11_to_qlog12(best_blk.m_endpoints + subset * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);

		if (!decoded)
			return false;

		for (uint32_t comp = 0; comp < 3; comp++)
		{
			for (uint32_t lh = 0; lh < 2; lh++)
			{
				bc6h_endpoints[subset][comp][lh] = qlog_to_half(e[lh][comp], 12);
				if (is_half_inf_or_nan(bc6h_endpoints[subset][comp][lh]))
					return false;
			}
		}
	}

	// ASTC weight symbol -> 3-bit BC6H weight, one table per weight ISE range that needs remapping.
	static const uint8_t s_remap_3_levels[3] = { 0, 4, 7 };
	static const uint8_t s_remap_4_levels[4] = { 0, 2, 5, 7 };
	static const uint8_t s_remap_5_levels[5] = { 0, 2, 4, 5, 7 };
	static const uint8_t s_remap_6_levels[6] = { 0, 7, 1, 6, 3, 4 };

	// Null means the weights are copied verbatim (weight ISE range 5).
	const uint8_t* pRemap = nullptr;

	switch (weight_range)
	{
	case 1: pRemap = s_remap_3_levels; break;
	case 2: pRemap = s_remap_4_levels; break;
	case 3: pRemap = s_remap_5_levels; break;
	case 4: pRemap = s_remap_6_levels; break;
	case 5: break;
	default:
		assert(0);
		return false;
	}

	uint8_t bc6h_weights[16];

	if (pRemap)
	{
		for (uint32_t texel = 0; texel < 16; texel++)
			bc6h_weights[texel] = pRemap[best_blk.m_weights[texel]];
	}
	else
	{
		memcpy(bc6h_weights, best_blk.m_weights, 16);
	}

	bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);

	return true;
}
21325
21326
//--------------------------------------------------------------------------------------------------------------------------
21327
// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
21328
bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
21329
{
21330
assert(g_astc_hdr_core_initialized);
21331
if (!g_astc_hdr_core_initialized)
21332
{
21333
assert(0);
21334
return false;
21335
}
21336
21337
astc_helpers::log_astc_block log_blk;
21338
21339
if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
21340
{
21341
// Failed unpacking ASTC data
21342
return false;
21343
}
21344
21345
return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
21346
}
21347
21348
//--------------------------------------------------------------------------------------------------------------------------
21349
// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
21350
bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
21351
{
21352
assert(g_astc_hdr_core_initialized);
21353
if (!g_astc_hdr_core_initialized)
21354
{
21355
assert(0);
21356
return false;
21357
}
21358
21359
if (log_blk.m_solid_color_flag_ldr)
21360
{
21361
// Don't support LDR solid colors.
21362
return false;
21363
}
21364
21365
if (log_blk.m_solid_color_flag_hdr)
21366
{
21367
// Solid color HDR block
21368
return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
21369
}
21370
21371
// Only support 4x4 grid sizes
21372
if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
21373
return false;
21374
21375
// Don't support dual plane encoding
21376
if (log_blk.m_dual_plane)
21377
return false;
21378
21379
if (log_blk.m_num_partitions == 1)
21380
{
21381
// Handle 1 partition (or subset)
21382
21383
// UASTC HDR checks
21384
if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
21385
return false;
21386
21387
int e[2][3];
21388
bool success;
21389
21390
if (log_blk.m_color_endpoint_modes[0] == 7)
21391
{
21392
if (log_blk.m_endpoint_ise_range != 20)
21393
return false;
21394
21395
success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
21396
}
21397
else if (log_blk.m_color_endpoint_modes[0] == 11)
21398
{
21399
// UASTC HDR checks
21400
if (log_blk.m_weight_ise_range <= 7)
21401
{
21402
if (log_blk.m_endpoint_ise_range != 20)
21403
return false;
21404
}
21405
else if (log_blk.m_endpoint_ise_range != 19)
21406
{
21407
return false;
21408
}
21409
21410
success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
21411
}
21412
else
21413
{
21414
return false;
21415
}
21416
21417
if (!success)
21418
return false;
21419
21420
// Transform endpoints to half float
21421
half_float h_e[3][2] =
21422
{
21423
{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
21424
{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
21425
{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
21426
};
21427
21428
// Sanity check for NaN/Inf
21429
for (uint32_t i = 0; i < 2; i++)
21430
if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
21431
return false;
21432
21433
// Transcode to bc6h
21434
if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
21435
return false;
21436
}
21437
else if (log_blk.m_num_partitions == 2)
21438
{
21439
// Handle 2 partition (or subset)
21440
int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
21441
if (common_bc7_pat_index < 0)
21442
return false;
21443
21444
assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
21445
21446
if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
21447
return false;
21448
}
21449
else
21450
{
21451
// Only supports 1 or 2 partitions (or subsets)
21452
return false;
21453
}
21454
21455
return true;
21456
}
21457
21458
// ASTC 6x6 support
21459
namespace astc_6x6_hdr
21460
{
21461
// Table of the block mode configurations used by the ASTC 6x6 HDR support code (TOTAL_BLOCK_MODE_DECS entries).
// NOTE(review): block_mode_desc is declared elsewhere. Judging from the values, each row appears to be
//   { dual plane flag, CEM (7 or 11), number of subsets, two weight grid dimensions,
//     endpoint ISE range, weight ISE range, transcode endpoint ISE range, transcode weight ISE range,
//     BASIST_HDR_6X6_LEVEL* flag mask, dual plane component (0-2) }
// -- confirm the field order against the struct declaration before relying on it.
const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS] =
{
	// ------ mode 11
	{ false, 11, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 6, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 11, 1, 6, 5, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 5, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 11, 1, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 4, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 11, 1, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 3, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 11, 1, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 1, 4, 4, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 11, 1, 3, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ mode 7
	{ false, 7, 1, 6, 6, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_96_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 7, 1, 5, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_6_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	{ false, 7, 1, 3, 6, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 1, 6, 3, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_20_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ mode 11, 2 subset
	{ false, 11, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3/3x6
	{ false, 11, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 3x6/6x3
	{ false, 11, 2, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 11, 2, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// 4x6/6x4
	{ false, 11, 2, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 11, 2, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// ------ mode 7, 2 subset

	// 6x5/5x6
	{ false, 7, 2, 5, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 2, 6, 5, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x4/4x6 mode 7
	{ false, 7, 2, 4, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 2, 6, 4, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x6
	{ false, 7, 2, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x6
	{ false, 7, 2, 6, 6, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_192_LEVELS, astc_helpers::BISE_2_LEVELS, 0, 0 },

	// 5x5
	{ false, 7, 2, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, 0, 0 },

	// 6x3/3x6 mode 7
	{ false, 7, 2, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },
	{ false, 7, 2, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },

	// 6x3/3x6 mode 7
	{ false, 7, 2, 3, 6, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 },
	{ false, 7, 2, 6, 3, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, astc_helpers::BISE_80_LEVELS, astc_helpers::BISE_6_LEVELS, 0, 0 },

	// ------ dual plane

	// 3x6
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 3, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 6x3
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 6, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 3x3
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_16_LEVELS, BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },

	// 4x4
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 4, 4, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_5_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 },

	// 5x5
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 5, 5, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 2 },

	// ------ 2x2 modes for RDO
	// note 2x2 modes will be upsampled to 4x4 during transcoding (the min # of weight bits is 7 in ASTC)
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 1 },
	{ true, 11, 1, 2, 2, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_8_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 2 },
	{ false, 11, 1, 2, 2, astc_helpers::BISE_128_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_256_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL0 | BASIST_HDR_6X6_LEVEL1 | BASIST_HDR_6X6_LEVEL2, 0 },

	// ------ 3 subsets

	// 6x6
	{ false, 7, 3, 6, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 5x5
	{ false, 7, 3, 5, 5, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 4x4
	{ false, 7, 3, 4, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_3_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },

	// 3x3
	{ false, 7, 3, 3, 3, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_8_LEVELS, 0, 0 },

	// 6x4
	{ false, 7, 3, 6, 4, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 4, 6, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_64_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x4
	{ false, 7, 3, 6, 4, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 4, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// 6x5
	{ false, 7, 3, 6, 5, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 5, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_2_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3
	{ false, 7, 3, 6, 3, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_48_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },

	// 6x3
	{ false, 7, 3, 6, 3, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_32_LEVELS, astc_helpers::BISE_4_LEVELS, BASIST_HDR_6X6_LEVEL2, 0 },

	// 6x3
	{ false, 7, 3, 6, 3, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },
	{ false, 7, 3, 3, 6, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, astc_helpers::BISE_24_LEVELS, astc_helpers::BISE_5_LEVELS, 0, 0 },

	// 5x4
	{ false, 7, 3, 5, 4, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
	{ false, 7, 3, 4, 5, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, astc_helpers::BISE_40_LEVELS, astc_helpers::BISE_3_LEVELS, 0, 0 },
};
21613
21614
// Table of NUM_REUSE_XY_DELTAS two-component offsets. Going by the table name these are (x, y) deltas:
// the first group stays at y = 0 with x from -1 to -4, the following groups cover y = -1 .. -3 with
// x from 3 down to -4, and the last group covers y = -4 with x from 3 down to 0.
// NOTE(review): presumably offsets to previously processed blocks whose data may be reused -- confirm at the use site.
const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS] =
{
	{ -1, 0 }, { -2, 0 }, { -3, 0 }, { -4, 0 },
	{ 3, -1 }, { 2, -1 }, { 1, -1 }, { 0, -1 }, { -1, -1 }, { -2, -1 }, { -3, -1 }, { -4, -1 },
	{ 3, -2 }, { 2, -2 }, { 1, -2 }, { 0, -2 }, { -1, -2 }, { -2, -2 }, { -3, -2 }, { -4, -2 },
	{ 3, -3 }, { 2, -3 }, { 1, -3 }, { 0, -3 }, { -1, -3 }, { -2, -3 }, { -3, -3 }, { -4, -3 },
	{ 3, -4 }, { 2, -4 }, { 1, -4 }, { 0, -4 }
};
21622
21623
//--------------------------------------------------------------------------------------------------------------------------
21624
21625
void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range)
21626
{
21627
if (from_ise_range == to_ise_range)
21628
{
21629
if (pDst_ise_vals != pSrc_ise_vals)
21630
memcpy(pDst_ise_vals, pSrc_ise_vals, n);
21631
return;
21632
}
21633
21634
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;
21635
const auto& quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(to_ise_range).m_val_to_ise;
21636
21637
for (uint32_t i = 0; i < n; i++)
21638
pDst_ise_vals[i] = quant_tab[dequant_tab[pSrc_ise_vals[i]]];
21639
}
21640
21641
//--------------------------------------------------------------------------------------------------------------------------
21642
21643
// Returns bit number src_bit (0 = least significant, 31 = most significant) of src_val as 0 or 1.
// src_bit must be in [0, 31].
inline int get_bit(
	int src_val, int src_bit)
{
	assert(src_bit >= 0 && src_bit <= 31);
	// Shift the bit pattern as unsigned so the extracted value never depends on sign propagation.
	const unsigned int shifted = static_cast<unsigned int>(src_val) >> src_bit;
	return static_cast<int>(shifted & 1U);
}
21650
21651
// ORs bit number src_bit of src_val into bit number dst_bit of dst. Bits already set in dst are never
// cleared, so dst is expected to have the target bit clear beforehand if an exact copy is wanted.
// Both bit indices must be in [0, 31]; src_bit defaults to the least significant bit.
inline void pack_bit(
	int& dst, int dst_bit,
	int src_val, int src_bit = 0)
{
	assert(dst_bit >= 0 && dst_bit <= 31);
	assert(src_bit >= 0 && src_bit <= 31);

	const int extracted = (src_val >> src_bit) & 1;
	dst |= (extracted << dst_bit);
}
21659
21660
// Valid for weight ISE ranges 12-192 levels. Preserves upper 2 or 3 bits post-quantization.
21661
static uint8_t g_quantize_tables_preserve2[astc_helpers::TOTAL_ISE_RANGES - 1][256];
21662
static uint8_t g_quantize_tables_preserve3[astc_helpers::TOTAL_ISE_RANGES - 1][256];
21663
21664
// Lookup table with NUM_UNIQUE_PARTITIONS2 entries, indexed by unique 2-subset partition index.
// Going by the name, each entry is the partition seed that index maps to; every value is in [0, 1023].
// The values are not sorted, so this table must be treated as opaque data.
const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2] =
{
	86, 959, 936, 476, 1007, 672, 447, 423, 488, 422, 273, 65, 267, 786, 585, 195, 108, 731, 878, 812, 264, 125, 868, 581, 258, 390, 549, 872, 661, 352, 645, 543, 988,
	906, 903, 616, 482, 529, 3, 286, 272, 303, 151, 504, 498, 260, 79, 66, 608, 769, 305, 610, 1014, 967, 835, 789, 7, 951, 691, 15, 763, 976, 438, 314, 601, 673, 177,
	252, 615, 436, 220, 899, 623, 433, 674, 278, 797, 107, 847, 114, 470, 760, 821, 490, 329, 945, 387, 471, 225, 172, 83, 418, 966, 439, 316, 247, 43, 343, 625, 798,
	1, 61, 73, 307, 136, 474, 42, 664, 1013, 249, 389, 227, 374, 121, 48, 538, 226, 309, 554, 802, 834, 335, 495, 10, 955, 461, 293, 508, 153, 101, 63, 139, 31, 687,
	132, 174, 324, 545, 289, 39, 178, 594, 963, 854, 222, 323, 998, 964, 598, 475, 720, 1019, 983, 91, 703, 614, 394, 612, 281, 207, 930, 758, 586, 128, 517, 426, 306,
	168, 713, 36, 458, 876, 368, 780, 5, 9, 214, 109, 553, 726, 175, 103, 753, 684, 44, 665, 53, 500, 367, 611, 119, 732, 639, 326, 203, 156, 686, 910, 255, 62, 392, 591,
	112, 88, 213, 19, 1022, 478, 90, 486, 799, 702, 730, 414, 99, 1008, 142, 886, 373, 216, 69, 393, 299, 648, 415, 822, 912, 110, 567, 550, 693, 2, 138, 59, 271, 562, 295,
	714, 719, 199, 893, 831, 1006, 662, 235, 262, 78, 51, 902, 298, 190, 169, 583, 347, 890, 958, 909, 49, 987, 696, 633, 480, 50, 764, 826, 1023, 1016, 437, 891, 774, 257,
	724, 791, 526, 593, 690, 638, 858, 895, 794, 995, 130, 87, 877, 819, 318, 649, 376, 211, 284, 937, 370, 688, 229, 994, 115, 842, 60, 521, 95, 694, 804, 146, 754, 487, 55,
	17, 770, 450, 223, 4, 137, 911, 236, 683, 523, 47, 181, 24, 270, 602, 736, 11, 355, 148, 351, 762, 1009, 16, 210, 619, 805, 874, 807, 887, 403, 999, 810, 27, 402, 551, 135,
	778, 33, 409, 993, 71, 363, 159, 183, 77, 596, 670, 380, 968, 811, 404, 348, 539, 158, 578, 196, 621, 68, 530, 193, 100, 167, 919, 353, 366, 327, 643, 948, 518, 756, 801, 558,
	28, 705, 116, 94, 898, 453, 622, 647, 231, 445, 652, 230, 191, 277, 292, 254, 198, 766, 386, 232, 29, 70, 942, 740, 291, 607, 411, 496, 839, 8, 675, 319, 742, 21, 547, 627, 716,
	663, 23, 914, 631, 595, 499, 685, 950, 510, 54, 587, 432, 45, 646, 25, 122, 947, 171, 862, 441, 808, 722, 14, 74, 658, 129, 266, 1001, 534, 395, 527, 250, 206, 237, 67, 897, 634,
	572, 569, 533, 37, 341, 89, 463, 419, 75, 134, 283, 943, 519, 362, 144, 681, 407, 954, 131, 455, 934, 46, 513, 339, 194, 361, 606, 852, 546, 655, 1015, 147, 506, 240, 56, 836, 76,
	98, 600, 430, 388, 980, 695, 817, 279, 58, 215, 149, 170, 531, 870, 18, 727, 154, 26, 938, 929, 302, 697, 452, 218, 700, 524, 828, 751, 869, 217, 440, 354
};
21682
21683
// Lookup table with one entry per unique 3-subset partition index.
// The entries are listed in ascending order and all lie in [0, 1023].
// NOTE(review): judging by the name, each entry is the partition seed for that unique index - confirm against the code that consumes it.
const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3] =
{
	0, 8, 11, 14, 15, 17, 18, 19, 26, 31, 34, 35, 36, 38, 44, 47, 48, 49, 51, 56,
	59, 61, 70, 74, 76, 82, 88, 90, 96, 100, 103, 104, 108, 110, 111, 117, 122, 123,
	126, 127, 132, 133, 135, 139, 147, 150, 151, 152, 156, 157, 163, 166, 168, 171,
	175, 176, 179, 181, 182, 183, 186, 189, 192, 199, 203, 205, 207, 210, 214, 216,
	222, 247, 249, 250, 252, 254, 260, 261, 262, 263, 266, 272, 273, 275, 276, 288,
	291, 292, 293, 294, 297, 302, 309, 310, 313, 314, 318, 327, 328, 331, 335, 337,
	346, 356, 357, 358, 363, 365, 368, 378, 381, 384, 386, 390, 391, 392, 396, 397,
	398, 399, 401, 410, 411, 419, 427, 430, 431, 437, 439, 440, 451, 455, 457, 458,
	459, 460, 462, 468, 470, 471, 472, 474, 475, 477, 479, 482, 483, 488, 493, 495,
	496, 502, 503, 504, 507, 510, 511, 512, 515, 516, 518, 519, 522, 523, 525, 526,
	527, 538, 543, 544, 546, 547, 549, 550, 552, 553, 554, 562, 570, 578, 579, 581,
	582, 588, 589, 590, 593, 595, 600, 606, 611, 613, 618, 623, 625, 632, 637, 638,
	645, 646, 650, 651, 658, 659, 662, 666, 667, 669, 670, 678, 679, 685, 686, 687,
	688, 691, 694, 696, 698, 699, 700, 701, 703, 704, 707, 713, 714, 715, 717, 719,
	722, 724, 727, 730, 731, 734, 738, 739, 743, 747, 748, 750, 751, 753, 758, 760,
	764, 766, 769, 775, 776, 783, 784, 785, 787, 791, 793, 798, 799, 802, 804, 805,
	806, 807, 808, 809, 810, 813, 822, 823, 825, 831, 835, 837, 838, 839, 840, 842,
	845, 846, 848, 853, 854, 858, 859, 860, 866, 874, 882, 884, 887, 888, 892, 894,
	898, 902, 907, 914, 915, 918, 919, 922, 923, 925, 927, 931, 932, 937, 938, 940,
	943, 944, 945, 953, 955, 958, 959, 963, 966, 971, 974, 979, 990, 991, 998, 999,
	1007, 1010, 1011, 1012, 1015, 1020, 1023
};
21707
21708
static void init_quantize_tables()
21709
{
21710
for (uint32_t ise_range = astc_helpers::BISE_192_LEVELS; ise_range >= astc_helpers::BISE_12_LEVELS; ise_range--)
21711
{
21712
const uint32_t num_levels = astc_helpers::get_ise_levels(ise_range);
21713
const auto& ise_to_val_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_ISE_to_val;
21714
21715
for (uint32_t desired_val = 0; desired_val < 256; desired_val++)
21716
{
21717
{
21718
uint32_t best_err = UINT32_MAX;
21719
int best_ise_val = -1;
21720
21721
for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++)
21722
{
21723
const uint32_t quant_val = ise_to_val_tab[ise_val];
21724
21725
if ((quant_val & 0b11000000) != (desired_val & 0b11000000))
21726
continue;
21727
21728
uint32_t err = basisu::squarei((int)quant_val - (int)desired_val);
21729
if (err < best_err)
21730
{
21731
best_err = err;
21732
best_ise_val = ise_val;
21733
}
21734
21735
} // ise_val
21736
21737
assert(best_ise_val != -1);
21738
21739
g_quantize_tables_preserve2[ise_range][desired_val] = (uint8_t)best_ise_val;
21740
}
21741
21742
{
21743
uint32_t best_err = UINT32_MAX;
21744
int best_ise_val = -1;
21745
21746
for (uint32_t ise_val = 0; ise_val < num_levels; ise_val++)
21747
{
21748
const uint32_t quant_val = ise_to_val_tab[ise_val];
21749
21750
if ((quant_val & 0b11100000) != (desired_val & 0b11100000))
21751
continue;
21752
21753
uint32_t err = basisu::squarei((int)quant_val - (int)desired_val);
21754
if (err < best_err)
21755
{
21756
best_err = err;
21757
best_ise_val = ise_val;
21758
}
21759
21760
} // ise_val
21761
21762
assert(best_ise_val != -1);
21763
21764
g_quantize_tables_preserve3[ise_range][desired_val] = (uint8_t)best_ise_val;
21765
}
21766
21767
} // desired_val
21768
21769
#if 0
21770
for (uint32_t i = 0; i < 256; i++)
21771
{
21772
if (g_quantize_tables_preserve2[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i])
21773
{
21774
fmt_printf("P2, Range: {}, {} vs. {}\n", ise_range, g_quantize_tables_preserve2[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]);
21775
}
21776
21777
if (g_quantize_tables_preserve3[ise_range][i] != astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i])
21778
{
21779
fmt_printf("P3, Range: {}, {} vs. {}\n", ise_range, g_quantize_tables_preserve3[ise_range][i], astc_helpers::g_dequant_tables.get_endpoint_tab(ise_range).m_val_to_ise[i]);
21780
}
21781
}
21782
#endif
21783
21784
} // ise_range
21785
}
21786
21787
void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints)
21788
{
21789
assert(pSrc_endpoints != pDst_endpoints);
21790
assert((src_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (src_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21791
assert((dst_ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (dst_ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
21792
21793
// must be >=12 ISE levels for g_quantize_tables_preserve2 etc.
21794
assert(dst_ise_endpoint_range >= astc_helpers::BISE_12_LEVELS);
21795
21796
const uint32_t n = (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;
21797
21798
if (src_ise_endpoint_range == dst_ise_endpoint_range)
21799
{
21800
memcpy(pDst_endpoints, pSrc_endpoints, n);
21801
return;
21802
}
21803
21804
uint8_t temp_endpoints[basist::NUM_MODE11_ENDPOINTS];
21805
if (src_ise_endpoint_range != astc_helpers::BISE_256_LEVELS)
21806
{
21807
assert(n <= basist::NUM_MODE11_ENDPOINTS);
21808
21809
const auto& endpoint_dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(src_ise_endpoint_range).m_ISE_to_val;
21810
21811
for (uint32_t i = 0; i < n; i++)
21812
temp_endpoints[i] = endpoint_dequant_tab[pSrc_endpoints[i]];
21813
21814
pSrc_endpoints = temp_endpoints;
21815
}
21816
21817
if (dst_ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
21818
{
21819
memcpy(pDst_endpoints, pSrc_endpoints, n);
21820
return;
21821
}
21822
21823
const auto& quant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_val_to_ise;
21824
21825
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(dst_ise_endpoint_range).m_ISE_to_val;
21826
BASISU_NOTE_UNUSED(dequant_tab);
21827
21828
#if 1
21829
// A smarter value quantization that preserves the key upper bits. (If these bits get corrupted, the entire meaning of the encoding can get lost.)
21830
if (cem == 11)
21831
{
21832
assert(n == 6);
21833
21834
int maj_comp = 0;
21835
pack_bit(maj_comp, 0, pSrc_endpoints[4], 7);
21836
pack_bit(maj_comp, 1, pSrc_endpoints[5], 7);
21837
21838
if (maj_comp == 3)
21839
{
21840
// Direct
21841
pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]];
21842
pDst_endpoints[1] = quant_tab[pSrc_endpoints[1]];
21843
pDst_endpoints[2] = quant_tab[pSrc_endpoints[2]];
21844
pDst_endpoints[3] = quant_tab[pSrc_endpoints[3]];
21845
// No need for preserve1 tables, we can use the regular quantization tables because they preserve the MSB.
21846
pDst_endpoints[4] = quant_tab[pSrc_endpoints[4]];
21847
pDst_endpoints[5] = quant_tab[pSrc_endpoints[5]];
21848
21849
assert((dequant_tab[pDst_endpoints[4]] & 128) == (pSrc_endpoints[4] & 128));
21850
assert((dequant_tab[pDst_endpoints[5]] & 128) == (pSrc_endpoints[5] & 128));
21851
}
21852
else
21853
{
21854
pDst_endpoints[0] = quant_tab[pSrc_endpoints[0]];
21855
pDst_endpoints[1] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[1]];
21856
pDst_endpoints[2] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[2]];
21857
pDst_endpoints[3] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[3]];
21858
pDst_endpoints[4] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[4]];
21859
pDst_endpoints[5] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[5]];
21860
21861
assert((dequant_tab[pDst_endpoints[1]] & 0b11000000) == (pSrc_endpoints[1] & 0b11000000));
21862
assert((dequant_tab[pDst_endpoints[2]] & 0b11000000) == (pSrc_endpoints[2] & 0b11000000));
21863
assert((dequant_tab[pDst_endpoints[3]] & 0b11000000) == (pSrc_endpoints[3] & 0b11000000));
21864
assert((dequant_tab[pDst_endpoints[4]] & 0b11100000) == (pSrc_endpoints[4] & 0b11100000));
21865
assert((dequant_tab[pDst_endpoints[5]] & 0b11100000) == (pSrc_endpoints[5] & 0b11100000));
21866
}
21867
}
21868
else if (cem == 7)
21869
{
21870
assert(n == 4);
21871
21872
pDst_endpoints[0] = g_quantize_tables_preserve2[dst_ise_endpoint_range][pSrc_endpoints[0]];
21873
pDst_endpoints[1] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[1]];
21874
pDst_endpoints[2] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[2]];
21875
pDst_endpoints[3] = g_quantize_tables_preserve3[dst_ise_endpoint_range][pSrc_endpoints[3]];
21876
21877
assert((dequant_tab[pDst_endpoints[0]] & 0b11000000) == (pSrc_endpoints[0] & 0b11000000));
21878
assert((dequant_tab[pDst_endpoints[1]] & 0b11100000) == (pSrc_endpoints[1] & 0b11100000));
21879
assert((dequant_tab[pDst_endpoints[2]] & 0b11100000) == (pSrc_endpoints[2] & 0b11100000));
21880
assert((dequant_tab[pDst_endpoints[3]] & 0b11100000) == (pSrc_endpoints[3] & 0b11100000));
21881
}
21882
else
21883
{
21884
assert(0);
21885
}
21886
#else
21887
for (uint32_t i = 0; i < n; i++)
21888
{
21889
uint32_t v = pSrc_endpoints[i];
21890
assert(v <= 255);
21891
21892
pDst_endpoints[i] = quant_tab[v];
21893
}
21894
#endif
21895
}
21896
21897
void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk)
21898
{
21899
assert(decomp_blk.m_weight_ise_range >= astc_helpers::BISE_2_LEVELS);
21900
assert(decomp_blk.m_weight_ise_range <= astc_helpers::BISE_32_LEVELS);
21901
21902
// Special case for 2x2 which isn't typically valid ASTC (too few weight bits without dual plane). Upsample to 4x4.
21903
if ((!dual_plane) && (grid_x == 2) && (grid_y == 2))
21904
{
21905
decomp_blk.m_grid_width = 4;
21906
decomp_blk.m_grid_height = 4;
21907
21908
//const uint32_t total_weight_levels = astc_helpers::bise_levels(decomp_blk.m_weight_ise_range);
21909
const auto& dequant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_ISE_to_val;
21910
const auto& quant_weight = astc_helpers::g_dequant_tables.get_weight_tab(decomp_blk.m_weight_ise_range).m_val_to_ise;
21911
21912
astc_helpers::weighted_sample weights[16];
21913
21914
compute_upsample_weights(4, 4, 2, 2, weights);
21915
21916
for (uint32_t y = 0; y < 4; y++)
21917
{
21918
for (uint32_t x = 0; x < 4; x++)
21919
{
21920
const astc_helpers::weighted_sample& sample = weights[x + y * 4];
21921
21922
uint32_t total_weight = 8;
21923
21924
for (uint32_t yo = 0; yo < 2; yo++)
21925
{
21926
for (uint32_t xo = 0; xo < 2; xo++)
21927
{
21928
if (!sample.m_weights[yo][xo])
21929
continue;
21930
21931
total_weight += dequant_weight[transcode_weights[basisu::in_bounds((x + xo) + (y + yo) * grid_x, 0, grid_x * grid_y)]] * sample.m_weights[yo][xo];
21932
} // x
21933
} // y
21934
21935
total_weight >>= 4;
21936
21937
assert(total_weight <= 64);
21938
21939
decomp_blk.m_weights[x + y * 4] = quant_weight[total_weight];
21940
}
21941
}
21942
}
21943
else
21944
{
21945
const uint32_t num_planes = dual_plane ? 2 : 1;
21946
21947
decomp_blk.m_grid_width = (uint8_t)grid_x;
21948
decomp_blk.m_grid_height = (uint8_t)grid_y;
21949
memcpy(decomp_blk.m_weights, transcode_weights, grid_x * grid_y * num_planes);
21950
}
21951
}
21952
21953
// cur_y is the current destination row
21954
// prev_y is the row we want to access
21955
static inline int calc_row_index(int cur_y, int prev_y, int cur_row_index)
21956
{
21957
assert((cur_y >= 0) && (prev_y >= 0));
21958
assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS));
21959
21960
int delta_y = prev_y - cur_y;
21961
assert((delta_y > -REUSE_MAX_BUFFER_ROWS) && (delta_y <= 0));
21962
21963
cur_row_index += delta_y;
21964
if (cur_row_index < 0)
21965
cur_row_index += REUSE_MAX_BUFFER_ROWS;
21966
21967
assert((cur_row_index >= 0) && (cur_row_index < REUSE_MAX_BUFFER_ROWS));
21968
21969
return cur_row_index;
21970
}
21971
21972
// Reads total_values values of the given ISE range from the bitstream into pValues.
//   decoder      - bit reader positioned at the start of the packed values.
//   total_values - number of values to decode.
//   ise_range    - ISE range of the values (must be <= BISE_256_LEVELS).
//   pValues      - receives total_values bytes; each byte is (trit or quint << ep_bits) | low bits.
// Stream layout as consumed here: first every trit/quint group (5 trits in 8 bits or 3 quints in 7 bits,
// with a shorter final group when the count isn't a multiple of the group size), then ep_bits low bits
// per value. Each group is a plain base-3/base-5 number with the first value in the least significant digit.
// Returns false (instead of only asserting) if ise_range is out of range or total_values needs more
// groups than the fixed-size scratch buffer can hold; returns true otherwise.
bool decode_values(basist::bitwise_decoder& decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues)
{
	assert(ise_range <= astc_helpers::BISE_256_LEVELS);
	if (ise_range > astc_helpers::BISE_256_LEVELS)
		return false; // would index past g_ise_range_table in release builds

	const uint32_t ep_bits = astc_helpers::g_ise_range_table[ise_range][0];
	const uint32_t ep_trits = astc_helpers::g_ise_range_table[ise_range][1];
	const uint32_t ep_quints = astc_helpers::g_ise_range_table[ise_range][2];

	uint32_t total_tqs = 0;
	uint32_t bundle_size = 0, mul = 0;
	if (ep_trits)
	{
		total_tqs = (total_values + 4) / 5;
		bundle_size = 5;
		mul = 3;
	}
	else if (ep_quints)
	{
		total_tqs = (total_values + 2) / 3;
		bundle_size = 3;
		mul = 5;
	}

	const uint32_t MAX_TQ_VALUES = 32;
	assert(total_tqs <= MAX_TQ_VALUES);
	if (total_tqs > MAX_TQ_VALUES)
		return false; // would overrun tq_values[] in release builds

	uint32_t tq_values[MAX_TQ_VALUES];

	// Bits occupied by a group holding 1..5 trits or 1..3 quints (index 0 is unused).
	static const uint8_t s_trit_group_bits[6] = { 0, 2, 4, 5, 7, 8 };
	static const uint8_t s_quint_group_bits[4] = { 0, 3, 5, 7 };

	for (uint32_t i = 0; i < total_tqs; i++)
	{
		// Every group is full except possibly the last one.
		const bool is_last_group = (i == (total_tqs - 1));
		const uint32_t group_vals = is_last_group ? (total_values - i * bundle_size) : bundle_size;

		const uint32_t num_bits = ep_trits ? s_trit_group_bits[group_vals] : s_quint_group_bits[group_vals];

		tq_values[i] = (uint32_t)decoder.get_bits(num_bits);
	} // i

	uint32_t accum = 0;
	uint32_t accum_remaining = 0;
	uint32_t next_tq_index = 0;

	for (uint32_t i = 0; i < total_values; i++)
	{
		uint32_t value = (uint32_t)decoder.get_bits(ep_bits);

		if (total_tqs)
		{
			// Start on the next group once the current one has been fully consumed.
			if (!accum_remaining)
			{
				assert(next_tq_index < total_tqs);
				accum = tq_values[next_tq_index++];
				accum_remaining = bundle_size;
			}

			// Peel off the next base-3/base-5 digit and place it above the low bits.
			const uint32_t digit = accum % mul;
			accum /= mul;
			accum_remaining--;

			value |= (digit << ep_bits);
		}

		pValues[i] = (uint8_t)value;
	}

	return true;
}
22060
22061
static inline uint32_t get_num_endpoint_vals(uint32_t cem)
22062
{
22063
assert((cem == 7) || (cem == 11));
22064
return (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;
22065
}
22066
22067
// Interpolation weights (out of 64) for the sixteen 4-bit weight indices.
const uint32_t g_bc6h_weights4[16] =
{
	0, 4, 9, 13, 17, 21, 26, 30,
	34, 38, 43, 47, 51, 55, 60, 64
};
22068
22069
#if 0
// Disabled (compiled out): float -> half conversion for non-negative, finite inputs.

// Rounds a non-negative float to the nearest int by adding .5 and truncating.
static BASISU_FORCE_INLINE int pos_lrintf(float x)
{
	assert(x >= 0.0f);
	return (int)(x + .5f);
}

// Converts a non-negative, non-NaN, non-Inf float to half float bits.
static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_non_neg_no_nan_inf(float val)
{
	union { float f; int32_t i; uint32_t u; } fi = { val };

	const int src_mant = fi.i & 0x7FFFFF;
	const int src_exp = (fi.i >> 23) & 0xFF;

	int half_exp = 0, half_mant = 0;

	assert(((fi.i >> 31) == 0) && (src_exp != 0xFF));

	// not zero or denormal
	if (src_exp != 0)
	{
		const int unbiased_exp = src_exp - 127;

		if (unbiased_exp > 15)
		{
			// Too large for a half: saturate the exponent.
			half_exp = 31;
		}
		else if (unbiased_exp < -14)
		{
			// Below the smallest normal half: emit a denormal mantissa.
			half_mant = pos_lrintf((1 << 24) * fabsf(fi.f));
		}
		else
		{
			half_exp = unbiased_exp + 15;
			half_mant = pos_lrintf(src_mant * (1.0f / ((float)(1 << 13))));
		}
	}

	// Rounding can carry out of the 10-bit mantissa; bump the exponent when it does.
	assert((0 <= half_mant) && (half_mant <= 1024));
	if (half_mant == 1024)
	{
		half_exp++;
		half_mant = 0;
	}

	assert((half_exp >= 0) && (half_exp <= 31));
	assert((half_mant >= 0) && (half_mant <= 1023));

	basist::half_float result = (basist::half_float)((half_exp << 10) | half_mant);
	return result;
}
#endif
22113
22114
// Overlays a 32-bit unsigned integer and a float so a float's bit pattern can be read or written.
// 'u' must stay the first member: brace initialization such as fu32{ 0x7800000 } sets the integer bits.
union fu32
{
	uint32_t u; // raw bit pattern
	float f;    // the same storage viewed as a float
};
22119
22120
// Converts a float to half float bits.
// The input must be finite and within [0, basist::MAX_HALF_FLOAT] (asserted, not clamped).
// The result is capped at basist::MAX_HALF_FLOAT_AS_INT_BITS.
static BASISU_FORCE_INLINE basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f)
{
	assert(!isnan(f) && !isinf(f));
	assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));

	// Subtract 112 from the exponent, to change the bias from 127 to 15.
	// (0x7800000 is the float 2^-112, so the subtraction is done with one multiply.)
	static const fu32 k_rebias{ 0x7800000 };

	fu32 scaled;
	scaled.f = f * k_rebias.f;

	// Keep the exponent plus the top 10 mantissa bits.
	uint32_t half_bits = (basist::half_float)((scaled.u >> (23 - 10)) & 0x7FFF);

	// Round using the 13 discarded mantissa bits: bump only when they are strictly above the
	// halfway point (an exact tie is rounded down).
	const uint32_t discarded = scaled.u & 8191;
	if (discarded > 4096)
		half_bits++;

	if (half_bits > basist::MAX_HALF_FLOAT_AS_INT_BITS)
		half_bits = basist::MAX_HALF_FLOAT_AS_INT_BITS;

	return (basist::half_float)half_bits;
}
22143
22144
// Converts |f| to half float bits, returns those bits converted to a float value, and re-applies the sign of f.
// Note the result is the numeric value of the half's bit pattern, not the half's floating point value.
static BASISU_FORCE_INLINE float ftoh(float f)
{
	const float sign = (f < 0.0f) ? -1.0f : 1.0f;
	const float half_bits_as_float = (float)fast_float_to_half_no_clamp_neg_nan_or_inf(fabsf(f));

	return half_bits_as_float * sign;
}
22150
22151
// Supports positive and denormals only. No NaN or Inf.
22152
static BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
22153
{
22154
assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
22155
22156
// add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127)
22157
static const fu32 K = { 0x77800000 };
22158
22159
fu32 o;
22160
o.u = h << 13;
22161
o.f *= K.f;
22162
22163
return o.f;
22164
}
22165
22166
// Fast approximation of 1/sqrt(v): an integer bit-trick initial estimate followed by one
// refinement step with tuned constants.
static BASISU_FORCE_INLINE float inv_sqrt(float v)
{
	union
	{
		float flt;
		uint32_t ui;
	} bits;

	// Initial estimate taken from the float's bit pattern.
	bits.flt = v;
	bits.ui = 0x5F1FFFF9UL - (bits.ui >> 1);

	const float estimate = bits.flt;

	return 0.703952253f * estimate * (2.38924456f - v * (estimate * estimate));
}
22179
22180
// Standard deviation thresholds used by the fast BC6H encoder, in increasing order.
static const int FAST_BC6H_STD_DEV_THRESH = 256;
static const int FAST_BC6H_COMPLEX_STD_DEV_THRESH = 512;
static const int FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH = 2048;
22183
22184
static void assign_weights_simple_4(
22185
const basist::half_float* pPixels,
22186
uint8_t* pWeights,
22187
int min_r, int min_g, int min_b,
22188
int max_r, int max_g, int max_b, int64_t block_max_var)
22189
{
22190
BASISU_NOTE_UNUSED(block_max_var);
22191
22192
float fmin_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_r);
22193
float fmin_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_g);
22194
float fmin_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)min_b);
22195
22196
float fmax_r = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_r);
22197
float fmax_g = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_g);
22198
float fmax_b = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)max_b);
22199
22200
float fdir_r = fmax_r - fmin_r;
22201
float fdir_g = fmax_g - fmin_g;
22202
float fdir_b = fmax_b - fmin_b;
22203
22204
float l = inv_sqrt(fdir_r * fdir_r + fdir_g * fdir_g + fdir_b * fdir_b);
22205
if (l != 0.0f)
22206
{
22207
fdir_r *= l;
22208
fdir_g *= l;
22209
fdir_b *= l;
22210
}
22211
22212
float lr = ftoh(fmin_r * fdir_r + fmin_g * fdir_g + fmin_b * fdir_b);
22213
float hr = ftoh(fmax_r * fdir_r + fmax_g * fdir_g + fmax_b * fdir_b);
22214
22215
float frr = (hr == lr) ? 0.0f : (14.93333f / (float)(hr - lr));
22216
22217
lr = (-lr * frr) + 0.53333f;
22218
for (uint32_t i = 0; i < 16; i++)
22219
{
22220
const float r = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 0]);
22221
const float g = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 1]);
22222
const float b = fast_half_to_float_pos_not_inf_or_nan(pPixels[i * 3 + 2]);
22223
const float w = ftoh(r * fdir_r + g * fdir_g + b * fdir_b);
22224
22225
pWeights[i] = (uint8_t)basisu::clamp((int)(w * frr + lr), 0, 15);
22226
}
22227
}
22228
22229
static double assign_weights_4(
22230
const vec3F* pFloat_pixels, const float* pPixel_scales,
22231
uint8_t* pWeights,
22232
int min_r, int min_g, int min_b,
22233
int max_r, int max_g, int max_b, int64_t block_max_var, bool try_2subsets_flag,
22234
const fast_bc6h_params& params)
22235
{
22236
float cr[16], cg[16], cb[16];
22237
22238
for (uint32_t i = 0; i < 16; i++)
22239
{
22240
const uint32_t w = g_bc6h_weights4[i];
22241
22242
cr[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22243
cg[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22244
cb[i] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22245
}
22246
22247
double total_err = 0.0f;
22248
22249
if (params.m_brute_force_weight4_assignment)
22250
{
22251
for (uint32_t i = 0; i < 16; i++)
22252
{
22253
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22254
22255
float best_err = basisu::squaref(cr[0] - qr) + basisu::squaref(cg[0] - qg) + basisu::squaref(cb[0] - qb);
22256
uint32_t best_idx = 0;
22257
22258
for (uint32_t j = 1; j < 16; j++)
22259
{
22260
float rd = cr[j] - qr, gd = cg[j] - qg, bd = cb[j] - qb;
22261
float e = rd * rd + gd * gd + bd * bd;
22262
22263
if (e < best_err)
22264
{
22265
best_err = e;
22266
best_idx = j;
22267
}
22268
}
22269
22270
pWeights[i] = (uint8_t)best_idx;
22271
22272
total_err += best_err * pPixel_scales[i];
22273
}
22274
}
22275
else
22276
{
22277
const float dir_r = cr[15] - cr[0], dir_g = cg[15] - cg[0], dir_b = cb[15] - cb[0];
22278
22279
float dots[16];
22280
for (uint32_t i = 0; i < 16; i++)
22281
dots[i] = cr[i] * dir_r + cg[i] * dir_g + cb[i] * dir_b;
22282
22283
float mid_dots[15];
22284
bool monotonically_increasing = true;
22285
for (uint32_t i = 0; i < 15; i++)
22286
{
22287
mid_dots[i] = (dots[i] + dots[i + 1]) * .5f;
22288
22289
if (dots[i] > dots[i + 1])
22290
monotonically_increasing = false;
22291
}
22292
22293
const bool check_more_colors = block_max_var > (FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * FAST_BC6H_VERY_COMPLEX_STD_DEV_THRESH * 16); // watch prec
22294
22295
if (!monotonically_increasing)
22296
{
22297
// Seems very rare, not worth optimizing the other cases
22298
for (uint32_t i = 0; i < 16; i++)
22299
{
22300
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22301
22302
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22303
22304
float best_e = fabsf(d - dots[0]);
22305
int best_idx = 0;
22306
22307
for (int j = 1; j < 16; j++)
22308
{
22309
float e = fabsf(d - dots[j]);
22310
if (e < best_e)
22311
{
22312
best_e = e;
22313
best_idx = j;
22314
}
22315
}
22316
22317
assert((best_idx >= 0) && (best_idx <= 15));
22318
22319
pWeights[i] = (uint8_t)best_idx;
22320
22321
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22322
total_err += err * pPixel_scales[i];
22323
}
22324
}
22325
else if ((!try_2subsets_flag) || (!check_more_colors))
22326
{
22327
for (uint32_t i = 0; i < 16; i++)
22328
{
22329
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22330
22331
uint32_t best_idx = 0;
22332
22333
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22334
22335
int low = 0;
22336
22337
int mid = low + 7;
22338
if (d >= mid_dots[mid]) low = mid + 1;
22339
mid = low + 3;
22340
if (d >= mid_dots[mid]) low = mid + 1;
22341
mid = low + 1;
22342
if (d >= mid_dots[mid]) low = mid + 1;
22343
mid = low;
22344
if (d >= mid_dots[mid]) low = mid + 1;
22345
22346
best_idx = low;
22347
assert((best_idx >= 0) && (best_idx <= 15));
22348
22349
pWeights[i] = (uint8_t)best_idx;
22350
22351
// Giesen's MRSSE (Mean Relative Sum of Squared Errors).
22352
// Our ASTC HDR encoder uses slightly slower approx. MSLE, and it's too late/risky to eval the difference vs. MRSSE on the larger ASTC HDR blocks.
22353
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22354
total_err += err * pPixel_scales[i];
22355
}
22356
}
22357
else
22358
{
22359
for (uint32_t i = 0; i < 16; i++)
22360
{
22361
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22362
22363
uint32_t best_idx = 0;
22364
22365
float d = qr * dir_r + qg * dir_g + qb * dir_b;
22366
22367
int low = 0;
22368
22369
int mid = low + 7;
22370
if (d >= mid_dots[mid]) low = mid + 1;
22371
mid = low + 3;
22372
if (d >= mid_dots[mid]) low = mid + 1;
22373
mid = low + 1;
22374
if (d >= mid_dots[mid]) low = mid + 1;
22375
mid = low;
22376
if (d >= mid_dots[mid]) low = mid + 1;
22377
22378
best_idx = low;
22379
assert((best_idx >= 0) && (best_idx <= 15));
22380
22381
float err = basisu::squaref(qr - cr[best_idx]) + basisu::squaref(qg - cg[best_idx]) + basisu::squaref(qb - cb[best_idx]);
22382
22383
{
22384
int alt_idx = best_idx + 1;
22385
if (alt_idx > 15)
22386
alt_idx = 13;
22387
22388
float alt_err = basisu::squaref(qr - cr[alt_idx]) + basisu::squaref(qg - cg[alt_idx]) + basisu::squaref(qb - cb[alt_idx]);
22389
if (alt_err < err)
22390
{
22391
err = alt_err;
22392
best_idx = alt_idx;
22393
}
22394
}
22395
22396
{
22397
int alt_idx2 = best_idx - 1;
22398
if (alt_idx2 < 0)
22399
alt_idx2 = 2;
22400
float alt_err2 = basisu::squaref(qr - cr[alt_idx2]) + basisu::squaref(qg - cg[alt_idx2]) + basisu::squaref(qb - cb[alt_idx2]);
22401
if (alt_err2 < err)
22402
{
22403
err = alt_err2;
22404
best_idx = alt_idx2;
22405
}
22406
}
22407
22408
pWeights[i] = (uint8_t)best_idx;
22409
22410
total_err += err * pPixel_scales[i];
22411
}
22412
}
22413
}
22414
22415
return total_err;
22416
}
22417
22418
static void assign_weights3(uint8_t trial_weights[16],
22419
uint32_t best_pat_bits,
22420
uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2],
22421
uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2],
22422
const vec3F* pFloat_pixels)
22423
{
22424
float subset_cr[2][8], subset_cg[2][8], subset_cb[2][8];
22425
22426
for (uint32_t subset = 0; subset < 2; subset++)
22427
{
22428
const uint32_t min_r = subset_min_r[subset], min_g = subset_min_g[subset], min_b = subset_min_b[subset];
22429
const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset];
22430
22431
for (uint32_t j = 0; j < 8; j++)
22432
{
22433
const uint32_t w = g_bc7_weights3[j];
22434
22435
subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22436
subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22437
subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22438
} // j
22439
22440
} // subset
22441
22442
// TODO: Plane optimization?
22443
22444
for (uint32_t i = 0; i < 16; i++)
22445
{
22446
const uint32_t subset = (best_pat_bits >> i) & 1;
22447
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22448
22449
float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb);
22450
uint32_t best_idx = 0;
22451
22452
for (uint32_t j = 1; j < 8; j++)
22453
{
22454
float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb);
22455
if (e < best_error)
22456
{
22457
best_error = e;
22458
best_idx = j;
22459
}
22460
}
22461
22462
trial_weights[i] = (uint8_t)best_idx;
22463
22464
} // i
22465
}
22466
22467
static double assign_weights_error_3(uint8_t trial_weights[16],
22468
uint32_t best_pat_bits,
22469
uint32_t subset_min_r[2], uint32_t subset_min_g[2], uint32_t subset_min_b[2],
22470
uint32_t subset_max_r[2], uint32_t subset_max_g[2], uint32_t subset_max_b[2],
22471
const vec3F* pFloat_pixels, const float* pPixel_scales)
22472
{
22473
float subset_cr[2][8], subset_cg[2][8], subset_cb[2][8];
22474
22475
for (uint32_t subset = 0; subset < 2; subset++)
22476
{
22477
const uint32_t min_r = subset_min_r[subset], min_g = subset_min_g[subset], min_b = subset_min_b[subset];
22478
const uint32_t max_r = subset_max_r[subset], max_g = subset_max_g[subset], max_b = subset_max_b[subset];
22479
22480
for (uint32_t j = 0; j < 8; j++)
22481
{
22482
const uint32_t w = g_bc7_weights3[j];
22483
22484
subset_cr[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_r * (64 - w) + max_r * w + 32) >> 6));
22485
subset_cg[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_g * (64 - w) + max_g * w + 32) >> 6));
22486
subset_cb[subset][j] = fast_half_to_float_pos_not_inf_or_nan((basist::half_float)((min_b * (64 - w) + max_b * w + 32) >> 6));
22487
} // j
22488
22489
} // subset
22490
22491
double trial_error = 0.0f;
22492
22493
// TODO: Plane optimization?
22494
22495
for (uint32_t i = 0; i < 16; i++)
22496
{
22497
const uint32_t subset = (best_pat_bits >> i) & 1;
22498
const float qr = pFloat_pixels[i].c[0], qg = pFloat_pixels[i].c[1], qb = pFloat_pixels[i].c[2];
22499
22500
float best_error = basisu::squaref(subset_cr[subset][0] - qr) + basisu::squaref(subset_cg[subset][0] - qg) + basisu::squaref(subset_cb[subset][0] - qb);
22501
uint32_t best_idx = 0;
22502
22503
for (uint32_t j = 1; j < 8; j++)
22504
{
22505
float e = basisu::squaref(subset_cr[subset][j] - qr) + basisu::squaref(subset_cg[subset][j] - qg) + basisu::squaref(subset_cb[subset][j] - qb);
22506
if (e < best_error)
22507
{
22508
best_error = e;
22509
best_idx = j;
22510
}
22511
}
22512
22513
trial_weights[i] = (uint8_t)best_idx;
22514
22515
trial_error += best_error * pPixel_scales[i];
22516
22517
} // i
22518
22519
return trial_error;
22520
}
22521
22522
// Per weight index vectors (w*w, (1-w)*w, (1-w)*(1-w), w) for the 3-bit and 4-bit weight sets.
// Filled in by fast_encode_bc6h_init().
static basist::vec4F g_bc6h_ls_weights_3[8];
static basist::vec4F g_bc6h_ls_weights_4[16];

// 2-subset partition patterns, one 16-bit mask per pattern (bit j = subset of pixel j).
// Filled in by fast_encode_bc6h_init().
const uint32_t BC6H_NUM_PATS = 32;
static uint32_t g_bc6h_pats2[BC6H_NUM_PATS];
22527
22528
static void fast_encode_bc6h_init()
22529
{
22530
for (uint32_t i = 0; i < 8; i++)
22531
{
22532
const float w = (float)g_bc7_weights3[i] * (1.0f / 64.0f);
22533
g_bc6h_ls_weights_3[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
22534
}
22535
22536
for (uint32_t i = 0; i < 16; i++)
22537
{
22538
const float w = (float)g_bc6h_weights4[i] * (1.0f / 64.0f);
22539
g_bc6h_ls_weights_4[i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
22540
}
22541
22542
for (uint32_t pat_index = 0; pat_index < BC6H_NUM_PATS; pat_index++)
22543
{
22544
uint32_t pat_bits = 0;
22545
22546
for (uint32_t j = 0; j < 16; j++)
22547
pat_bits |= (g_bc7_partition2[pat_index * 16 + j] << j);
22548
22549
g_bc6h_pats2[pat_index] = pat_bits;
22550
}
22551
}
22552
22553
// Expands a 'bits'-wide quantized endpoint value to the 16-bit range.
//   val  - quantized value, must be < (1 << bits).
//   bits - endpoint precision in bits.
// With 15 or more bits the value is returned unchanged. Otherwise 0 maps to 0, the largest value
// maps to 0xFFFF, and everything else maps to ((val << 16) + 0x8000) >> bits.
static int bc6h_dequantize(int val, int bits)
{
	assert(val < (1 << bits));

	if (bits >= 15)
		return val;

	if (!val)
		return 0;

	const int max_val = (1 << bits) - 1;
	if (val == max_val)
		return 0xFFFF;

	return ((val << 16) + 0x8000) >> bits;
}
22568
22569
// Converts a dequantized 16-bit endpoint value (must be < 65536) to half float bits by scaling it by 31/64.
static inline basist::half_float bc6h_convert_to_half(int val)
{
	assert(val < 65536);

	// scale by 31/64
	const int scaled = (val * 31) >> 6;

	return (basist::half_float)scaled;
}
22576
22577
static void bc6h_quant_dequant_endpoints(uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b, int bits) // bits=10
22578
{
22579
min_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_r, bits), bits));
22580
min_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_g, bits), bits));
22581
min_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)min_b, bits), bits));
22582
22583
max_r = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_r, bits), bits));
22584
max_g = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_g, bits), bits));
22585
max_b = bc6h_convert_to_half(bc6h_dequantize(basist::bc6h_half_to_blog((basist::half_float)max_b, bits), bits));
22586
}
22587
22588
static void bc6h_quant_endpoints(
22589
uint32_t min_hr, uint32_t min_hg, uint32_t min_hb, uint32_t max_hr, uint32_t max_hg, uint32_t max_hb,
22590
uint32_t& min_r, uint32_t& min_g, uint32_t& min_b, uint32_t& max_r, uint32_t& max_g, uint32_t& max_b,
22591
int bits)
22592
{
22593
min_r = basist::bc6h_half_to_blog((basist::half_float)min_hr, bits);
22594
min_g = basist::bc6h_half_to_blog((basist::half_float)min_hg, bits);
22595
min_b = basist::bc6h_half_to_blog((basist::half_float)min_hb, bits);
22596
22597
max_r = basist::bc6h_half_to_blog((basist::half_float)max_hr, bits);
22598
max_g = basist::bc6h_half_to_blog((basist::half_float)max_hg, bits);
22599
max_b = basist::bc6h_half_to_blog((basist::half_float)max_hb, bits);
22600
}
22601
22602
static void bc6h_dequant_endpoints(
22603
uint32_t min_br, uint32_t min_bg, uint32_t min_bb, uint32_t max_br, uint32_t max_bg, uint32_t max_bb,
22604
uint32_t& min_hr, uint32_t& min_hg, uint32_t& min_hb, uint32_t& max_hr, uint32_t& max_hg, uint32_t& max_hb,
22605
int bits)
22606
{
22607
min_hr = bc6h_convert_to_half(bc6h_dequantize(min_br, bits));
22608
min_hg = bc6h_convert_to_half(bc6h_dequantize(min_bg, bits));
22609
min_hb = bc6h_convert_to_half(bc6h_dequantize(min_bb, bits));
22610
22611
max_hr = bc6h_convert_to_half(bc6h_dequantize(max_br, bits));
22612
max_hg = bc6h_convert_to_half(bc6h_dequantize(max_bg, bits));
22613
max_hb = bc6h_convert_to_half(bc6h_dequantize(max_bb, bits));
22614
}
22615
22616
// Returns the number of set bits in x.
static BASISU_FORCE_INLINE int popcount32(uint32_t x)
{
#if defined(__EMSCRIPTEN__) || defined(__clang__) || defined(__GNUC__)
	return __builtin_popcount(x);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64))
	// MSVC only provides the __popcnt intrinsic for x86/x64 targets; other MSVC targets
	// (e.g. ARM64) fall through to the portable loop below.
	return (int)__popcnt(x);
#else
	// Portable fallback: each iteration clears the lowest set bit.
	int count = 0;
	while (x)
	{
		x &= (x - 1);
		++count;
	}
	return count;
#endif
}
22632
22633
// Rounds x to an int by adding 0.5 (x >= 0) or subtracting 0.5 (otherwise) and truncating.
static BASISU_FORCE_INLINE int fast_roundf_int(float x)
{
	if (x >= 0.0f)
		return (int)(x + 0.5f);

	return (int)(x - 0.5f);
}
22637
22638
static void fast_encode_bc6h_2subsets_pattern(
22639
uint32_t best_pat_index, uint32_t best_pat_bits,
22640
const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales,
22641
double& cur_error, basist::bc6h_logical_block& log_blk,
22642
int64_t block_max_var,
22643
int mean_r, int mean_g, int mean_b,
22644
const fast_bc6h_params& params)
22645
{
22646
BASISU_NOTE_UNUSED(block_max_var);
22647
22648
uint32_t subset_means[2][3] = { { 0 } };
22649
for (uint32_t i = 0; i < 16; i++)
22650
{
22651
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22652
const uint32_t r = pPixels[i * 3 + 0], g = pPixels[i * 3 + 1], b = pPixels[i * 3 + 2];
22653
22654
subset_means[subset_index][0] += r;
22655
subset_means[subset_index][1] += g;
22656
subset_means[subset_index][2] += b;
22657
}
22658
22659
for (uint32_t s = 0; s < 2; s++)
22660
for (uint32_t c = 0; c < 3; c++)
22661
subset_means[s][c] = (subset_means[s][c] + 8) / 16;
22662
22663
int64_t subset_icov[2][6] = { { 0 } };
22664
22665
for (uint32_t i = 0; i < 16; i++)
22666
{
22667
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22668
const int r = (int)pPixels[i * 3 + 0] - mean_r, g = (int)pPixels[i * 3 + 1] - mean_g, b = (int)pPixels[i * 3 + 2] - mean_b;
22669
22670
subset_icov[subset_index][0] += r * r;
22671
subset_icov[subset_index][1] += r * g;
22672
subset_icov[subset_index][2] += r * b;
22673
subset_icov[subset_index][3] += g * g;
22674
subset_icov[subset_index][4] += g * b;
22675
subset_icov[subset_index][5] += b * b;
22676
}
22677
22678
vec3F subset_axis[2];
22679
22680
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22681
{
22682
float cov[6];
22683
for (uint32_t i = 0; i < 6; i++)
22684
cov[i] = (float)subset_icov[subset_index][i];
22685
22686
const float sc = 1.0f / (basisu::maximum(cov[0], cov[3], cov[5]) + basisu::REALLY_SMALL_FLOAT_VAL);
22687
const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];
22688
22689
const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
22690
const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
22691
const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;
22692
22693
float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb);
22694
22695
float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f;
22696
if (fabs(l) >= basisu::SMALL_FLOAT_VAL)
22697
{
22698
const float inv_l = inv_sqrt(l);
22699
axis_r = alt_xr * inv_l;
22700
axis_g = alt_xg * inv_l;
22701
axis_b = alt_xb * inv_l;
22702
}
22703
22704
subset_axis[subset_index].set(axis_r, axis_g, axis_b);
22705
} // s
22706
22707
float subset_min_dot[2] = { basisu::BIG_FLOAT_VAL, basisu::BIG_FLOAT_VAL };
22708
float subset_max_dot[2] = { -basisu::BIG_FLOAT_VAL, -basisu::BIG_FLOAT_VAL };
22709
int subset_min_idx[2] = { 0 }, subset_max_idx[2] = { 0 };
22710
22711
for (uint32_t i = 0; i < 16; i++)
22712
{
22713
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22714
const float r = (float)pPixels[i * 3 + 0], g = (float)pPixels[i * 3 + 1], b = (float)pPixels[i * 3 + 2];
22715
const float dot = r * subset_axis[subset_index].c[0] + g * subset_axis[subset_index].c[1] + b * subset_axis[subset_index].c[2];
22716
22717
if (dot < subset_min_dot[subset_index])
22718
{
22719
subset_min_dot[subset_index] = dot;
22720
subset_min_idx[subset_index] = i;
22721
}
22722
22723
if (dot > subset_max_dot[subset_index])
22724
{
22725
subset_max_dot[subset_index] = dot;
22726
subset_max_idx[subset_index] = i;
22727
}
22728
} // i
22729
22730
uint32_t subset_min_r[2], subset_min_g[2], subset_min_b[2];
22731
uint32_t subset_max_r[2], subset_max_g[2], subset_max_b[2];
22732
22733
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22734
{
22735
const uint32_t min_index = subset_min_idx[subset_index] * 3, max_index = subset_max_idx[subset_index] * 3;
22736
22737
subset_min_r[subset_index] = pPixels[min_index + 0];
22738
subset_min_g[subset_index] = pPixels[min_index + 1];
22739
subset_min_b[subset_index] = pPixels[min_index + 2];
22740
22741
subset_max_r[subset_index] = pPixels[max_index + 0];
22742
subset_max_g[subset_index] = pPixels[max_index + 1];
22743
subset_max_b[subset_index] = pPixels[max_index + 2];
22744
22745
} // subset_index
22746
22747
// least squares with unquantized endpoints
22748
const bool use_ls = true;
22749
if (use_ls)
22750
{
22751
uint8_t trial_weights[16];
22752
assign_weights3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels);
22753
22754
float z00[2] = { 0.0f }, z01[2] = { 0.0f }, z10[2] = { 0.0f }, z11[2] = { 0.0f };
22755
float q00_r[2] = { 0.0f }, q10_r[2] = { 0.0f }, t_r[2] = { 0.0f };
22756
float q00_g[2] = { 0.0f }, q10_g[2] = { 0.0f }, t_g[2] = { 0.0f };
22757
float q00_b[2] = { 0.0f }, q10_b[2] = { 0.0f }, t_b[2] = { 0.0f };
22758
22759
for (uint32_t i = 0; i < 16; i++)
22760
{
22761
const uint32_t subset = (best_pat_bits >> i) & 1;
22762
22763
float r = (float)pPixels[i * 3 + 0];
22764
float g = (float)pPixels[i * 3 + 1];
22765
float b = (float)pPixels[i * 3 + 2];
22766
22767
const uint32_t sel = trial_weights[i];
22768
22769
z00[subset] += g_bc6h_ls_weights_3[sel][0];
22770
z10[subset] += g_bc6h_ls_weights_3[sel][1];
22771
z11[subset] += g_bc6h_ls_weights_3[sel][2];
22772
22773
float w = g_bc6h_ls_weights_3[sel][3];
22774
22775
q00_r[subset] += w * r;
22776
t_r[subset] += r;
22777
22778
q00_g[subset] += w * g;
22779
t_g[subset] += g;
22780
22781
q00_b[subset] += w * b;
22782
t_b[subset] += b;
22783
}
22784
22785
for (uint32_t subset = 0; subset < 2; subset++)
22786
{
22787
q10_r[subset] = t_r[subset] - q00_r[subset];
22788
q10_g[subset] = t_g[subset] - q00_g[subset];
22789
q10_b[subset] = t_b[subset] - q00_b[subset];
22790
22791
z01[subset] = z10[subset];
22792
22793
float det = z00[subset] * z11[subset] - z01[subset] * z10[subset];
22794
if (fabs(det) >= basisu::SMALL_FLOAT_VAL)
22795
{
22796
det = 1.0f / det;
22797
22798
float iz00 = z11[subset] * det;
22799
float iz01 = -z01[subset] * det;
22800
float iz10 = -z10[subset] * det;
22801
float iz11 = z00[subset] * det;
22802
22803
subset_max_r[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_r[subset] + iz01 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22804
subset_min_r[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_r[subset] + iz11 * q10_r[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22805
22806
subset_max_g[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_g[subset] + iz01 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22807
subset_min_g[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_g[subset] + iz11 * q10_g[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22808
22809
subset_max_b[subset] = basisu::clamp<int>(fast_roundf_int(iz00 * q00_b[subset] + iz01 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22810
subset_min_b[subset] = basisu::clamp<int>(fast_roundf_int(iz10 * q00_b[subset] + iz11 * q10_b[subset]), 0, (int)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
22811
}
22812
} // subset
22813
}
22814
22815
const int BC6H_2SUBSET_ABS_ENDPOINT_MODE = 9;
22816
22817
int bc6h_mode_index = BC6H_2SUBSET_ABS_ENDPOINT_MODE, num_endpoint_bits = 6;
22818
uint32_t abs_blog_endpoints[3][4];
22819
22820
if (params.m_num_diff_endpoint_modes_to_try)
22821
{
22822
// ordered from largest base bits to least
22823
static const int s_bc6h_mode_order2[2] = { 5, 1 };
22824
static const int s_bc6h_mode_order4[4] = { 0, 5, 7, 1 };
22825
static const int s_bc6h_mode_order9[9] = { 2, 3, 4, 0, 5, 6, 7, 8, 1 };
22826
22827
uint32_t num_endpoint_modes = 2;
22828
const int* pBC6H_mode_order = s_bc6h_mode_order2;
22829
22830
if (params.m_num_diff_endpoint_modes_to_try >= 9)
22831
{
22832
num_endpoint_modes = 9;
22833
pBC6H_mode_order = s_bc6h_mode_order9;
22834
}
22835
else if (params.m_num_diff_endpoint_modes_to_try >= 4)
22836
{
22837
num_endpoint_modes = 4;
22838
pBC6H_mode_order = s_bc6h_mode_order4;
22839
}
22840
22841
// Find the BC6H mode that will conservatively encode our trial endpoints. The mode chosen will handle any endpoint swaps.
22842
for (uint32_t bc6h_mode_iter = 0; bc6h_mode_iter < num_endpoint_modes; bc6h_mode_iter++)
22843
{
22844
const uint32_t mode = pBC6H_mode_order[bc6h_mode_iter];
22845
22846
const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
22847
const int base_bitmask = (1 << num_base_bits) - 1;
22848
BASISU_NOTE_UNUSED(base_bitmask);
22849
22850
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
22851
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
22852
22853
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22854
{
22855
bc6h_quant_endpoints(
22856
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index],
22857
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22858
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22859
num_base_bits);
22860
}
22861
22862
uint32_t c;
22863
for (c = 0; c < 3; c++)
22864
{
22865
// a very conservative check because we don't have the weight indices yet, so we don't know how to swap end point values
22866
// purposely enforcing a symmetric limit here so we can invert any endpoints later if needed
22867
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
22868
const int min_delta = -max_delta;
22869
22870
int delta0 = (int)abs_blog_endpoints[c][1] - (int)abs_blog_endpoints[c][0];
22871
if ((delta0 < min_delta) || (delta0 > max_delta))
22872
break;
22873
22874
int delta1 = (int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][0];
22875
if ((delta1 < min_delta) || (delta1 > max_delta))
22876
break;
22877
22878
int delta2 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][0];
22879
if ((delta2 < min_delta) || (delta2 > max_delta))
22880
break;
22881
22882
// in case the endpoints are swapped
22883
int delta3 = (int)abs_blog_endpoints[c][2] - (int)abs_blog_endpoints[c][1];
22884
if ((delta3 < min_delta) || (delta3 > max_delta))
22885
break;
22886
22887
int delta4 = (int)abs_blog_endpoints[c][3] - (int)abs_blog_endpoints[c][1];
22888
if ((delta4 < min_delta) || (delta4 > max_delta))
22889
break;
22890
}
22891
22892
if (c == 3)
22893
{
22894
bc6h_mode_index = mode;
22895
num_endpoint_bits = num_base_bits;
22896
break;
22897
}
22898
}
22899
}
22900
22901
if (bc6h_mode_index == BC6H_2SUBSET_ABS_ENDPOINT_MODE)
22902
{
22903
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22904
{
22905
bc6h_quant_endpoints(
22906
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index], subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index],
22907
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22908
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22909
num_endpoint_bits);
22910
}
22911
}
22912
22913
for (uint32_t subset_index = 0; subset_index < 2; subset_index++)
22914
{
22915
bc6h_dequant_endpoints(
22916
abs_blog_endpoints[0][subset_index * 2 + 0], abs_blog_endpoints[1][subset_index * 2 + 0], abs_blog_endpoints[2][subset_index * 2 + 0],
22917
abs_blog_endpoints[0][subset_index * 2 + 1], abs_blog_endpoints[1][subset_index * 2 + 1], abs_blog_endpoints[2][subset_index * 2 + 1],
22918
subset_min_r[subset_index], subset_min_g[subset_index], subset_min_b[subset_index],
22919
subset_max_r[subset_index], subset_max_g[subset_index], subset_max_b[subset_index], num_endpoint_bits);
22920
}
22921
22922
uint8_t trial_weights[16];
22923
double trial_error = assign_weights_error_3(trial_weights, best_pat_bits, subset_min_r, subset_min_g, subset_min_b, subset_max_r, subset_max_g, subset_max_b, pFloat_pixels, pPixel_scales);
22924
22925
if (trial_error < cur_error)
22926
{
22927
basist::bc6h_logical_block trial_log_blk;
22928
22929
trial_log_blk.m_mode = bc6h_mode_index;
22930
trial_log_blk.m_partition_pattern = best_pat_index;
22931
22932
memcpy(trial_log_blk.m_endpoints, abs_blog_endpoints, sizeof(trial_log_blk.m_endpoints));
22933
memcpy(trial_log_blk.m_weights, trial_weights, 16);
22934
22935
if (trial_log_blk.m_weights[0] & 4)
22936
{
22937
for (uint32_t c = 0; c < 3; c++)
22938
std::swap(trial_log_blk.m_endpoints[c][0], trial_log_blk.m_endpoints[c][1]);
22939
22940
for (uint32_t i = 0; i < 16; i++)
22941
{
22942
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22943
if (subset_index == 0)
22944
trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i];
22945
}
22946
}
22947
22948
const uint32_t subset2_anchor_index = g_bc7_table_anchor_index_second_subset[best_pat_index];
22949
if (trial_log_blk.m_weights[subset2_anchor_index] & 4)
22950
{
22951
for (uint32_t c = 0; c < 3; c++)
22952
std::swap(trial_log_blk.m_endpoints[c][2], trial_log_blk.m_endpoints[c][3]);
22953
22954
for (uint32_t i = 0; i < 16; i++)
22955
{
22956
const uint32_t subset_index = (best_pat_bits >> i) & 1;
22957
if (subset_index == 1)
22958
trial_log_blk.m_weights[i] = 7 - trial_log_blk.m_weights[i];
22959
}
22960
}
22961
22962
if (bc6h_mode_index != BC6H_2SUBSET_ABS_ENDPOINT_MODE)
22963
{
22964
const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[bc6h_mode_index][1], g_bc6h_mode_sig_bits[bc6h_mode_index][2], g_bc6h_mode_sig_bits[bc6h_mode_index][3] };
22965
const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
22966
22967
for (uint32_t c = 0; c < 3; c++)
22968
{
22969
const int delta0 = (int)trial_log_blk.m_endpoints[c][1] - (int)trial_log_blk.m_endpoints[c][0];
22970
const int delta1 = (int)trial_log_blk.m_endpoints[c][2] - (int)trial_log_blk.m_endpoints[c][0];
22971
const int delta2 = (int)trial_log_blk.m_endpoints[c][3] - (int)trial_log_blk.m_endpoints[c][0];
22972
22973
#ifdef _DEBUG
22974
// sanity check the final endpoints
22975
const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
22976
const int min_delta = -(max_delta + 1);
22977
assert((max_delta - min_delta) == delta_bitmasks[c]);
22978
22979
if ((delta0 < min_delta) || (delta0 > max_delta) || (delta1 < min_delta) || (delta1 > max_delta) || (delta2 < min_delta) || (delta2 > max_delta))
22980
{
22981
assert(0);
22982
break;
22983
}
22984
#endif
22985
22986
trial_log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
22987
trial_log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
22988
trial_log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
22989
22990
} // c
22991
}
22992
22993
cur_error = trial_error;
22994
log_blk = trial_log_blk;
22995
}
22996
}
22997
22998
static void fast_encode_bc6h_2subsets(
22999
const basist::half_float* pPixels, const vec3F* pFloat_pixels, const float* pPixel_scales,
23000
double& cur_error, basist::bc6h_logical_block& log_blk,
23001
int64_t block_max_var,
23002
int mean_r, int mean_g, int mean_b, float block_axis_r, float block_axis_g, float block_axis_b,
23003
const fast_bc6h_params& params)
23004
{
23005
assert((params.m_max_2subset_pats_to_try > 0) && (params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS));
23006
23007
if (params.m_max_2subset_pats_to_try == BC6H_NUM_PATS)
23008
{
23009
for (uint32_t i = 0; i < BC6H_NUM_PATS; i++)
23010
{
23011
const uint32_t best_pat_index = i;
23012
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23013
23014
fast_encode_bc6h_2subsets_pattern(
23015
best_pat_index, best_pat_bits,
23016
pPixels, pFloat_pixels, pPixel_scales,
23017
cur_error, log_blk,
23018
block_max_var,
23019
mean_r, mean_g, mean_b, params);
23020
}
23021
return;
23022
}
23023
23024
uint32_t desired_pat_bits = 0;
23025
for (uint32_t i = 0; i < 16; i++)
23026
{
23027
float f = (float)(pPixels[i * 3 + 0] - mean_r) * block_axis_r +
23028
(float)(pPixels[i * 3 + 1] - mean_g) * block_axis_g +
23029
(float)(pPixels[i * 3 + 2] - mean_b) * block_axis_b;
23030
23031
desired_pat_bits |= (((f >= 0.0f) ? 1 : 0) << i);
23032
} // i
23033
23034
if (params.m_max_2subset_pats_to_try == 1)
23035
{
23036
uint32_t best_diff = UINT32_MAX;
23037
for (uint32_t p = 0; p < BC6H_NUM_PATS; p++)
23038
{
23039
const uint32_t bc6h_pat_bits = g_bc6h_pats2[p];
23040
23041
int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits);
23042
int diff_inv = 16 - diff;
23043
23044
uint32_t min_diff = (basisu::minimum<int>(diff, diff_inv) << 8) | p;
23045
if (min_diff < best_diff)
23046
best_diff = min_diff;
23047
} // p
23048
23049
const uint32_t best_pat_index = best_diff & 0xFF;
23050
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23051
23052
fast_encode_bc6h_2subsets_pattern(
23053
best_pat_index, best_pat_bits,
23054
pPixels, pFloat_pixels, pPixel_scales,
23055
cur_error, log_blk,
23056
block_max_var,
23057
mean_r, mean_g, mean_b, params);
23058
}
23059
else
23060
{
23061
assert(params.m_max_2subset_pats_to_try <= BC6H_NUM_PATS);
23062
uint32_t pat_diffs[BC6H_NUM_PATS];
23063
23064
for (uint32_t p = 0; p < BC6H_NUM_PATS; p++)
23065
{
23066
const uint32_t bc6h_pat_bits = g_bc6h_pats2[p];
23067
23068
int diff = popcount32(bc6h_pat_bits ^ desired_pat_bits);
23069
int diff_inv = 16 - diff;
23070
23071
pat_diffs[p] = (basisu::minimum<int>(diff, diff_inv) << 8) | p;
23072
} // p
23073
23074
std::sort(pat_diffs, pat_diffs + BC6H_NUM_PATS);
23075
23076
for (uint32_t pat_iter = 0; pat_iter < params.m_max_2subset_pats_to_try; pat_iter++)
23077
{
23078
const uint32_t best_pat_index = pat_diffs[pat_iter] & 0xFF;
23079
const uint32_t best_pat_bits = g_bc6h_pats2[best_pat_index];
23080
23081
fast_encode_bc6h_2subsets_pattern(
23082
best_pat_index, best_pat_bits,
23083
pPixels, pFloat_pixels, pPixel_scales,
23084
cur_error, log_blk,
23085
block_max_var,
23086
mean_r, mean_g, mean_b, params);
23087
}
23088
}
23089
}
23090
23091
// Encodes 16 RGB pixels (pPixels: 3 half_float values per pixel) into one BC6H block (*pBlock).
//
// Three paths, cheapest first:
//  1. Solid block (all pixels identical): mode 13 with a single 16-bit blog endpoint per channel.
//  2. Simple block (largest channel variance below FAST_BC6H_STD_DEV_THRESH): endpoints taken
//     from the per-channel bounds, pulled in by 1/32 of the range on each side.
//  3. Complex block: endpoints from the extreme pixels along an estimated axis, refined by
//     least squares, optionally followed by a 2-subset search.
void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params &params)
{
	basist::bc6h_logical_block log_blk;
	log_blk.clear();

	log_blk.m_mode = basist::BC6H_FIRST_1SUBSET_MODE_INDEX;

	// Per-channel bounds and sums over the block.
	uint32_t omin_r = UINT32_MAX, omin_g = UINT32_MAX, omin_b = UINT32_MAX;
	uint32_t omax_r = 0, omax_g = 0, omax_b = 0;
	uint32_t total_r = 0, total_g = 0, total_b = 0;

	for (uint32_t i = 0; i < 16; i++)
	{
		uint32_t r = pPixels[i * 3 + 0];
		uint32_t g = pPixels[i * 3 + 1];
		uint32_t b = pPixels[i * 3 + 2];

		total_r += r;
		total_g += g;
		total_b += b;

		omin_r = basisu::minimum(omin_r, r);
		omin_g = basisu::minimum(omin_g, g);
		omin_b = basisu::minimum(omin_b, b);

		omax_r = basisu::maximum(omax_r, r);
		omax_g = basisu::maximum(omax_g, g);
		omax_b = basisu::maximum(omax_b, b);
	}

	const bool is_solid = (omin_r == omax_r) && (omin_g == omax_g) && (omin_b == omax_b);
	if (is_solid)
	{
		// Solid block
		const uint32_t solid_color[3] = { omin_r, omin_g, omin_b };
		for (uint32_t c = 0; c < 3; c++)
		{
			log_blk.m_endpoints[c][0] = basist::bc6h_half_to_blog16((basist::half_float)solid_color[c]);
			log_blk.m_endpoints[c][1] = 0;
		}

		log_blk.m_mode = 13;
		pack_bc6h_block(*pBlock, log_blk);

		return;
	}

	uint32_t min_r, min_g, min_b, max_r, max_g, max_b;

	// Writes the current min/max endpoints into log_blk as 10-bit blog values.
	const auto store_endpoints = [&]()
	{
		log_blk.m_endpoints[0][0] = basist::bc6h_half_to_blog((basist::half_float)min_r, 10);
		log_blk.m_endpoints[0][1] = basist::bc6h_half_to_blog((basist::half_float)max_r, 10);

		log_blk.m_endpoints[1][0] = basist::bc6h_half_to_blog((basist::half_float)min_g, 10);
		log_blk.m_endpoints[1][1] = basist::bc6h_half_to_blog((basist::half_float)max_g, 10);

		log_blk.m_endpoints[2][0] = basist::bc6h_half_to_blog((basist::half_float)min_b, 10);
		log_blk.m_endpoints[2][1] = basist::bc6h_half_to_blog((basist::half_float)max_b, 10);
	};

	// If pixel 0's weight has its top bit (8) set, invert all weights and swap the endpoints.
	const auto fix_first_weight = [&]()
	{
		if (log_blk.m_weights[0] & 8)
		{
			for (uint32_t i = 0; i < 16; i++)
				log_blk.m_weights[i] = 15 - log_blk.m_weights[i];

			for (uint32_t c = 0; c < 3; c++)
				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
		}
	};

	int mean_r = (total_r + 8) / 16;
	int mean_g = (total_g + 8) / 16;
	int mean_b = (total_b + 8) / 16;

	// Integer covariance sums (upper triangle: rr, rg, rb, gg, gb, bb).
	int64_t icov[6] = { 0, 0, 0, 0, 0, 0 };

	for (uint32_t i = 0; i < 16; i++)
	{
		int r = (int)pPixels[i * 3 + 0] - mean_r;
		int g = (int)pPixels[i * 3 + 1] - mean_g;
		int b = (int)pPixels[i * 3 + 2] - mean_b;

		icov[0] += r * r;
		icov[1] += r * g;
		icov[2] += r * b;
		icov[3] += g * g;
		icov[4] += g * b;
		icov[5] += b * b;
	}

	int64_t block_max_var = basisu::maximum(icov[0], icov[3], icov[5]); // not divided by 16, i.e. scaled by 16

	if (block_max_var < (FAST_BC6H_STD_DEV_THRESH * FAST_BC6H_STD_DEV_THRESH * 16))
	{
		// Simple block
		min_r = (omax_r - omin_r) / 32 + omin_r;
		min_g = (omax_g - omin_g) / 32 + omin_g;
		min_b = (omax_b - omin_b) / 32 + omin_b;

		max_r = ((omax_r - omin_r) * 31) / 32 + omin_r;
		max_g = ((omax_g - omin_g) * 31) / 32 + omin_g;
		max_b = ((omax_b - omin_b) * 31) / 32 + omin_b;

		assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS));

		bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10);

		assign_weights_simple_4(pPixels, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var);

		store_endpoints();
		fix_first_weight();

		pack_bc6h_block(*pBlock, log_blk);

		return;
	}

	// block_max_var cannot be 0 here, also trace cannot be 0

	// Complex block (edges/strong gradients)
	bool try_2subsets = false;
	double cur_err = 0.0f;
	vec3F float_pixels[16];
	float pixel_scales[16];

	// covar rows are:
	// 0, 1, 2
	// 1, 3, 4
	// 2, 4, 5
	float cov[6];
	for (uint32_t i = 0; i < 6; i++)
		cov[i] = (float)icov[i];

	// Start vector: the covariance diagonal scaled by the largest variance.
	const float sc = 1.0f / (float)block_max_var;
	const float wx = sc * cov[0], wy = sc * cov[3], wz = sc * cov[5];

	// Multiply the covariance matrix by the start vector.
	const float alt_xr = cov[0] * wx + cov[1] * wy + cov[2] * wz;
	const float alt_xg = cov[1] * wx + cov[3] * wy + cov[4] * wz;
	const float alt_xb = cov[2] * wx + cov[4] * wy + cov[5] * wz;

	float l = basisu::squaref(alt_xr) + basisu::squaref(alt_xg) + basisu::squaref(alt_xb);

	// Normalize; fall back to the grey diagonal if the product is degenerate.
	float axis_r = 0.57735027f, axis_g = 0.57735027f, axis_b = 0.57735027f;
	if (fabs(l) >= basisu::SMALL_FLOAT_VAL)
	{
		const float inv_l = inv_sqrt(l);
		axis_r = alt_xr * inv_l;
		axis_g = alt_xg * inv_l;
		axis_b = alt_xb * inv_l;
	}

	// Variance along the axis: axis^T * cov * axis.
	const float tr = axis_r * cov[0] + axis_g * cov[1] + axis_b * cov[2];
	const float tg = axis_r * cov[1] + axis_g * cov[3] + axis_b * cov[4];
	const float tb = axis_r * cov[2] + axis_g * cov[4] + axis_b * cov[5];
	const float principle_axis_var = tr * axis_r + tg * axis_g + tb * axis_b;

	// Replace the axis with (cov * axis) divided by that variance.
	const float inv_principle_axis_var = 1.0f / (principle_axis_var + basisu::REALLY_SMALL_FLOAT_VAL);
	axis_r = tr * inv_principle_axis_var;
	axis_g = tg * inv_principle_axis_var;
	axis_b = tb * inv_principle_axis_var;

	float total_var = cov[0] + cov[3] + cov[5];

	// If the principle axis variance vs. the block's total variance accounts for less than this threshold, it's a "very complex" block that may benefit from 2 subsets.
	const float COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH = .995f;
	try_2subsets = principle_axis_var < (total_var * COMPLEX_BLOCK_PRINCIPLE_AXIS_FRACT_THRESH);

	// Convert the pixels to float, compute per-pixel error scales, and find the extreme
	// pixels along the axis in a single sweep.
	uint32_t min_idx = 0, max_idx = 0;
	float min_dot = basisu::BIG_FLOAT_VAL, max_dot = -basisu::BIG_FLOAT_VAL;

	for (uint32_t i = 0; i < 16; i++)
	{
		float r = (float)pPixels[i * 3 + 0];
		float g = (float)pPixels[i * 3 + 1];
		float b = (float)pPixels[i * 3 + 2];

		float_pixels[i].c[0] = fast_half_to_float_pos_not_inf_or_nan((half_float)r);
		float_pixels[i].c[1] = fast_half_to_float_pos_not_inf_or_nan((half_float)g);
		float_pixels[i].c[2] = fast_half_to_float_pos_not_inf_or_nan((half_float)b);

		pixel_scales[i] = 1.0f / (basisu::squaref(float_pixels[i].c[0]) + basisu::squaref(float_pixels[i].c[1]) + basisu::squaref(float_pixels[i].c[2]) + (float)MIN_HALF_FLOAT);

		float dot = r * axis_r + g * axis_g + b * axis_b;

		if (dot < min_dot)
		{
			min_dot = dot;
			min_idx = i;
		}

		if (dot > max_dot)
		{
			max_dot = dot;
			max_idx = i;
		}
	}

	min_r = pPixels[min_idx * 3 + 0];
	min_g = pPixels[min_idx * 3 + 1];
	min_b = pPixels[min_idx * 3 + 2];

	max_r = pPixels[max_idx * 3 + 0];
	max_g = pPixels[max_idx * 3 + 1];
	max_b = pPixels[max_idx * 3 + 2];

	assert((max_r < MAX_HALF_FLOAT_AS_INT_BITS) && (max_g < MAX_HALF_FLOAT_AS_INT_BITS) && (max_b < MAX_HALF_FLOAT_AS_INT_BITS));

	bc6h_quant_dequant_endpoints(min_r, min_g, min_b, max_r, max_g, max_b, 10);

	cur_err = assign_weights_4(float_pixels, pixel_scales, log_blk.m_weights, min_r, min_g, min_b, max_r, max_g, max_b, block_max_var, try_2subsets, params);

	// Least squares refinement: re-solve the endpoints from the current weights and keep the
	// result only while it lowers the error.
	const uint32_t MAX_LS_PASSES = params.m_hq_ls ? 2 : 1;
	for (uint32_t pass = 0; pass < MAX_LS_PASSES; pass++)
	{
		float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
		float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
		float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
		float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;

		for (uint32_t i = 0; i < 16; i++)
		{
			float r = (float)pPixels[i * 3 + 0];
			float g = (float)pPixels[i * 3 + 1];
			float b = (float)pPixels[i * 3 + 2];

			const uint32_t sel = log_blk.m_weights[i];

			z00 += g_bc6h_ls_weights_4[sel][0];
			z10 += g_bc6h_ls_weights_4[sel][1];
			z11 += g_bc6h_ls_weights_4[sel][2];

			float w = g_bc6h_ls_weights_4[sel][3];

			q00_r += w * r;
			t_r += r;

			q00_g += w * g;
			t_g += g;

			q00_b += w * b;
			t_b += b;
		}

		q10_r = t_r - q00_r;
		q10_g = t_g - q00_g;
		q10_b = t_b - q00_b;

		z01 = z10;

		// Singular system: nothing to refine.
		float det = z00 * z11 - z01 * z10;
		if (fabs(det) < basisu::SMALL_FLOAT_VAL)
			break;

		det = 1.0f / det;

		float iz00 = z11 * det;
		float iz01 = -z01 * det;
		float iz10 = -z10 * det;
		float iz11 = z00 * det;

		// One row of the 2x2 solve, rounded and clamped to the valid half-float bit range.
		const auto solve_row = [](float ia, float ib, float qa, float qb) -> uint32_t
		{
			return (int)basisu::clamp<float>(std::round(ia * qa + ib * qb), 0, (float)basist::MAX_BC6H_HALF_FLOAT_AS_UINT);
		};

		uint32_t trial_max_r = solve_row(iz00, iz01, q00_r, q10_r);
		uint32_t trial_min_r = solve_row(iz10, iz11, q00_r, q10_r);

		uint32_t trial_max_g = solve_row(iz00, iz01, q00_g, q10_g);
		uint32_t trial_min_g = solve_row(iz10, iz11, q00_g, q10_g);

		uint32_t trial_max_b = solve_row(iz00, iz01, q00_b, q10_b);
		uint32_t trial_min_b = solve_row(iz10, iz11, q00_b, q10_b);

		bc6h_quant_dequant_endpoints(trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, 10);

		uint8_t trial_weights[16];
		double trial_err = assign_weights_4(float_pixels, pixel_scales, trial_weights, trial_min_r, trial_min_g, trial_min_b, trial_max_r, trial_max_g, trial_max_b, block_max_var, try_2subsets, params);

		// No improvement: stop refining and keep what we have.
		if (!(trial_err < cur_err))
			break;

		cur_err = trial_err;

		min_r = trial_min_r;
		max_r = trial_max_r;

		min_g = trial_min_g;
		max_g = trial_max_g;

		min_b = trial_min_b;
		max_b = trial_max_b;

		memcpy(log_blk.m_weights, trial_weights, 16);
	} // pass

	store_endpoints();
	fix_first_weight();

	// Optionally see whether a 2-subset encoding beats the 1-subset result.
	if ((params.m_max_2subset_pats_to_try > 0) && ((try_2subsets) && (block_max_var > (FAST_BC6H_COMPLEX_STD_DEV_THRESH * FAST_BC6H_COMPLEX_STD_DEV_THRESH * 16))))
	{
		fast_encode_bc6h_2subsets(pPixels, float_pixels, pixel_scales, cur_err, log_blk, block_max_var, mean_r, mean_g, mean_b, axis_r, axis_g, axis_b, params);
	}

	pack_bc6h_block(*pBlock, log_blk);
}
23424
23425
bool decode_6x6_hdr(const uint8_t *pComp_data, uint32_t comp_data_size, basisu::vector2D<astc_helpers::astc_block>& decoded_blocks, uint32_t& width, uint32_t& height)
23426
{
23427
const uint32_t BLOCK_W = 6, BLOCK_H = 6;
23428
23429
//interval_timer tm;
23430
//tm.start();
23431
23432
width = 0;
23433
height = 0;
23434
23435
if (comp_data_size <= (2 * 3 + 1))
23436
return false;
23437
23438
basist::bitwise_decoder decoder;
23439
if (!decoder.init(pComp_data, comp_data_size))
23440
return false;
23441
23442
if (decoder.get_bits(16) != 0xABCD)
23443
return false;
23444
23445
width = decoder.get_bits(16);
23446
height = decoder.get_bits(16);
23447
23448
if (!width || !height || (width > MAX_ASTC_HDR_6X6_DIM) || (height > MAX_ASTC_HDR_6X6_DIM))
23449
return false;
23450
23451
const uint32_t num_blocks_x = (width + BLOCK_W - 1) / BLOCK_W;
23452
const uint32_t num_blocks_y = (height + BLOCK_H - 1) / BLOCK_H;
23453
23454
const uint32_t total_blocks = num_blocks_x * num_blocks_y;
23455
23456
decoded_blocks.resize(num_blocks_x, num_blocks_y);
23457
//memset(decoded_blocks.get_ptr(), 0, decoded_blocks.size_in_bytes());
23458
23459
// These are the decoded log blocks, NOT the output log blocks.
23460
basisu::vector2D<astc_helpers::log_astc_block> decoded_log_blocks(num_blocks_x, REUSE_MAX_BUFFER_ROWS);
23461
memset(decoded_log_blocks.get_ptr(), 0, decoded_log_blocks.size_in_bytes());
23462
23463
uint32_t cur_bx = 0, cur_by = 0;
23464
int cur_row_index = 0;
23465
23466
uint32_t step_counter = 0;
23467
BASISU_NOTE_UNUSED(step_counter);
23468
23469
while (cur_by < num_blocks_y)
23470
{
23471
step_counter++;
23472
23473
//if ((cur_bx == 9) && (cur_by == 13))
23474
// printf("!");
23475
23476
#if SYNC_MARKERS
23477
uint32_t mk = decoder.get_bits(16);
23478
if (mk != 0xDEAD)
23479
{
23480
printf("!");
23481
assert(0);
23482
return false;
23483
}
23484
#endif
23485
if (decoder.get_bits_remaining() < 1)
23486
return false;
23487
23488
encoding_type et = encoding_type::cBlock;
23489
23490
uint32_t b0 = decoder.get_bits(1);
23491
if (!b0)
23492
{
23493
uint32_t b1 = decoder.get_bits(1);
23494
if (b1)
23495
et = encoding_type::cReuse;
23496
else
23497
{
23498
uint32_t b2 = decoder.get_bits(1);
23499
if (b2)
23500
et = encoding_type::cSolid;
23501
else
23502
et = encoding_type::cRun;
23503
}
23504
}
23505
23506
switch (et)
23507
{
23508
case encoding_type::cRun:
23509
{
23510
if (!cur_bx && !cur_by)
23511
return false;
23512
23513
const uint32_t run_len = decoder.decode_vlc(5) + 1;
23514
23515
uint32_t num_blocks_remaining = total_blocks - (cur_bx + cur_by * num_blocks_x);
23516
if (run_len > num_blocks_remaining)
23517
return false;
23518
23519
uint32_t prev_bx = cur_bx, prev_by = cur_by;
23520
23521
if (cur_bx)
23522
prev_bx--;
23523
else
23524
{
23525
prev_bx = num_blocks_x - 1;
23526
prev_by--;
23527
}
23528
23529
const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index));
23530
const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by);
23531
23532
assert((prev_log_blk.m_user_mode == 255) || (prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS));
23533
23534
for (uint32_t i = 0; i < run_len; i++)
23535
{
23536
decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index)) = prev_log_blk;
23537
decoded_blocks(cur_bx, cur_by) = prev_phys_blk;
23538
23539
cur_bx++;
23540
if (cur_bx == num_blocks_x)
23541
{
23542
cur_bx = 0;
23543
cur_by++;
23544
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23545
}
23546
}
23547
23548
break;
23549
}
23550
case encoding_type::cSolid:
23551
{
23552
const basist::half_float rh = (basist::half_float)decoder.get_bits(15);
23553
const basist::half_float gh = (basist::half_float)decoder.get_bits(15);
23554
const basist::half_float bh = (basist::half_float)decoder.get_bits(15);
23555
23556
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23557
23558
log_blk.clear();
23559
log_blk.m_user_mode = 255;
23560
log_blk.m_solid_color_flag_hdr = true;
23561
log_blk.m_solid_color[0] = rh;
23562
log_blk.m_solid_color[1] = gh;
23563
log_blk.m_solid_color[2] = bh;
23564
log_blk.m_solid_color[3] = basist::float_to_half(1.0f);
23565
23566
bool status = astc_helpers::pack_astc_block(decoded_blocks(cur_bx, cur_by), log_blk);
23567
if (!status)
23568
return false;
23569
23570
cur_bx++;
23571
if (cur_bx == num_blocks_x)
23572
{
23573
cur_bx = 0;
23574
cur_by++;
23575
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23576
}
23577
23578
break;
23579
}
23580
case encoding_type::cReuse:
23581
{
23582
if (!cur_bx && !cur_by)
23583
return false;
23584
23585
const uint32_t reuse_delta_index = decoder.get_bits(REUSE_XY_DELTA_BITS);
23586
23587
const int reuse_delta_x = g_reuse_xy_deltas[reuse_delta_index].m_x;
23588
const int reuse_delta_y = g_reuse_xy_deltas[reuse_delta_index].m_y;
23589
23590
const int prev_bx = cur_bx + reuse_delta_x, prev_by = cur_by + reuse_delta_y;
23591
if ((prev_bx < 0) || (prev_bx >= (int)num_blocks_x))
23592
return false;
23593
if (prev_by < 0)
23594
return false;
23595
23596
const astc_helpers::log_astc_block& prev_log_blk = decoded_log_blocks(prev_bx, calc_row_index(cur_by, prev_by, cur_row_index));
23597
23598
if (prev_log_blk.m_solid_color_flag_hdr)
23599
return false;
23600
assert(prev_log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS);
23601
23602
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23603
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23604
23605
log_blk = prev_log_blk;
23606
23607
const uint32_t total_grid_weights = log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 2 : 1);
23608
23609
bool status = decode_values(decoder, total_grid_weights, log_blk.m_weight_ise_range, log_blk.m_weights);
23610
if (!status)
23611
return false;
23612
23613
#if 0
23614
const astc_helpers::astc_block& prev_phys_blk = decoded_blocks(prev_bx, prev_by);
23615
23616
astc_helpers::log_astc_block decomp_blk;
23617
status = astc_helpers::unpack_block(&prev_phys_blk, decomp_blk, BLOCK_W, BLOCK_H);
23618
if (!status)
23619
return false;
23620
23621
uint8_t transcode_weights[MAX_BLOCK_W * MAX_BLOCK_H * 2];
23622
requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, decomp_blk.m_weight_ise_range);
23623
23624
copy_weight_grid(log_blk.m_dual_plane, log_blk.m_grid_width, log_blk.m_grid_height, transcode_weights, decomp_blk);
23625
#else
23626
assert(log_blk.m_user_mode < TOTAL_BLOCK_MODE_DECS);
23627
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)log_blk.m_user_mode];
23628
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23629
23630
assert(bmd.m_grid_x == log_blk.m_grid_width && bmd.m_grid_y == log_blk.m_grid_height);
23631
assert(bmd.m_dp == log_blk.m_dual_plane);
23632
assert(bmd.m_cem == log_blk.m_color_endpoint_modes[0]);
23633
assert(bmd.m_num_partitions == log_blk.m_num_partitions);
23634
assert(bmd.m_dp_channel == log_blk.m_color_component_selector);
23635
23636
// important: bmd.m_weight_ise_range/m_endpoint_ise_range may not match the logical block's due to deltas.
23637
23638
astc_helpers::log_astc_block decomp_blk;
23639
decomp_blk.clear();
23640
decomp_blk.m_dual_plane = bmd.m_dp;
23641
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23642
decomp_blk.m_partition_id = log_blk.m_partition_id;
23643
23644
decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23645
23646
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23647
decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23648
23649
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23650
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23651
23652
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23653
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p);
23654
23655
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23656
requantize_astc_weights(total_grid_weights, log_blk.m_weights, log_blk.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23657
23658
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23659
#endif
23660
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23661
if (!status)
23662
return false;
23663
23664
cur_bx++;
23665
if (cur_bx == num_blocks_x)
23666
{
23667
cur_bx = 0;
23668
cur_by++;
23669
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23670
}
23671
23672
break;
23673
}
23674
case encoding_type::cBlock:
23675
{
23676
const block_mode bm = (block_mode)decoder.decode_truncated_binary((uint32_t)block_mode::cBMTotalModes);
23677
const endpoint_mode em = (endpoint_mode)decoder.decode_truncated_binary((uint32_t)endpoint_mode::cTotal);
23678
23679
switch (em)
23680
{
23681
case endpoint_mode::cUseLeft:
23682
case endpoint_mode::cUseUpper:
23683
{
23684
int neighbor_bx = cur_bx, neighbor_by = cur_by;
23685
23686
if (em == endpoint_mode::cUseLeft)
23687
neighbor_bx--;
23688
else
23689
neighbor_by--;
23690
23691
if ((neighbor_bx < 0) || (neighbor_by < 0))
23692
return false;
23693
23694
const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index));
23695
if (!neighbor_blk.m_color_endpoint_modes[0])
23696
return false;
23697
23698
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23699
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23700
23701
if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0])
23702
return false;
23703
23704
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23705
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23706
23707
log_blk.clear();
23708
assert((uint32_t)bm <= UINT8_MAX);
23709
log_blk.m_user_mode = (uint8_t)bm;
23710
log_blk.m_num_partitions = 1;
23711
log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23712
// Important: Notice how we're copying the neighbor's endpoint ISE range. Not using the mode's endpoint ISE range here.
23713
// This is to avoid introducing more quantization error.
23714
log_blk.m_endpoint_ise_range = neighbor_blk.m_endpoint_ise_range;
23715
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23716
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23717
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23718
log_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23719
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23720
23721
memcpy(log_blk.m_endpoints, neighbor_blk.m_endpoints, num_endpoint_values);
23722
23723
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23724
23725
bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23726
if (!status)
23727
return false;
23728
23729
astc_helpers::log_astc_block decomp_blk;
23730
decomp_blk.clear();
23731
23732
decomp_blk.m_num_partitions = 1;
23733
decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23734
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23735
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23736
decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23737
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23738
23739
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints);
23740
23741
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23742
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23743
23744
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23745
23746
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23747
if (!status)
23748
return false;
23749
23750
cur_bx++;
23751
if (cur_bx == num_blocks_x)
23752
{
23753
cur_bx = 0;
23754
cur_by++;
23755
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23756
}
23757
23758
break;
23759
}
23760
case endpoint_mode::cUseLeftDelta:
23761
case endpoint_mode::cUseUpperDelta:
23762
{
23763
int neighbor_bx = cur_bx, neighbor_by = cur_by;
23764
23765
if (em == endpoint_mode::cUseLeftDelta)
23766
neighbor_bx--;
23767
else
23768
neighbor_by--;
23769
23770
if ((neighbor_bx < 0) || (neighbor_by < 0))
23771
return false;
23772
23773
const astc_helpers::log_astc_block& neighbor_blk = decoded_log_blocks(neighbor_bx, calc_row_index(cur_by, neighbor_by, cur_row_index));
23774
if (!neighbor_blk.m_color_endpoint_modes[0])
23775
return false;
23776
23777
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23778
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23779
23780
if (bmd.m_cem != neighbor_blk.m_color_endpoint_modes[0])
23781
return false;
23782
23783
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23784
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23785
23786
log_blk.clear();
23787
assert((uint32_t)bm <= UINT8_MAX);
23788
log_blk.m_user_mode = (uint8_t)bm;
23789
log_blk.m_num_partitions = 1;
23790
log_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23791
log_blk.m_dual_plane = bmd.m_dp;
23792
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23793
23794
log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range;
23795
requantize_ise_endpoints(bmd.m_cem, neighbor_blk.m_endpoint_ise_range, neighbor_blk.m_endpoints, bmd.m_endpoint_ise_range, log_blk.m_endpoints);
23796
23797
const int total_endpoint_delta_vals = 1 << NUM_ENDPOINT_DELTA_BITS;
23798
const int low_delta_limit = -(total_endpoint_delta_vals / 2); // high_delta_limit = (total_endpoint_delta_vals / 2) - 1;
23799
23800
const auto& ise_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_ISE_to_rank;
23801
const auto& rank_to_ise = astc_helpers::g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range).m_rank_to_ISE;
23802
const int total_endpoint_levels = astc_helpers::get_ise_levels(log_blk.m_endpoint_ise_range);
23803
23804
for (uint32_t i = 0; i < num_endpoint_values; i++)
23805
{
23806
int cur_val = ise_to_rank[log_blk.m_endpoints[i]];
23807
23808
int delta = (int)decoder.get_bits(NUM_ENDPOINT_DELTA_BITS) + low_delta_limit;
23809
23810
cur_val += delta;
23811
if ((cur_val < 0) || (cur_val >= total_endpoint_levels))
23812
return false;
23813
23814
log_blk.m_endpoints[i] = rank_to_ise[cur_val];
23815
}
23816
23817
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23818
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23819
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23820
23821
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23822
23823
bool status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23824
if (!status)
23825
return false;
23826
23827
astc_helpers::log_astc_block decomp_blk;
23828
decomp_blk.clear();
23829
23830
decomp_blk.m_num_partitions = 1;
23831
decomp_blk.m_color_endpoint_modes[0] = (uint8_t)bmd.m_cem;
23832
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23833
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23834
decomp_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23835
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23836
23837
requantize_ise_endpoints(bmd.m_cem, log_blk.m_endpoint_ise_range, log_blk.m_endpoints, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints);
23838
23839
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23840
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23841
23842
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23843
23844
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23845
if (!status)
23846
return false;
23847
23848
cur_bx++;
23849
if (cur_bx == num_blocks_x)
23850
{
23851
cur_bx = 0;
23852
cur_by++;
23853
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23854
}
23855
23856
break;
23857
}
23858
case endpoint_mode::cRaw:
23859
{
23860
const block_mode_desc& bmd = g_block_mode_descs[(uint32_t)bm];
23861
23862
const uint32_t num_endpoint_values = get_num_endpoint_vals(bmd.m_cem);
23863
23864
astc_helpers::log_astc_block& log_blk = decoded_log_blocks(cur_bx, calc_row_index(cur_by, cur_by, cur_row_index));
23865
astc_helpers::astc_block& phys_blk = decoded_blocks(cur_bx, cur_by);
23866
23867
log_blk.clear();
23868
23869
assert((uint32_t)bm <= UINT8_MAX);
23870
log_blk.m_user_mode = (uint8_t)bm;
23871
23872
log_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23873
23874
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23875
log_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23876
23877
log_blk.m_endpoint_ise_range = (uint8_t)bmd.m_endpoint_ise_range;
23878
log_blk.m_weight_ise_range = (uint8_t)bmd.m_weight_ise_range;
23879
23880
log_blk.m_grid_width = (uint8_t)bmd.m_grid_x;
23881
log_blk.m_grid_height = (uint8_t)bmd.m_grid_y;
23882
log_blk.m_dual_plane = (uint8_t)bmd.m_dp;
23883
log_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23884
23885
if (bmd.m_num_partitions == 2)
23886
{
23887
const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS2);
23888
log_blk.m_partition_id = (uint16_t)g_part2_unique_index_to_seed[unique_partition_index];
23889
}
23890
else if (bmd.m_num_partitions == 3)
23891
{
23892
const uint32_t unique_partition_index = decoder.decode_truncated_binary(NUM_UNIQUE_PARTITIONS3);
23893
log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_partition_index];
23894
}
23895
23896
bool status = decode_values(decoder, num_endpoint_values * bmd.m_num_partitions, bmd.m_endpoint_ise_range, log_blk.m_endpoints);
23897
if (!status)
23898
return false;
23899
23900
const uint32_t total_grid_weights = bmd.m_grid_x * bmd.m_grid_y * (bmd.m_dp ? 2 : 1);
23901
23902
status = decode_values(decoder, total_grid_weights, bmd.m_weight_ise_range, log_blk.m_weights);
23903
if (!status)
23904
return false;
23905
23906
astc_helpers::log_astc_block decomp_blk;
23907
decomp_blk.clear();
23908
decomp_blk.m_dual_plane = bmd.m_dp;
23909
decomp_blk.m_color_component_selector = (uint8_t)bmd.m_dp_channel;
23910
decomp_blk.m_partition_id = log_blk.m_partition_id;
23911
23912
decomp_blk.m_num_partitions = (uint8_t)bmd.m_num_partitions;
23913
23914
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23915
decomp_blk.m_color_endpoint_modes[p] = (uint8_t)bmd.m_cem;
23916
23917
decomp_blk.m_endpoint_ise_range = (uint8_t)bmd.m_transcode_endpoint_ise_range;
23918
decomp_blk.m_weight_ise_range = (uint8_t)bmd.m_transcode_weight_ise_range;
23919
23920
for (uint32_t p = 0; p < bmd.m_num_partitions; p++)
23921
requantize_ise_endpoints(bmd.m_cem, bmd.m_endpoint_ise_range, log_blk.m_endpoints + num_endpoint_values * p, bmd.m_transcode_endpoint_ise_range, decomp_blk.m_endpoints + num_endpoint_values * p);
23922
23923
uint8_t transcode_weights[BLOCK_W * BLOCK_H * 2];
23924
requantize_astc_weights(total_grid_weights, log_blk.m_weights, bmd.m_weight_ise_range, transcode_weights, bmd.m_transcode_weight_ise_range);
23925
23926
copy_weight_grid(bmd.m_dp, bmd.m_grid_x, bmd.m_grid_y, transcode_weights, decomp_blk);
23927
23928
status = astc_helpers::pack_astc_block(phys_blk, decomp_blk);
23929
if (!status)
23930
return false;
23931
23932
cur_bx++;
23933
if (cur_bx == num_blocks_x)
23934
{
23935
cur_bx = 0;
23936
cur_by++;
23937
cur_row_index = (cur_row_index + 1) % REUSE_MAX_BUFFER_ROWS;
23938
}
23939
23940
break;
23941
}
23942
default:
23943
{
23944
assert(0);
23945
return false;
23946
}
23947
}
23948
23949
break;
23950
}
23951
default:
23952
{
23953
assert(0);
23954
return false;
23955
}
23956
}
23957
}
23958
23959
if (decoder.get_bits(16) != 0xA742)
23960
{
23961
//fmt_error_printf("End marker not found!\n");
23962
return false;
23963
}
23964
23965
//fmt_printf("Total decode_file() time: {} secs\n", tm.get_elapsed_secs());
23966
23967
return true;
23968
}
23969
23970
} // namespace astc_6x6_hdr
23971
23972
#endif // BASISD_SUPPORT_UASTC_HDR
23973
23974
} // namespace basist
23975
23976