Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
21549 views
1
// basisu_astc_helpers.h
2
// Be sure to define BASISU_ASTC_HELPERS_IMPLEMENTATION in one source file before including this header to get the implementation, otherwise you only get the declarations.
3
#ifndef BASISU_ASTC_HELPERS_HEADER
4
#define BASISU_ASTC_HELPERS_HEADER
5
6
#include <stdlib.h>
7
#include <stdint.h>
8
#include <math.h>
9
#include <fenv.h>
10
11
namespace astc_helpers
12
{
13
const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
14
const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
15
const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
16
const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
17
const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;
18
19
static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
20
extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
21
22
// Color Endpoint Modes (CEMs), grouped by class (cem >> 2).
// A mode in class c stores 2 + 2 * c endpoint values (see get_num_cem_values()).
enum cems
{
    // Class 0: 2 endpoint values
    CEM_LDR_LUM_DIRECT                = 0,
    CEM_LDR_LUM_BASE_PLUS_OFS         = 1,
    CEM_HDR_LUM_LARGE_RANGE           = 2,
    CEM_HDR_LUM_SMALL_RANGE           = 3,

    // Class 1: 4 endpoint values
    CEM_LDR_LUM_ALPHA_DIRECT          = 4,
    CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS   = 5,
    CEM_LDR_RGB_BASE_SCALE            = 6,
    CEM_HDR_RGB_BASE_SCALE            = 7,

    // Class 2: 6 endpoint values
    CEM_LDR_RGB_DIRECT                = 8,
    CEM_LDR_RGB_BASE_PLUS_OFFSET      = 9,
    CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
    CEM_HDR_RGB                       = 11,

    // Class 3: 8 endpoint values
    CEM_LDR_RGBA_DIRECT               = 12,
    CEM_LDR_RGBA_BASE_PLUS_OFFSET     = 13,
    CEM_HDR_RGB_LDR_ALPHA             = 14,
    CEM_HDR_RGB_HDR_ALPHA             = 15
};
42
43
// Every Bounded Integer Sequence Coding (BISE, a.k.a. ISE) range, named by its level count.
// The enum value is the ISE range index used throughout this module.
//   Weights:   range indices [0,11] are valid.
//   Endpoints: range indices [4,20] are valid.
enum bise_levels
{
    BISE_2_LEVELS   = 0,
    BISE_3_LEVELS   = 1,
    BISE_4_LEVELS   = 2,
    BISE_5_LEVELS   = 3,
    BISE_6_LEVELS   = 4,
    BISE_8_LEVELS   = 5,
    BISE_10_LEVELS  = 6,
    BISE_12_LEVELS  = 7,
    BISE_16_LEVELS  = 8,
    BISE_20_LEVELS  = 9,
    BISE_24_LEVELS  = 10,
    BISE_32_LEVELS  = 11,
    BISE_40_LEVELS  = 12,
    BISE_48_LEVELS  = 13,
    BISE_64_LEVELS  = 14,
    BISE_80_LEVELS  = 15,
    BISE_96_LEVELS  = 16,
    BISE_128_LEVELS = 17,
    BISE_160_LEVELS = 18,
    BISE_192_LEVELS = 19,
    BISE_256_LEVELS = 20
};
70
71
const uint32_t TOTAL_ISE_RANGES = 21;
72
73
// Valid endpoint ISE ranges
74
const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
75
const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
76
const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
77
78
// Valid weight ISE ranges
79
const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
80
const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
81
const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
82
83
// The ISE range table.
84
extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
85
86
// Color Component Selector (CCS) values, only meaningful in dual plane mode.
// The selected component (the one after the last underscore) is interpolated with the
// 2nd weight plane.
enum ccs
{
    CCS_GBA_R = 0,  // R uses the 2nd plane
    CCS_RBA_G = 1,  // G uses the 2nd plane
    CCS_RGA_B = 2,  // B uses the 2nd plane
    CCS_RGB_A = 3   // A uses the 2nd plane
};
95
96
// A packed 128-bit ASTC block, held as four 32-bit words.
struct astc_block
{
    uint32_t m_vals[4];
};
100
101
const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
102
const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
103
const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
104
const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
105
106
struct log_astc_block
107
{
108
bool m_error_flag;
109
110
bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
111
112
uint8_t m_user_mode; // user defined value, not used in this module
113
114
// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
115
uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
116
117
bool m_dual_plane;
118
119
uint8_t m_weight_ise_range; // 0-11
120
uint8_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
121
122
uint8_t m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode
123
124
uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
125
uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
126
127
uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
128
129
union
130
{
131
// ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
132
uint8_t m_weights[MAX_GRID_WEIGHTS];
133
uint16_t m_solid_color[4];
134
};
135
136
// ISE endpoint values
137
// Endpoint order examples:
138
// 1 subset LA : LL0 LH0 AL0 AH0
139
// 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
140
// 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
141
// 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
142
// 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
143
// 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
144
uint8_t m_endpoints[MAX_ENDPOINTS];
145
146
void clear()
147
{
148
memset(this, 0, sizeof(*this));
149
}
150
};
151
152
// Half-open range check: asserts l <= v < h, then returns v unchanged.
// The (void) casts keep the parameters "used" when assert() compiles away under NDEBUG.
inline int bounds_check(int v, int l, int h)
{
    (void)v;
    (void)l;
    (void)h;
    assert(v >= l && v < h);
    return v;
}

// Unsigned overload of the half-open range check above.
inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h)
{
    (void)v;
    (void)l;
    (void)h;
    assert(v >= l && v < h);
    return v;
}
155
156
// Returns the bit field [low, high] (both inclusive) of val, right-aligned.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
    const int num_bits = (high - low) + 1;
    assert((num_bits >= 1) && (num_bits <= 32));

    const uint32_t shifted = val >> low;

    // A full 32-bit field is returned unmasked (a 32-bit shift to build the mask would be invalid).
    if (num_bits == 32)
        return shifted;

    return shifted & ((1u << num_bits) - 1);
}
167
168
// Returns the number of levels in the given ISE range.
169
inline uint32_t get_ise_levels(uint32_t ise_range)
170
{
171
assert(ise_range < TOTAL_ISE_RANGES);
172
return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
173
}
174
175
inline int get_ise_sequence_bits(int count, int range)
176
{
177
// See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)
178
int total_bits = g_ise_range_table[range][0] * count;
179
total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
180
total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
181
return total_bits;
182
}
183
184
inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
185
{
186
assert(w <= MAX_WEIGHT_VALUE);
187
return (l * (64 - w) + h * w + 32) >> 6;
188
}
189
190
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);
191
192
// Bit counters accumulated while packing blocks. All counters start at zero.
struct pack_stats
{
    uint32_t m_header_bits;
    uint32_t m_endpoint_bits;
    uint32_t m_weight_bits;

    inline pack_stats() :
        m_header_bits(0),
        m_endpoint_bits(0),
        m_weight_bits(0)
    {
    }

    // Resets every counter to zero.
    inline void clear()
    {
        m_header_bits = 0;
        m_endpoint_bits = 0;
        m_weight_bits = 0;
    }
};
201
202
// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
203
bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr);
204
205
// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
206
void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);
207
208
// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
209
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);
210
211
// These helpers are all quite slow, but are useful for table preparation.
212
213
// Dequantizes ISE encoded endpoint val to [0,255]
214
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-11
215
216
// Dequantizes ISE encoded weight val to [0,64]
217
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-10
218
219
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
220
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
221
222
void create_quant_tables(
223
uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
224
uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
225
uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
226
uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
227
uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
228
bool weight_flag); // false if block endpoints, true if weights
229
230
// True if the CEM is LDR.
231
bool is_cem_ldr(uint32_t mode);
232
inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
233
234
// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
235
bool is_valid_block_size(uint32_t w, uint32_t h);
236
237
bool block_has_any_hdr_cems(const log_astc_block& log_blk);
238
bool block_has_any_ldr_cems(const log_astc_block& log_blk);
239
240
// Returns the # of endpoint values for the given CEM:
// 2 for CEMs 0-3, 4 for CEMs 4-7, 6 for CEMs 8-11 and 8 for CEMs 12-15.
inline uint32_t get_num_cem_values(uint32_t cem)
{
    assert(cem <= 15);

    const uint32_t cem_class = cem >> 2;
    return 2 + 2 * cem_class;
}
242
243
struct dequant_table
244
{
245
basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
246
basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
247
basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
248
basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
249
250
void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)
251
{
252
m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256);
253
m_ISE_to_val.resize(num_levels);
254
if (init_rank_tabs)
255
{
256
m_ISE_to_rank.resize(num_levels);
257
m_rank_to_ISE.resize(num_levels);
258
}
259
}
260
};
261
262
struct dequant_tables
263
{
264
dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
265
dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
266
267
const dequant_table& get_weight_tab(uint32_t range) const
268
{
269
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
270
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
271
}
272
273
dequant_table& get_weight_tab(uint32_t range)
274
{
275
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
276
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
277
}
278
279
const dequant_table& get_endpoint_tab(uint32_t range) const
280
{
281
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
282
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
283
}
284
285
dequant_table& get_endpoint_tab(uint32_t range)
286
{
287
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
288
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
289
}
290
291
void init(bool init_rank_tabs)
292
{
293
for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
294
{
295
const uint32_t num_levels = get_ise_levels(range);
296
dequant_table& tab = get_weight_tab(range);
297
298
tab.init(true, num_levels, init_rank_tabs);
299
300
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);
301
}
302
303
for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
304
{
305
const uint32_t num_levels = get_ise_levels(range);
306
dequant_table& tab = get_endpoint_tab(range);
307
308
tab.init(false, num_levels, init_rank_tabs);
309
310
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);
311
}
312
}
313
};
314
315
extern dequant_tables g_dequant_tables;
316
void init_tables(bool init_rank_tabs);
317
318
// One bilinear sample record, as filled in by compute_upsample_weights().
struct weighted_sample
{
    // NOTE(review): presumably the weight grid coordinate of the 2x2 footprint's origin - confirm against compute_upsample_weights().
    uint8_t m_src_x;
    uint8_t m_src_y;

    // 2x2 bilinear weights, indexed [y][x], scaled by 16 (round by adding 8).
    uint8_t m_weights[2][2];
};
324
325
void compute_upsample_weights(
326
int block_width, int block_height,
327
int weight_grid_width, int weight_grid_height,
328
weighted_sample* pWeights); // there will be block_width * block_height bilinear samples
329
330
void upsample_weight_grid(
331
uint32_t bx, uint32_t by, // destination/to dimension
332
uint32_t wx, uint32_t wy, // source/from dimension
333
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
334
uint8_t* pDst_weights); // [by][bx]
335
336
// Procedurally returns the texel partition/subset index given the block coordinate and config.
337
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
338
339
void blue_contract(
340
int r, int g, int b, int a,
341
int& dr, int& dg, int& db, int& da);
342
343
void bit_transfer_signed(int& a, int& b);
344
345
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
346
347
typedef uint16_t half_float;
348
half_float float_to_half(float val, bool toward_zero);
349
float half_to_float(half_float hval);
350
351
// Notes:
352
// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
353
// However, this is not lossless in the general sense.
354
inline half_float qlog16_to_half(int k)
355
{
356
assert((k >= 0) && (k <= 0xFFFF));
357
358
int E = (k & 0xF800) >> 11;
359
int M = k & 0x7FF;
360
361
int Mt;
362
if (M < 512)
363
Mt = 3 * M;
364
else if (M >= 1536)
365
Mt = 5 * M - 2048;
366
else
367
Mt = 4 * M - 512;
368
369
return (half_float)((E << 10) + (Mt >> 3));
370
}
371
372
const int MAX_RGB9E5 = 0xff80;
373
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
374
uint32_t pack_rgb9e5(float r, float g, float b);
375
376
// Output pixel formats supported by decode_block().
enum decode_mode
{
    // returns uint8_t's, not valid on HDR blocks
    cDecodeModeSRGB8 = 0,

    // returns uint8_t's, not valid on HDR blocks
    cDecodeModeLDR8 = 1,

    // returns uint16_t's (half floats), valid on all LDR/HDR blocks
    cDecodeModeHDR16 = 2,

    // returns uint32_t's, packed as RGB 9E5 (shared exponent),
    // see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
    cDecodeModeRGB9E5 = 3
};
383
384
// Decodes logical block to output pixels.
385
// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
386
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
387
388
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
389
390
// Unpack a physical ASTC encoded GPU texture block to a logical block description.
391
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
392
393
} // namespace astc_helpers
394
395
#endif // BASISU_ASTC_HELPERS_HEADER
396
397
//------------------------------------------------------------------
398
399
#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
400
401
namespace astc_helpers
402
{
403
// Returns a if a < b, otherwise b.
template<typename T> inline T my_min(T a, T b)
{
    if (a < b)
        return a;
    return b;
}

// Returns a if a > b, otherwise b.
template<typename T> inline T my_max(T a, T b)
{
    if (a > b)
        return a;
    return b;
}
405
406
// The supported ASTC block dimensions as { width, height } pairs, ordered by increasing texel count.
const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] =
{
    { 4, 4 },
    { 5, 4 },
    { 5, 5 },
    { 6, 5 },
    { 6, 6 },
    { 8, 5 },
    { 8, 6 },
    { 10, 5 },
    { 10, 6 },
    { 8, 8 },
    { 10, 8 },
    { 10, 10 },
    { 12, 10 },
    { 12, 12 }
};
412
413
// Per ISE range: { # of plain bits, uses a trit (0/1), uses a quint (0/1) }.
// The row index is the ISE range index; the comment on each row is the value range it covers.
const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
{
    // bits trits quints      range     ISE index
    { 1, 0, 0 },           // 0..1      0
    { 0, 1, 0 },           // 0..2      1
    { 2, 0, 0 },           // 0..3      2
    { 0, 0, 1 },           // 0..4      3
    { 1, 1, 0 },           // 0..5      4   (min endpoint ISE index)
    { 3, 0, 0 },           // 0..7      5
    { 1, 0, 1 },           // 0..9      6
    { 2, 1, 0 },           // 0..11     7
    { 4, 0, 0 },           // 0..15     8
    { 2, 0, 1 },           // 0..19     9
    { 3, 1, 0 },           // 0..23     10
    { 5, 0, 0 },           // 0..31     11  (max weight ISE index)
    { 3, 0, 1 },           // 0..39     12
    { 4, 1, 0 },           // 0..47     13
    { 6, 0, 0 },           // 0..63     14
    { 4, 0, 1 },           // 0..79     15
    { 5, 1, 0 },           // 0..95     16
    { 7, 0, 0 },           // 0..127    17
    { 5, 0, 1 },           // 0..159    18
    { 6, 1, 0 },           // 0..191    19
    { 8, 0, 0 },           // 0..255    20
};
439
440
// ORs the low bits of `code` into the byte buffer at bit position bit_offset, then advances
// bit_offset by codesize. At most two bytes are touched, which is enough for codesize <= 9.
// `code` is not masked, so it must already fit in codesize bits. A codesize of 0 is a no-op.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
{
    assert(codesize <= 9);
    if (!codesize)
        return;

    uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst);

    const uint32_t byte_index = bit_offset >> 3;
    const uint32_t bit_in_byte = bit_offset & 7;
    const uint32_t shifted = code << bit_in_byte;

    pBytes[byte_index] |= (uint8_t)shifted;

    // Spill into the next byte only when the code crosses the byte boundary.
    if (codesize > (8 - bit_in_byte))
        pBytes[byte_index + 1] |= (uint8_t)(shifted >> 8);

    bit_offset += codesize;
}
459
460
// Returns the bit field [low, high] (both inclusive, 0 <= low <= high <= 31) of bits, right-aligned.
// The mask is built with unsigned arithmetic: the previous signed (1 << width) - 1 overflowed
// for a 31-bit field and was undefined for a full 32-bit field.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
    assert((low >= 0) && (high >= low) && (high <= 31));

    const int width = high - low + 1;
    const uint32_t mask = (width >= 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);

    return (bits >> low) & mask;
}
464
465
// Writes the low total_bits (<= 31) bits of value at bit position bit_pos, one byte-sized chunk
// at a time so the result does not depend on host endianness. Bits are OR'd in, so the target
// area must already be zero. bit_pos is advanced by total_bits.
static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
{
    assert(total_bits <= 31);
    assert(value < (1u << total_bits));

    uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

    for (uint32_t bits_left = total_bits; bits_left != 0; )
    {
        const uint32_t bit_in_byte = bit_pos & 7;
        const uint32_t room_in_byte = 8 - bit_in_byte;
        const uint32_t bits_to_write = (bits_left < room_in_byte) ? bits_left : room_in_byte;

        // The cast to uint8_t drops whatever does not fit into the current byte.
        pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << bit_in_byte);

        bit_pos += bits_to_write;
        bits_left -= bits_to_write;
        value >>= bits_to_write;
    }
}
484
485
// Quint packing table used by astc_encode_quints(): the index is q0 + 5 * q1 + 25 * q2, where
// q0..q2 are the quint parts of three consecutive values, and the entry is the 7-bit code T
// whose bits get interleaved with the values' plain bits.
static const uint8_t g_astc_quint_encode[125] =
486
{
487
0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
488
58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
489
105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
490
126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
491
};
492
493
// Encodes 3 values to output, usable for any range that uses quints and bits
494
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats)
495
{
496
// First extract the quints and the bits from the 3 input values
497
int quints = 0, bits[3];
498
const uint32_t bit_mask = (1 << n) - 1;
499
for (int i = 0; i < 3; i++)
500
{
501
static const int s_muls[3] = { 1, 5, 25 };
502
503
const int t = pValues[i] >> n;
504
505
quints += t * s_muls[i];
506
bits[i] = pValues[i] & bit_mask;
507
}
508
509
// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
510
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
511
512
assert(quints < 125);
513
const int T = g_astc_quint_encode[quints];
514
515
// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
516
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
517
(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
518
519
if (pStats)
520
*pStats += n * 3 + 7;
521
}
522
523
// Trit packing table used by astc_encode_trits(): the index is t0 + 3 * t1 + 9 * t2 + 27 * t3 + 81 * t4,
// where t0..t4 are the trit parts of five consecutive values, and the entry is the 8-bit code T
// whose bits get interleaved with the values' plain bits.
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
524
43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
525
131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
526
208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
527
225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
528
191, 223, 124, 125, 126 };
529
530
// Encodes 5 values to output, usable for any range that uses trits and bits
531
static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats)
532
{
533
// First extract the trits and the bits from the 5 input values
534
int trits = 0, bits[5];
535
const uint32_t bit_mask = (1 << n) - 1;
536
for (int i = 0; i < 5; i++)
537
{
538
static const int s_muls[5] = { 1, 3, 9, 27, 81 };
539
540
const int t = pValues[i] >> n;
541
542
trits += t * s_muls[i];
543
bits[i] = pValues[i] & bit_mask;
544
}
545
546
// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
547
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
548
549
assert(trits < 243);
550
const int T = g_astc_trit_encode[trits];
551
552
// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
553
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
554
555
astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
556
(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
557
558
if (pStats)
559
*pStats += n * 5 + 8;
560
}
561
562
// Packs values using ASTC's BISE to output buffer.
563
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats)
564
{
565
uint32_t temp[5] = { 0 };
566
567
const int num_bits = g_ise_range_table[range][0];
568
569
int group_size = 0;
570
if (g_ise_range_table[range][1])
571
group_size = 5;
572
else if (g_ise_range_table[range][2])
573
group_size = 3;
574
575
#ifndef NDEBUG
576
const uint32_t num_levels = get_ise_levels(range);
577
for (int i = 0; i < num_vals; i++)
578
{
579
assert(pSrc_vals[i] < num_levels);
580
}
581
#endif
582
583
if (group_size)
584
{
585
// Range has trits or quints - pack each group of 5 or 3 values
586
const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
587
588
for (int group_index = 0; group_index < total_groups; group_index++)
589
{
590
uint8_t vals[5] = { 0 };
591
592
const int limit = my_min(group_size, num_vals - group_index * group_size);
593
for (int i = 0; i < limit; i++)
594
vals[i] = pSrc_vals[group_index * group_size + i];
595
596
// Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed.
597
// get_ise_sequence_bits() returns the # of bits that must be written for proper decoding.
598
if (group_size == 5)
599
astc_encode_trits(temp, vals, bit_pos, num_bits, pStats);
600
else
601
astc_encode_quints(temp, vals, bit_pos, num_bits, pStats);
602
}
603
}
604
else
605
{
606
for (int i = 0; i < num_vals; i++)
607
astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
608
609
if (pStats)
610
*pStats += num_vals * num_bits;
611
}
612
613
pDst[0] |= temp[0]; pDst[1] |= temp[1];
614
pDst[2] |= temp[2]; pDst[3] |= temp[3];
615
}
616
617
// Reverses the order of all 32 bits (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
inline uint32_t rev_dword(uint32_t bits)
{
    uint32_t v = bits;

    // Swap progressively smaller neighbours: halves, bytes, nibbles, bit pairs, single bits.
    v = (v << 16) | (v >> 16);
    v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8);
    v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
    v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2);
    v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);

    return v;
}
624
625
// True if value fits in an unsigned field of num_bits bits, i.e. 0 <= value < 2^num_bits.
static inline bool is_packable(int value, int num_bits)
{
    assert((num_bits >= 1) && (num_bits < 31));

    if (value < 0)
        return false;

    return value < (1 << num_bits);
}
626
627
static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
628
{
629
config_bits = 0;
630
631
const int W = log_block.m_grid_width, H = log_block.m_grid_height;
632
633
const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
634
const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
635
636
// See Tables 81-82
637
// Compute p from weight range
638
uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
639
640
// Rearrange p's bits to p0 p2 p1
641
p = (p >> 1) + ((p & 1) << 2);
642
643
// Try encoding each row of table 82.
644
645
// W+4 H+2
646
if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
647
{
648
config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
649
return true;
650
}
651
652
// W+8 H+2
653
if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
654
{
655
config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
656
return true;
657
}
658
659
// W+2 H+8
660
if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
661
{
662
config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
663
return true;
664
}
665
666
// W+2 H+6
667
if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
668
{
669
config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
670
return true;
671
}
672
673
// W+2 H+2
674
if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
675
{
676
config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
677
return true;
678
}
679
680
// 12 H+2
681
if ((W == 12) && is_packable(H - 2, 2))
682
{
683
config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
684
return true;
685
}
686
687
// W+2 12
688
if ((H == 12) && is_packable(W - 2, 2))
689
{
690
config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
691
return true;
692
}
693
694
// 6 10
695
if ((W == 6) && (H == 10))
696
{
697
config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
698
return true;
699
}
700
701
// 10 6
702
if ((W == 10) && (H == 6))
703
{
704
config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
705
return true;
706
}
707
708
// W+6 H+6 (no dual plane or high prec)
709
if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
710
{
711
config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);
712
return true;
713
}
714
715
// Failed: unsupported weight grid dimensions or config.
716
return false;
717
}
718
719
// Packs a logical block description into a physical 128-bit ASTC block.
//   phys_block               - receives the packed block; it is zeroed first and may be left
//                              partially written when false is returned.
//   log_block                - the logical block to pack.
//   pExpected_endpoint_range - optional; set to -1 up front, and to the endpoint ISE range the
//                              block layout actually implies when the packing fails only because
//                              log_block.m_endpoint_ise_range does not match it.
//   pStats                   - optional; header/endpoint/weight bit counts are accumulated into it.
// Returns false if the logical block is flagged as an error or is not a legal encoding.
// Note this does not validate the block's dimensions, just the grid dimensions.
bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats)
{
    memset(&phys_block, 0, sizeof(phys_block));

    if (pExpected_endpoint_range)
        *pExpected_endpoint_range = -1;

    assert(!log_block.m_error_flag);
    if (log_block.m_error_flag)
        return false;

    // Solid color (void extent) blocks have their own packers. LDR takes priority if both flags are set.
    if (log_block.m_solid_color_flag_ldr || log_block.m_solid_color_flag_hdr)
    {
        const uint16_t* pColor = log_block.m_solid_color;

        if (log_block.m_solid_color_flag_ldr)
            pack_void_extent_ldr(phys_block, pColor[0], pColor[1], pColor[2], pColor[3], pStats);
        else
            pack_void_extent_hdr(phys_block, pColor[0], pColor[1], pColor[2], pColor[3], pStats);

        return true;
    }

    const uint32_t num_partitions = log_block.m_num_partitions;
    const uint8_t* pCEMs = log_block.m_color_endpoint_modes;
    uint32_t* pPhys = &phys_block.m_vals[0];

    if ((num_partitions < 1) || (num_partitions > MAX_PARTITIONS))
        return false;

    // Max usable weight range is 11
    if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
        return false;

    // See the spec's Illegal Encodings section: [0,5] is the minimum ISE encoding for endpoints
    if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
        return false;

    if (log_block.m_color_component_selector > 3)
        return false;

    // TODO: sanity check grid width/height vs. block's physical width/height

    uint32_t config_bits = 0;
    if (!get_config_bits(log_block, config_bits))
        return false;

    // ---- Block mode: 11 bits ----
    uint32_t bit_pos = 0;
    astc_set_bits(pPhys, bit_pos, config_bits, 11);
    if (pStats)
        pStats->m_header_bits += 11;

    const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
    const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);

    // Illegal encodings: no weights, too many weights, or weight data outside [24,96] bits
    if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
        return false;

    // # of config bits stored just below the weight data (extra CEM bits and/or the CCS)
    uint32_t total_extra_bits = 0;

    // ---- Partition count - 1: 2 bits ----
    astc_set_bits(pPhys, bit_pos, num_partitions - 1, 2);
    if (pStats)
        pStats->m_header_bits += 2;

    if (num_partitions == 1)
    {
        // ---- Single partition: a plain 4-bit CEM ----
        if (log_block.m_partition_id)
            return false;
        if (pCEMs[0] > 15)
            return false;

        astc_set_bits(pPhys, bit_pos, pCEMs[0], 4);
        if (pStats)
            pStats->m_header_bits += 4;
    }
    else
    {
        // ---- Multiple partitions: 10-bit partition id, then the CEM field ----
        if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
            return false;

        astc_set_bits(pPhys, bit_pos, log_block.m_partition_id, 10);
        if (pStats)
            pStats->m_header_bits += 10;

        uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
        for (uint32_t j = 0; j < num_partitions; j++)
        {
            highest_cem = my_max<uint32_t>(highest_cem, pCEMs[j]);
            lowest_cem = my_min<uint32_t>(lowest_cem, pCEMs[j]);
        }

        if (highest_cem > 15)
            return false;

        // Ensure CEM range is contiguous (the CEM classes may span at most two adjacent classes)
        if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))
            return false;

        // See tables 79/80
        // All subsets share one CEM: selector 00 followed by the 4-bit CEM.
        uint32_t encoded_cem = pCEMs[0] << 2;

        if (lowest_cem != highest_cem)
        {
            // Subsets differ: the low 2 bits hold (base class + 1), then one class bit (C) and
            // two mode bits (M) per subset.
            const uint32_t class_selector = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));
            encoded_cem = class_selector;

            for (uint32_t j = 0; j < num_partitions; j++)
            {
                const int M = pCEMs[j] & 3;

                // C is the subset's class relative to the base class, and must be 0 or 1.
                const int C = (pCEMs[j] >> 2) - (class_selector - 1);
                if ((C & 1) != C)
                    return false;

                encoded_cem |= (C << (2 + j)) | (M << (2 + num_partitions + 2 * j));
            }

            // Only the low 6 bits fit in the header; the rest go directly below the weight data.
            total_extra_bits = 3 * num_partitions - 4;

            if ((total_weight_bits + total_extra_bits) > 128)
                return false;

            uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
            astc_set_bits(pPhys, cem_bit_pos, encoded_cem >> 6, total_extra_bits);
            if (pStats)
                pStats->m_header_bits += total_extra_bits;
        }

        astc_set_bits(pPhys, bit_pos, encoded_cem & 0x3f, 6);
        if (pStats)
            pStats->m_header_bits += 6;
    }

    if (log_block.m_dual_plane)
    {
        // ---- Dual plane: 2-bit CCS, stored below the weights and any extra CEM bits ----
        if (num_partitions > MAX_DUAL_PLANE_PARTITIONS)
            return false;

        total_extra_bits += 2;

        uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
        astc_set_bits(pPhys, ccs_bit_pos, log_block.m_color_component_selector, 2);
        if (pStats)
            pStats->m_header_bits += 2;
    }

    // Whatever is left between the header and the bottom-of-block config bits holds the endpoints.
    const uint32_t total_config_bits = bit_pos + total_extra_bits;
    const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
    if (num_remaining_bits < 0)
        return false;

    uint32_t total_cem_vals = 0;
    for (uint32_t j = 0; j < num_partitions; j++)
        total_cem_vals += 2 + 2 * (pCEMs[j] >> 2);

    if (total_cem_vals > MAX_ENDPOINTS)
        return false;

    // The endpoint ISE range is implied by the layout: it is the largest range whose data still fits.
    int endpoint_ise_range = -1;
    for (int k = (int)LAST_VALID_ENDPOINT_ISE_RANGE; k >= (int)FIRST_VALID_ENDPOINT_ISE_RANGE; k--)
    {
        if (get_ise_sequence_bits(total_cem_vals, k) <= num_remaining_bits)
        {
            endpoint_ise_range = k;
            break;
        }
    }

    // See the spec's Illegal Encodings section: [0,5] is the minimum ISE encoding for endpoints
    if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
        return false;

    // Ensure the caller utilized the right endpoint ISE range.
    if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
    {
        if (pExpected_endpoint_range)
            *pExpected_endpoint_range = endpoint_ise_range;
        return false;
    }

    if (pStats)
    {
        pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range);
        pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
    }

    // Pack endpoints forwards
    encode_bise(pPhys, log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);

    // Pack weights backwards: encode them from bit 0 into a scratch block, then mirror the whole
    // 128 bits (reverse the dword order and the bits within each dword) into the block.
    uint32_t weight_data[4] = { 0 };
    encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);

    for (uint32_t i = 0; i < 4; i++)
        phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);

    return true;
}
913
914
// Expands a num_src_bits wide value to num_dst_bits by repeating its bit pattern
// from the most significant destination bit downwards; the last copy is truncated
// (shifted right) when it does not fit entirely.
// src must fit in num_src_bits, and num_src_bits must not exceed num_dst_bits (both asserted).
static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
{
	assert(num_src_bits <= num_dst_bits);
	assert((src & ((1 << num_src_bits) - 1)) == src);

	uint32_t result = 0;

	// ofs is the bit position of the current copy's LSB; it goes negative for the final, partial copy.
	int ofs = num_dst_bits - num_src_bits;
	while (ofs > -num_src_bits)
	{
		if (ofs >= 0)
			result |= (src << ofs);
		else
			result |= (src >> -ofs);

		ofs -= num_src_bits;
	}

	return result;
}
925
926
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
927
{
928
assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
929
assert(val < get_ise_levels(ise_range));
930
931
uint32_t u = 0;
932
933
switch (ise_range)
934
{
935
case 5:
936
{
937
u = bit_replication_scale(val, 3, 8);
938
break;
939
}
940
case 8:
941
{
942
u = bit_replication_scale(val, 4, 8);
943
break;
944
}
945
case 11:
946
{
947
u = bit_replication_scale(val, 5, 8);
948
break;
949
}
950
case 14:
951
{
952
u = bit_replication_scale(val, 6, 8);
953
break;
954
}
955
case 17:
956
{
957
u = bit_replication_scale(val, 7, 8);
958
break;
959
}
960
case 20:
961
{
962
u = val;
963
break;
964
}
965
case 4:
966
case 6:
967
case 7:
968
case 9:
969
case 10:
970
case 12:
971
case 13:
972
case 15:
973
case 16:
974
case 18:
975
case 19:
976
{
977
const uint32_t num_bits = g_ise_range_table[ise_range][0];
978
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
979
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
980
981
// compute Table 103 row index
982
const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
983
984
assert(range_index >= 0 && range_index <= 10);
985
986
uint32_t bits = val & ((1 << num_bits) - 1);
987
uint32_t tval = val >> num_bits;
988
989
assert(tval < (num_trits ? 3U : 5U));
990
991
uint32_t a = bits & 1;
992
uint32_t b = (bits >> 1) & 1;
993
uint32_t c = (bits >> 2) & 1;
994
uint32_t d = (bits >> 3) & 1;
995
uint32_t e = (bits >> 4) & 1;
996
uint32_t f = (bits >> 5) & 1;
997
998
uint32_t A = a ? 511 : 0;
999
uint32_t B = 0;
1000
1001
switch (range_index)
1002
{
1003
case 2:
1004
{
1005
// 876543210
1006
// b000b0bb0
1007
B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
1008
break;
1009
}
1010
case 3:
1011
{
1012
// 876543210
1013
// b0000bb00
1014
B = (b << 2) | (b << 3) | (b << 8);
1015
break;
1016
}
1017
case 4:
1018
{
1019
// 876543210
1020
// cb000cbcb
1021
B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
1022
break;
1023
}
1024
case 5:
1025
{
1026
// 876543210
1027
// cb0000cbc
1028
B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
1029
break;
1030
}
1031
case 6:
1032
{
1033
// 876543210
1034
// dcb000dcb
1035
B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
1036
break;
1037
}
1038
case 7:
1039
{
1040
// 876543210
1041
// dcb0000dc
1042
B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
1043
break;
1044
}
1045
case 8:
1046
{
1047
// 876543210
1048
// edcb000ed
1049
B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
1050
break;
1051
}
1052
case 9:
1053
{
1054
// 876543210
1055
// edcb0000e
1056
B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
1057
break;
1058
}
1059
case 10:
1060
{
1061
// 876543210
1062
// fedcb000f
1063
B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
1064
break;
1065
}
1066
default:
1067
break;
1068
}
1069
1070
static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
1071
uint32_t C = C_vals[range_index];
1072
uint32_t D = tval;
1073
1074
u = D * C + B;
1075
u = u ^ A;
1076
u = (A & 0x80) | (u >> 2);
1077
1078
break;
1079
}
1080
default:
1081
{
1082
assert(0);
1083
break;
1084
}
1085
}
1086
1087
return u;
1088
}
1089
1090
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
1091
{
1092
assert(val < get_ise_levels(ise_range));
1093
1094
uint32_t u = 0;
1095
switch (ise_range)
1096
{
1097
case 0:
1098
{
1099
u = val ? 63 : 0;
1100
break;
1101
}
1102
case 1: // 0-2
1103
{
1104
const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
1105
u = s_tab_0_2[val];
1106
break;
1107
}
1108
case 2: // 0-3
1109
{
1110
u = bit_replication_scale(val, 2, 6);
1111
break;
1112
}
1113
case 3: // 0-4
1114
{
1115
const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
1116
u = s_tab_0_4[val];
1117
break;
1118
}
1119
case 5: // 0-7
1120
{
1121
u = bit_replication_scale(val, 3, 6);
1122
break;
1123
}
1124
case 8: // 0-15
1125
{
1126
u = bit_replication_scale(val, 4, 6);
1127
break;
1128
}
1129
case 11: // 0-31
1130
{
1131
u = bit_replication_scale(val, 5, 6);
1132
break;
1133
}
1134
case 4: // 0-5
1135
case 6: // 0-9
1136
case 7: // 0-11
1137
case 9: // 0-19
1138
case 10: // 0-23
1139
{
1140
const uint32_t num_bits = g_ise_range_table[ise_range][0];
1141
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
1142
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
1143
1144
// compute Table 103 row index
1145
const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
1146
1147
// Extract bits and tris/quints from value
1148
const uint32_t bits = val & ((1u << num_bits) - 1);
1149
const uint32_t D = val >> num_bits;
1150
1151
assert(D < (num_trits ? 3U : 5U));
1152
1153
// Now dequantize
1154
// See Table 103. ASTC weight unquantization parameters
1155
static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
1156
1157
const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
1158
1159
const uint32_t A = (a == 0) ? 0 : 0x7F;
1160
1161
uint32_t B = 0;
1162
if (range_index == 4)
1163
B = ((b << 6) | (b << 2) | (b << 0));
1164
else if (range_index == 5)
1165
B = ((b << 6) | (b << 1));
1166
else if (range_index == 6)
1167
B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
1168
1169
const uint32_t C = C_table[range_index - 2];
1170
1171
u = D * C + B;
1172
u = u ^ A;
1173
u = (A & 0x20) | (u >> 2);
1174
break;
1175
}
1176
default:
1177
assert(0);
1178
break;
1179
}
1180
1181
if (u > 32)
1182
u++;
1183
1184
return u;
1185
}
1186
1187
// Returns the nearest ISE symbol given a [0,255] endpoint value.
1188
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
1189
{
1190
assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
1191
1192
const uint32_t total_levels = get_ise_levels(ise_range);
1193
int best_e = INT_MAX, best_index = 0;
1194
for (uint32_t i = 0; i < total_levels; i++)
1195
{
1196
const int qv = dequant_bise_endpoint(i, ise_range);
1197
int e = labs(v - qv);
1198
if (e < best_e)
1199
{
1200
best_e = e;
1201
best_index = i;
1202
if (!best_e)
1203
break;
1204
}
1205
}
1206
return best_index;
1207
}
1208
1209
// Returns the nearest ISE weight given a [0,64] endpoint value.
1210
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
1211
{
1212
assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
1213
assert(v <= (int)MAX_WEIGHT_VALUE);
1214
1215
const uint32_t total_levels = get_ise_levels(ise_range);
1216
int best_e = INT_MAX, best_index = 0;
1217
for (uint32_t i = 0; i < total_levels; i++)
1218
{
1219
const int qv = dequant_bise_weight(i, ise_range);
1220
int e = labs(v - qv);
1221
if (e < best_e)
1222
{
1223
best_e = e;
1224
best_index = i;
1225
if (!best_e)
1226
break;
1227
}
1228
}
1229
return best_index;
1230
}
1231
1232
void create_quant_tables(
1233
uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
1234
uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
1235
uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
1236
uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
1237
uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
1238
bool weight_flag) // false if block endpoints, true if weights
1239
{
1240
const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
1241
1242
for (uint32_t i = 0; i < num_dequant_vals; i++)
1243
{
1244
uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
1245
1246
if (pVal_to_ise)
1247
pVal_to_ise[i] = (uint8_t)bise_index;
1248
1249
if (pISE_to_val)
1250
pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
1251
}
1252
1253
if (pISE_to_rank || pRank_to_ISE)
1254
{
1255
const uint32_t num_levels = get_ise_levels(ise_range);
1256
1257
if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
1258
{
1259
// Only bits
1260
for (uint32_t i = 0; i < num_levels; i++)
1261
{
1262
if (pISE_to_rank)
1263
pISE_to_rank[i] = (uint8_t)i;
1264
1265
if (pRank_to_ISE)
1266
pRank_to_ISE[i] = (uint8_t)i;
1267
}
1268
}
1269
else
1270
{
1271
// Range has trits or quints
1272
uint32_t vals[256];
1273
for (uint32_t i = 0; i < num_levels; i++)
1274
{
1275
uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
1276
1277
// Low=ISE value
1278
// High=dequantized value
1279
vals[i] = (v << 16) | i;
1280
}
1281
1282
// Sorts by dequantized value
1283
std::sort(vals, vals + num_levels);
1284
1285
for (uint32_t rank = 0; rank < num_levels; rank++)
1286
{
1287
uint32_t ise_val = (uint8_t)vals[rank];
1288
1289
if (pISE_to_rank)
1290
pISE_to_rank[ise_val] = (uint8_t)rank;
1291
1292
if (pRank_to_ISE)
1293
pRank_to_ISE[rank] = (uint8_t)ise_val;
1294
}
1295
}
1296
}
1297
}
1298
1299
// Writes a 16 byte LDR void-extent block into blk.
// Bytes 0-1 hold the header (0xFC, 0xFD), bytes 2-7 are left as 0xFF, and bytes 8-15 hold the
// four 16-bit components rh, gh, bh, ah in that order, low byte first.
// If pStats is non-null, 128 is added to its m_header_bits.
void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats)
{
	uint8_t* pBytes = (uint8_t*)&blk.m_vals[0];

	// Start from all ones, then patch in the header and color bytes.
	memset(pBytes, 0xFF, 16);

	pBytes[0] = 0b11111100;
	pBytes[1] = 0b11111101;

	const uint16_t comps[4] = { rh, gh, bh, ah };
	for (uint32_t c = 0; c < 4; c++)
	{
		pBytes[8 + c * 2 + 0] = (uint8_t)comps[c];
		pBytes[8 + c * 2 + 1] = (uint8_t)(comps[c] >> 8);
	}

	if (pStats)
		pStats->m_header_bits += 128;
}
1319
1320
// rh-ah are half-floats
1321
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats)
1322
{
1323
uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
1324
memset(pDst, 0xFF, 16);
1325
1326
pDst[0] = 0b11111100;
1327
1328
pDst[8] = (uint8_t)rh;
1329
pDst[9] = (uint8_t)(rh >> 8);
1330
pDst[10] = (uint8_t)gh;
1331
pDst[11] = (uint8_t)(gh >> 8);
1332
pDst[12] = (uint8_t)bh;
1333
pDst[13] = (uint8_t)(bh >> 8);
1334
pDst[14] = (uint8_t)ah;
1335
pDst[15] = (uint8_t)(ah >> 8);
1336
1337
if (pStats)
1338
pStats->m_header_bits += 128;
1339
}
1340
1341
bool is_cem_ldr(uint32_t mode)
1342
{
1343
switch (mode)
1344
{
1345
case CEM_LDR_LUM_DIRECT:
1346
case CEM_LDR_LUM_BASE_PLUS_OFS:
1347
case CEM_LDR_LUM_ALPHA_DIRECT:
1348
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
1349
case CEM_LDR_RGB_BASE_SCALE:
1350
case CEM_LDR_RGB_DIRECT:
1351
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
1352
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
1353
case CEM_LDR_RGBA_DIRECT:
1354
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
1355
return true;
1356
default:
1357
break;
1358
}
1359
1360
return false;
1361
}
1362
1363
// Returns true if w x h (in texels) is one of the 14 2D block sizes accepted here:
// 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 10x5, 10x6, 8x8, 10x8, 10x10, 12x10, 12x12.
// Width/height order matters (5x4 is accepted, 4x5 is not).
// Any other pair, including dimensions outside [4,12], simply returns false; this predicate
// never asserts, so it is safe to call on unvalidated dimensions.
bool is_valid_block_size(uint32_t w, uint32_t h)
{
	static const uint8_t s_valid_sizes[14][2] =
	{
		{ 4, 4 }, { 5, 4 },
		{ 5, 5 },
		{ 6, 5 }, { 6, 6 },
		{ 8, 5 }, { 8, 6 }, { 10, 5 }, { 10, 6 },
		{ 8, 8 }, { 10, 8 }, { 10, 10 },
		{ 12, 10 }, { 12, 12 }
	};

	for (uint32_t i = 0; i < 14; i++)
	{
		if ((w == s_valid_sizes[i][0]) && (h == s_valid_sizes[i][1]))
			return true;
	}

	return false;
}
1392
1393
bool block_has_any_hdr_cems(const log_astc_block& log_blk)
1394
{
1395
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
1396
1397
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
1398
if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
1399
return true;
1400
1401
return false;
1402
}
1403
1404
bool block_has_any_ldr_cems(const log_astc_block& log_blk)
1405
{
1406
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
1407
1408
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
1409
if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
1410
return true;
1411
1412
return false;
1413
}
1414
1415
dequant_tables g_dequant_tables;
1416
1417
void precompute_texel_partitions_4x4();
1418
void precompute_texel_partitions_6x6();
1419
1420
void init_tables(bool init_rank_tabs)
1421
{
1422
g_dequant_tables.init(init_rank_tabs);
1423
1424
precompute_texel_partitions_4x4();
1425
precompute_texel_partitions_6x6();
1426
}
1427
1428
void compute_upsample_weights(
1429
int block_width, int block_height,
1430
int weight_grid_width, int weight_grid_height,
1431
weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
1432
{
1433
const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
1434
const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
1435
1436
for (int texelY = 0; texelY < block_height; texelY++)
1437
{
1438
for (int texelX = 0; texelX < block_width; texelX++)
1439
{
1440
const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
1441
const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
1442
const uint32_t jX = gX >> 4;
1443
const uint32_t jY = gY >> 4;
1444
const uint32_t fX = gX & 0xf;
1445
const uint32_t fY = gY & 0xf;
1446
const uint32_t w11 = (fX * fY + 8) >> 4;
1447
const uint32_t w10 = fY - w11;
1448
const uint32_t w01 = fX - w11;
1449
const uint32_t w00 = 16 - fX - fY + w11;
1450
1451
weighted_sample& s = pWeights[texelX + texelY * block_width];
1452
s.m_src_x = (uint8_t)jX;
1453
s.m_src_y = (uint8_t)jY;
1454
s.m_weights[0][0] = (uint8_t)w00;
1455
s.m_weights[0][1] = (uint8_t)w01;
1456
s.m_weights[1][0] = (uint8_t)w10;
1457
s.m_weights[1][1] = (uint8_t)w11;
1458
}
1459
}
1460
}
1461
1462
// Should be dequantized [0,64] weights
1463
void upsample_weight_grid(
1464
uint32_t bx, uint32_t by, // destination/to dimension
1465
uint32_t wx, uint32_t wy, // source/from dimension
1466
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
1467
uint8_t* pDst_weights) // [by][bx]
1468
{
1469
assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
1470
assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
1471
1472
const uint32_t total_src_weights = wx * wy;
1473
const uint32_t total_dst_weights = bx * by;
1474
1475
if (total_src_weights == total_dst_weights)
1476
{
1477
memcpy(pDst_weights, pSrc_weights, total_src_weights);
1478
return;
1479
}
1480
1481
weighted_sample weights[12 * 12];
1482
compute_upsample_weights(bx, by, wx, wy, weights);
1483
1484
const weighted_sample* pS = weights;
1485
1486
for (uint32_t y = 0; y < by; y++)
1487
{
1488
for (uint32_t x = 0; x < bx; x++, ++pS)
1489
{
1490
const uint32_t w00 = pS->m_weights[0][0];
1491
const uint32_t w01 = pS->m_weights[0][1];
1492
const uint32_t w10 = pS->m_weights[1][0];
1493
const uint32_t w11 = pS->m_weights[1][1];
1494
1495
assert(w00 || w01 || w10 || w11);
1496
1497
const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
1498
1499
uint32_t total = 8;
1500
if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
1501
if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
1502
if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
1503
if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
1504
1505
pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
1506
}
1507
}
1508
}
1509
1510
// 32-bit integer mixing function; compute_texel_partition() uses it to turn a partition seed
// into a pseudo-random number. All arithmetic wraps modulo 2^32.
inline uint32_t hash52(uint32_t v)
{
	uint32_t h = v;

	h ^= (h >> 15);
	h -= (h << 17);
	h += (h << 7);
	h += (h << 4);
	h ^= (h >> 5);
	h += (h << 16);
	h ^= (h >> 7);
	h ^= (h >> 3);
	h ^= (h << 6);
	h ^= (h >> 17);

	return h;
}
1518
1519
// small_block = num_blk_pixels < 31
1520
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
1521
{
1522
assert(zIn == 0);
1523
1524
const uint32_t x = small_block ? xIn << 1 : xIn;
1525
const uint32_t y = small_block ? yIn << 1 : yIn;
1526
const uint32_t z = small_block ? zIn << 1 : zIn;
1527
const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
1528
const uint32_t rnum = hash52(seed);
1529
1530
uint8_t seed1 = (uint8_t)(rnum & 0xf);
1531
uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
1532
uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
1533
uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
1534
uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
1535
uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
1536
uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
1537
uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
1538
uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
1539
uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
1540
uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
1541
uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
1542
1543
seed1 = (uint8_t)(seed1 * seed1);
1544
seed2 = (uint8_t)(seed2 * seed2);
1545
seed3 = (uint8_t)(seed3 * seed3);
1546
seed4 = (uint8_t)(seed4 * seed4);
1547
seed5 = (uint8_t)(seed5 * seed5);
1548
seed6 = (uint8_t)(seed6 * seed6);
1549
seed7 = (uint8_t)(seed7 * seed7);
1550
seed8 = (uint8_t)(seed8 * seed8);
1551
seed9 = (uint8_t)(seed9 * seed9);
1552
seed10 = (uint8_t)(seed10 * seed10);
1553
seed11 = (uint8_t)(seed11 * seed11);
1554
seed12 = (uint8_t)(seed12 * seed12);
1555
1556
const int shA = (seed & 2) != 0 ? 4 : 5;
1557
const int shB = (num_partitions == 3) ? 6 : 5;
1558
const int sh1 = (seed & 1) != 0 ? shA : shB;
1559
const int sh2 = (seed & 1) != 0 ? shB : shA;
1560
const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
1561
1562
seed1 = (uint8_t)(seed1 >> sh1);
1563
seed2 = (uint8_t)(seed2 >> sh2);
1564
seed3 = (uint8_t)(seed3 >> sh1);
1565
seed4 = (uint8_t)(seed4 >> sh2);
1566
seed5 = (uint8_t)(seed5 >> sh1);
1567
seed6 = (uint8_t)(seed6 >> sh2);
1568
seed7 = (uint8_t)(seed7 >> sh1);
1569
seed8 = (uint8_t)(seed8 >> sh2);
1570
seed9 = (uint8_t)(seed9 >> sh3);
1571
seed10 = (uint8_t)(seed10 >> sh3);
1572
seed11 = (uint8_t)(seed11 >> sh3);
1573
seed12 = (uint8_t)(seed12 >> sh3);
1574
1575
const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
1576
const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
1577
const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
1578
const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
1579
1580
return (a >= b && a >= c && a >= d) ? 0
1581
: (b >= c && b >= d) ? 1
1582
: (c >= d) ? 2
1583
: 3;
1584
}
1585
1586
// Precomputed 4x4 texel partitions for all 1024 seeds.
// [seed][0] is for 2 subsets, [seed][1] is for 3 subsets; each dword packs 16 texels at 2 bits per texel.
static uint32_t g_texel_partitions_4x4[1024][2];

// Precomputed 6x6 texel partitions for all 1024 seeds, one byte per texel:
// the low 4 bits hold the 2 subset partition index, the high 4 bits hold the 3 subset partition index.
static uint8_t g_texel_partitions_6x6[1024][6 * 6];
1591
1592
void precompute_texel_partitions_4x4()
1593
{
1594
for (uint32_t p = 0; p < 1024; p++)
1595
{
1596
uint32_t v2 = 0, v3 = 0;
1597
1598
for (uint32_t y = 0; y < 4; y++)
1599
{
1600
for (uint32_t x = 0; x < 4; x++)
1601
{
1602
const uint32_t shift = x * 2 + y * 8;
1603
v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);
1604
v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);
1605
}
1606
}
1607
1608
g_texel_partitions_4x4[p][0] = v2;
1609
g_texel_partitions_4x4[p][1] = v3;
1610
}
1611
}
1612
1613
void precompute_texel_partitions_6x6()
1614
{
1615
for (uint32_t p = 0; p < 1024; p++)
1616
{
1617
for (uint32_t y = 0; y < 6; y++)
1618
{
1619
for (uint32_t x = 0; x < 6; x++)
1620
{
1621
const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false);
1622
const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false);
1623
1624
assert((p2 <= 1) && (p3 <= 2));
1625
g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2);
1626
}
1627
}
1628
}
1629
}
1630
1631
static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
1632
{
1633
assert(g_texel_partitions_4x4[1][0]);
1634
assert(seed < 1024);
1635
assert((x <= 3) && (y <= 3));
1636
assert((num_partitions >= 2) && (num_partitions <= 3));
1637
1638
const uint32_t shift = x * 2 + y * 8;
1639
return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
1640
}
1641
1642
static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
1643
{
1644
assert(g_texel_partitions_6x6[0][0]);
1645
assert(seed < 1024);
1646
assert((x <= 5) && (y <= 5));
1647
assert((num_partitions >= 2) && (num_partitions <= 3));
1648
1649
const uint32_t shift = (num_partitions == 3) ? 4 : 0;
1650
return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3;
1651
}
1652
1653
// Blue contraction: red and green are each averaged with blue (rounding down via an arithmetic shift),
// blue and alpha pass through unchanged. Results are written to dr, dg, db, da.
void blue_contract(
	int r, int g, int b, int a,
	int &dr, int &dg, int &db, int &da)
{
	const int contracted_r = (r + b) >> 1;
	const int contracted_g = (g + b) >> 1;

	dr = contracted_r;
	dg = contracted_g;
	db = b;
	da = a;
}
1662
1663
// Moves bit 7 of a into bit 7 of b (after shifting b right by one), then turns what is left of a
// into a signed 6-bit value in [-32,31].
inline void bit_transfer_signed(int& a, int& b)
{
	// b gives up its LSB and receives a's top bit.
	b = (b >> 1) | (a & 0x80);

	// Keep bits 6..1 of a as a 6-bit field.
	a = (a >> 1) & 0x3F;

	// Sign extend the 6-bit field.
	if (a & 0x20)
		a -= 0x40;
}
1672
1673
// Clamps a to [l, h]. The lower bound is tested first.
static inline int clamp(int a, int l, int h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
1681
1682
// Clamps a to [l, h]. The lower bound is tested first. Since both comparisons are false for NaN,
// a NaN input is returned unchanged.
static inline float clampf(float a, float l, float h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
1690
1691
// Interprets the low num_src_bits bits of src as a two's complement number and sign extends it to int.
// Bits of src above num_src_bits are ignored. num_src_bits must be in [2,31] (asserted).
// The masks are built with unsigned arithmetic so that num_src_bits == 31 never shifts a 1 into
// the sign bit of a signed int.
inline int sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	const uint32_t value_mask = (1u << num_src_bits) - 1u;
	const uint32_t sign_bit = 1u << (num_src_bits - 1);
	const uint32_t bits = (uint32_t)src;

	if (bits & sign_bit)
		return (int)(bits | ~value_mask); // negative: set every bit above the field

	return (int)(bits & value_mask); // non-negative: clear every bit above the field
}
1701
1702
// endpoints is [4][2]
1703
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
1704
{
1705
assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
1706
1707
int v0 = pE[0], v1 = pE[1];
1708
1709
int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
1710
int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
1711
1712
switch (cem_index)
1713
{
1714
case CEM_LDR_LUM_DIRECT:
1715
{
1716
e0_r = v0; e1_r = v1;
1717
e0_g = v0; e1_g = v1;
1718
e0_b = v0; e1_b = v1;
1719
e0_a = 0xFF; e1_a = 0xFF;
1720
break;
1721
}
1722
case CEM_LDR_LUM_BASE_PLUS_OFS:
1723
{
1724
int l0 = (v0 >> 2) | (v1 & 0xc0);
1725
int l1 = l0 + (v1 & 0x3f);
1726
1727
if (l1 > 0xFF)
1728
l1 = 0xFF;
1729
1730
e0_r = l0; e1_r = l1;
1731
e0_g = l0; e1_g = l1;
1732
e0_b = l0; e1_b = l1;
1733
e0_a = 0xFF; e1_a = 0xFF;
1734
break;
1735
}
1736
case CEM_LDR_LUM_ALPHA_DIRECT:
1737
{
1738
int v2 = pE[2], v3 = pE[3];
1739
1740
e0_r = v0; e1_r = v1;
1741
e0_g = v0; e1_g = v1;
1742
e0_b = v0; e1_b = v1;
1743
e0_a = v2; e1_a = v3;
1744
break;
1745
}
1746
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
1747
{
1748
int v2 = pE[2], v3 = pE[3];
1749
1750
bit_transfer_signed(v1, v0);
1751
bit_transfer_signed(v3, v2);
1752
1753
e0_r = v0; e1_r = v0 + v1;
1754
e0_g = v0; e1_g = v0 + v1;
1755
e0_b = v0; e1_b = v0 + v1;
1756
e0_a = v2; e1_a = v2 + v3;
1757
1758
for (uint32_t c = 0; c < 4; c++)
1759
{
1760
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1761
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1762
}
1763
1764
break;
1765
}
1766
case CEM_LDR_RGB_BASE_SCALE:
1767
{
1768
int v2 = pE[2], v3 = pE[3];
1769
1770
e0_r = (v0 * v3) >> 8; e1_r = v0;
1771
e0_g = (v1 * v3) >> 8; e1_g = v1;
1772
e0_b = (v2 * v3) >> 8; e1_b = v2;
1773
e0_a = 0xFF; e1_a = 0xFF;
1774
1775
break;
1776
}
1777
case CEM_LDR_RGB_DIRECT:
1778
{
1779
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1780
1781
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
1782
{
1783
e0_r = v0; e1_r = v1;
1784
e0_g = v2; e1_g = v3;
1785
e0_b = v4; e1_b = v5;
1786
e0_a = 0xFF; e1_a = 0xFF;
1787
}
1788
else
1789
{
1790
blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
1791
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
1792
}
1793
1794
break;
1795
}
1796
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
1797
{
1798
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1799
1800
bit_transfer_signed(v1, v0);
1801
bit_transfer_signed(v3, v2);
1802
bit_transfer_signed(v5, v4);
1803
1804
if ((v1 + v3 + v5) >= 0)
1805
{
1806
e0_r = v0; e1_r = v0 + v1;
1807
e0_g = v2; e1_g = v2 + v3;
1808
e0_b = v4; e1_b = v4 + v5;
1809
e0_a = 0xFF; e1_a = 0xFF;
1810
}
1811
else
1812
{
1813
blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
1814
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
1815
}
1816
1817
for (uint32_t c = 0; c < 4; c++)
1818
{
1819
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1820
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1821
}
1822
1823
break;
1824
}
1825
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
1826
{
1827
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1828
1829
e0_r = (v0 * v3) >> 8; e1_r = v0;
1830
e0_g = (v1 * v3) >> 8; e1_g = v1;
1831
e0_b = (v2 * v3) >> 8; e1_b = v2;
1832
e0_a = v4; e1_a = v5;
1833
1834
break;
1835
}
1836
case CEM_LDR_RGBA_DIRECT:
1837
{
1838
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
1839
1840
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
1841
{
1842
e0_r = v0; e1_r = v1;
1843
e0_g = v2; e1_g = v3;
1844
e0_b = v4; e1_b = v5;
1845
e0_a = v6; e1_a = v7;
1846
}
1847
else
1848
{
1849
blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
1850
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
1851
}
1852
1853
break;
1854
}
1855
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
1856
{
1857
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
1858
1859
bit_transfer_signed(v1, v0);
1860
bit_transfer_signed(v3, v2);
1861
bit_transfer_signed(v5, v4);
1862
bit_transfer_signed(v7, v6);
1863
1864
if ((v1 + v3 + v5) >= 0)
1865
{
1866
e0_r = v0; e1_r = v0 + v1;
1867
e0_g = v2; e1_g = v2 + v3;
1868
e0_b = v4; e1_b = v4 + v5;
1869
e0_a = v6; e1_a = v6 + v7;
1870
}
1871
else
1872
{
1873
blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
1874
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
1875
}
1876
1877
for (uint32_t c = 0; c < 4; c++)
1878
{
1879
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1880
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1881
}
1882
1883
break;
1884
}
1885
case CEM_HDR_LUM_LARGE_RANGE:
1886
{
1887
int y0, y1;
1888
if (v1 >= v0)
1889
{
1890
y0 = (v0 << 4);
1891
y1 = (v1 << 4);
1892
}
1893
else
1894
{
1895
y0 = (v1 << 4) + 8;
1896
y1 = (v0 << 4) - 8;
1897
}
1898
1899
e0_r = y0; e1_r = y1;
1900
e0_g = y0; e1_g = y1;
1901
e0_b = y0; e1_b = y1;
1902
e0_a = 0x780; e1_a = 0x780;
1903
1904
break;
1905
}
1906
case CEM_HDR_LUM_SMALL_RANGE:
1907
{
1908
int y0, y1, d;
1909
1910
if ((v0 & 0x80) != 0)
1911
{
1912
y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
1913
d = (v1 & 0x1F) << 2;
1914
}
1915
else
1916
{
1917
y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
1918
d = (v1 & 0x0F) << 1;
1919
}
1920
1921
y1 = y0 + d;
1922
if (y1 > 0xFFF)
1923
y1 = 0xFFF;
1924
1925
e0_r = y0; e1_r = y1;
1926
e0_g = y0; e1_g = y1;
1927
e0_b = y0; e1_b = y1;
1928
e0_a = 0x780; e1_a = 0x780;
1929
1930
break;
1931
}
1932
case CEM_HDR_RGB_BASE_SCALE:
1933
{
1934
int v2 = pE[2], v3 = pE[3];
1935
1936
int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
1937
1938
int majcomp, mode;
1939
if ((modeval & 0xC) != 0xC)
1940
{
1941
majcomp = modeval >> 2;
1942
mode = modeval & 3;
1943
}
1944
else if (modeval != 0xF)
1945
{
1946
majcomp = modeval & 3;
1947
mode = 4;
1948
}
1949
else
1950
{
1951
majcomp = 0;
1952
mode = 5;
1953
}
1954
1955
int red = v0 & 0x3f;
1956
int green = v1 & 0x1f;
1957
int blue = v2 & 0x1f;
1958
int scale = v3 & 0x1f;
1959
1960
int x0 = (v1 >> 6) & 1;
1961
int x1 = (v1 >> 5) & 1;
1962
int x2 = (v2 >> 6) & 1;
1963
int x3 = (v2 >> 5) & 1;
1964
int x4 = (v3 >> 7) & 1;
1965
int x5 = (v3 >> 6) & 1;
1966
int x6 = (v3 >> 5) & 1;
1967
1968
int ohm = 1 << mode;
1969
if (ohm & 0x30) green |= x0 << 6;
1970
if (ohm & 0x3A) green |= x1 << 5;
1971
if (ohm & 0x30) blue |= x2 << 6;
1972
if (ohm & 0x3A) blue |= x3 << 5;
1973
if (ohm & 0x3D) scale |= x6 << 5;
1974
if (ohm & 0x2D) scale |= x5 << 6;
1975
if (ohm & 0x04) scale |= x4 << 7;
1976
if (ohm & 0x3B) red |= x4 << 6;
1977
if (ohm & 0x04) red |= x3 << 6;
1978
if (ohm & 0x10) red |= x5 << 7;
1979
if (ohm & 0x0F) red |= x2 << 7;
1980
if (ohm & 0x05) red |= x1 << 8;
1981
if (ohm & 0x0A) red |= x0 << 8;
1982
if (ohm & 0x05) red |= x0 << 9;
1983
if (ohm & 0x02) red |= x6 << 9;
1984
if (ohm & 0x01) red |= x3 << 10;
1985
if (ohm & 0x02) red |= x5 << 10;
1986
1987
static const int s_shamts[6] = { 1,1,2,3,4,5 };
1988
1989
const int shamt = s_shamts[mode];
1990
red <<= shamt;
1991
green <<= shamt;
1992
blue <<= shamt;
1993
scale <<= shamt;
1994
1995
if (mode != 5)
1996
{
1997
green = red - green;
1998
blue = red - blue;
1999
}
2000
2001
if (majcomp == 1)
2002
std::swap(red, green);
2003
2004
if (majcomp == 2)
2005
std::swap(red, blue);
2006
2007
e1_r = clamp(red, 0, 0xFFF);
2008
e1_g = clamp(green, 0, 0xFFF);
2009
e1_b = clamp(blue, 0, 0xFFF);
2010
e1_a = 0x780;
2011
2012
e0_r = clamp(red - scale, 0, 0xFFF);
2013
e0_g = clamp(green - scale, 0, 0xFFF);
2014
e0_b = clamp(blue - scale, 0, 0xFFF);
2015
e0_a = 0x780;
2016
2017
break;
2018
}
2019
case CEM_HDR_RGB_HDR_ALPHA:
2020
case CEM_HDR_RGB_LDR_ALPHA:
2021
case CEM_HDR_RGB:
2022
{
2023
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
2024
2025
int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
2026
2027
e0_a = 0x780;
2028
e1_a = 0x780;
2029
2030
if (majcomp == 3)
2031
{
2032
e0_r = v0 << 4;
2033
e0_g = v2 << 4;
2034
e0_b = (v4 & 0x7f) << 5;
2035
2036
e1_r = v1 << 4;
2037
e1_g = v3 << 4;
2038
e1_b = (v5 & 0x7f) << 5;
2039
}
2040
else
2041
{
2042
int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
2043
int va = v0 | ((v1 & 0x40) << 2);
2044
int vb0 = v2 & 0x3f;
2045
int vb1 = v3 & 0x3f;
2046
int vc = v1 & 0x3f;
2047
int vd0 = v4 & 0x7f;
2048
int vd1 = v5 & 0x7f;
2049
2050
static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
2051
vd0 = sign_extend(vd0, s_dbitstab[mode]);
2052
vd1 = sign_extend(vd1, s_dbitstab[mode]);
2053
2054
int x0 = (v2 >> 6) & 1;
2055
int x1 = (v3 >> 6) & 1;
2056
int x2 = (v4 >> 6) & 1;
2057
int x3 = (v5 >> 6) & 1;
2058
int x4 = (v4 >> 5) & 1;
2059
int x5 = (v5 >> 5) & 1;
2060
2061
int ohm = 1 << mode;
2062
if (ohm & 0xA4) va |= x0 << 9;
2063
if (ohm & 0x08) va |= x2 << 9;
2064
if (ohm & 0x50) va |= x4 << 9;
2065
if (ohm & 0x50) va |= x5 << 10;
2066
if (ohm & 0xA0) va |= x1 << 10;
2067
if (ohm & 0xC0) va |= x2 << 11;
2068
if (ohm & 0x04) vc |= x1 << 6;
2069
if (ohm & 0xE8) vc |= x3 << 6;
2070
if (ohm & 0x20) vc |= x2 << 7;
2071
if (ohm & 0x5B) vb0 |= x0 << 6;
2072
if (ohm & 0x5B) vb1 |= x1 << 6;
2073
if (ohm & 0x12) vb0 |= x2 << 7;
2074
if (ohm & 0x12) vb1 |= x3 << 7;
2075
2076
int shamt = (mode >> 1) ^ 3;
2077
va = (uint32_t)va << shamt;
2078
vb0 = (uint32_t)vb0 << shamt;
2079
vb1 = (uint32_t)vb1 << shamt;
2080
vc = (uint32_t)vc << shamt;
2081
vd0 = (uint32_t)vd0 << shamt;
2082
vd1 = (uint32_t)vd1 << shamt;
2083
2084
e1_r = clamp(va, 0, 0xFFF);
2085
e1_g = clamp(va - vb0, 0, 0xFFF);
2086
e1_b = clamp(va - vb1, 0, 0xFFF);
2087
2088
e0_r = clamp(va - vc, 0, 0xFFF);
2089
e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
2090
e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
2091
2092
if (majcomp == 1)
2093
{
2094
std::swap(e0_r, e0_g);
2095
std::swap(e1_r, e1_g);
2096
}
2097
else if (majcomp == 2)
2098
{
2099
std::swap(e0_r, e0_b);
2100
std::swap(e1_r, e1_b);
2101
}
2102
}
2103
2104
if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
2105
{
2106
int v6 = pE[6], v7 = pE[7];
2107
2108
e0_a = v6;
2109
e1_a = v7;
2110
}
2111
else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
2112
{
2113
int v6 = pE[6], v7 = pE[7];
2114
2115
// Extract mode bits
2116
int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
2117
v6 &= 0x7F;
2118
v7 &= 0x7F;
2119
2120
if (mode == 3)
2121
{
2122
e0_a = v6 << 5;
2123
e1_a = v7 << 5;
2124
}
2125
else
2126
{
2127
v6 |= (v7 << (mode + 1)) & 0x780;
2128
v7 &= (0x3F >> mode);
2129
v7 ^= (0x20 >> mode);
2130
v7 -= (0x20 >> mode);
2131
v6 <<= (4 - mode);
2132
v7 <<= (4 - mode);
2133
2134
v7 += v6;
2135
v7 = clamp(v7, 0, 0xFFF);
2136
e0_a = v6;
2137
e1_a = v7;
2138
}
2139
}
2140
2141
break;
2142
}
2143
default:
2144
{
2145
assert(0);
2146
for (uint32_t c = 0; c < 4; c++)
2147
{
2148
pEndpoints[c][0] = 0;
2149
pEndpoints[c][1] = 0;
2150
}
2151
break;
2152
}
2153
}
2154
}
2155
2156
// Returns true when the half-float bit pattern v has the value 31 in the field
// read by get_bits(v, 10, 14), i.e. when v encodes an infinity or a NaN.
static inline bool is_half_inf_or_nan(half_float v)
{
	const auto exponent_field = get_bits(v, 10, 14);
	return exponent_field == 31;
}
2160
2161
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
2162
half_float float_to_half(float val, bool toward_zero)
2163
{
2164
union { float f; int32_t i; uint32_t u; } fi = { val };
2165
const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
2166
int s = flt_s, e = 0, m = 0;
2167
2168
// inf/NaN
2169
if (flt_e == 0xff)
2170
{
2171
e = 31;
2172
if (flt_m != 0) // NaN
2173
m = 1;
2174
}
2175
// not zero or denormal
2176
else if (flt_e != 0)
2177
{
2178
int new_exp = flt_e - 127;
2179
if (new_exp > 15)
2180
e = 31;
2181
else if (new_exp < -14)
2182
{
2183
if (toward_zero)
2184
m = (int)truncf((1 << 24) * fabsf(fi.f));
2185
else
2186
m = lrintf((1 << 24) * fabsf(fi.f));
2187
}
2188
else
2189
{
2190
e = new_exp + 15;
2191
if (toward_zero)
2192
m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));
2193
else
2194
m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
2195
}
2196
}
2197
2198
assert((0 <= m) && (m <= 1024));
2199
if (m == 1024)
2200
{
2201
e++;
2202
m = 0;
2203
}
2204
2205
assert((s >= 0) && (s <= 1));
2206
assert((e >= 0) && (e <= 31));
2207
assert((m >= 0) && (m <= 1023));
2208
2209
half_float result = (half_float)((s << 15) | (e << 10) | m);
2210
return result;
2211
}
2212
2213
// Expands a half-float bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits)
// into the equivalent 32-bit float. Zeros, denormals, infinities and NaNs are all handled;
// a NaN keeps its mantissa bits (shifted up by 13).
float half_to_float(half_float hval)
{
	union { float f; uint32_t u; } out = { 0 };

	const uint32_t h = (uint32_t)hval;
	const uint32_t sign = (h >> 15) & 1;
	uint32_t exponent = (h >> 10) & 0x1F;
	uint32_t mantissa = h & 0x3FF;

	if (exponent == 31)
	{
		// +/- INF (mantissa == 0) or +/- NaN (mantissa != 0)
		out.u = (sign << 31) | 0x7f800000 | (mantissa << 13);
		return out.f;
	}

	if (exponent == 0)
	{
		if (mantissa == 0)
		{
			// +- 0
			out.u = sign << 31;
			return out.f;
		}

		// Denormalized: shift the mantissa up until the implicit leading one (bit 10) appears,
		// lowering the exponent once per shift. The exponent is unsigned and may wrap here;
		// adding the bias below brings it back into range.
		exponent = 1;
		while ((mantissa & 0x00000400) == 0)
		{
			mantissa <<= 1;
			exponent--;
		}

		// Drop the now-implicit leading one.
		mantissa &= 0x000003FF;
	}

	// Rebias from half (15) to float (127) and widen the mantissa from 10 to 23 bits.
	exponent += (127 - 15);
	mantissa <<= 13;

	assert(sign <= 1);
	assert(mantissa <= 0x7FFFFF);
	assert(exponent <= 255);

	out.u = mantissa | (exponent << 23) | (sign << 31);
	return out.f;
}
2268
2269
// RGB9E5 (shared exponent) format parameters.
// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
const int RGB9E5_EXPONENT_BITS = 5;
const int RGB9E5_MANTISSA_BITS = 9;
const int RGB9E5_EXP_BIAS = 15;
const int RGB9E5_MAX_VALID_BIASED_EXP = 31;

// Largest unbiased exponent (31 - 15 = 16).
const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);

// Number of distinct mantissa values (512) and the largest mantissa (511).
const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);

// MAX_RGB9E5 is intentionally not defined here (the integer form below is disabled):
//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));

// NOTE(review): the expression is roughly 2^-24, so the (int) cast makes this constant 0.
// It looks like it was meant to be a float - confirm against any users before changing the type.
const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
2276
2277
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
2278
{
2279
int x = packed & 511;
2280
int y = (packed >> 9) & 511;
2281
int z = (packed >> 18) & 511;
2282
int w = (packed >> 27) & 31;
2283
2284
const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
2285
2286
r = x * scale;
2287
g = y * scale;
2288
b = z * scale;
2289
}
2290
2291
// Returns the unbiased exponent field of a 32-bit float (biased exponent minus 127); the sign bit is ignored.
// floor_log2 is not correct for the denorm and zero values (they yield -127), but we are going to do a max of
// this value with the minimum rgb9e5 exponent that will hide these problem cases.
static inline int floor_log2(float x)
{
	union
	{
		float value;
		unsigned int raw;
	} pun;

	pun.value = x;

	// Extract float exponent
	const unsigned int biased_exponent = (pun.raw >> 23) & 0xFF;
	return (int)biased_exponent - 127;
}
2305
2306
// Returns the larger of two ints.
static inline int maximumi(int a, int b)
{
	if (a > b)
		return a;
	return b;
}

// Returns a when (a > b), otherwise b. The comparison is false if either operand is NaN,
// so b is returned in that case.
static inline float maximumf(float a, float b)
{
	if (a > b)
		return a;
	return b;
}
2308
2309
uint32_t pack_rgb9e5(float r, float g, float b)
2310
{
2311
r = clampf(r, 0.0f, MAX_RGB9E5);
2312
g = clampf(g, 0.0f, MAX_RGB9E5);
2313
b = clampf(b, 0.0f, MAX_RGB9E5);
2314
2315
float maxrgb = maximumf(maximumf(r, g), b);
2316
int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
2317
assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
2318
2319
float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
2320
2321
int maxm = (int)floorf((maxrgb / denom) + 0.5f);
2322
if (maxm == (MAX_RGB9E5_MANTISSA + 1))
2323
{
2324
denom *= 2;
2325
exp_shared += 1;
2326
assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
2327
}
2328
else
2329
{
2330
assert(maxm <= MAX_RGB9E5_MANTISSA);
2331
}
2332
2333
int rm = (int)floorf((r / denom) + 0.5f);
2334
int gm = (int)floorf((g / denom) + 0.5f);
2335
int bm = (int)floorf((b / denom) + 0.5f);
2336
2337
assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
2338
assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
2339
assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
2340
2341
return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
2342
}
2343
2344
// Counts the leading zero bits of x when viewed as a 17-bit value (bit 16 is the top bit).
// Returns 17 for x == 0. x must not exceed 0x1FFFF (asserted); higher bits are masked off.
static inline int clz17(uint32_t x)
{
	assert(x <= 0x1FFFF);
	x &= 0x1FFFF;

	int zeros = 0;

	// Walk a single-bit probe down from bit 16 until it hits a set bit or runs out.
	for (uint32_t probe = 0x10000; probe != 0; probe >>= 1)
	{
		if (x & probe)
			break;
		zeros++;
	}

	return zeros;
}
2361
2362
static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
2363
{
2364
int lz = clz17(Cr | Cg | Cb | 1);
2365
if (Cr == 65535) { Cr = 65536; lz = 0; }
2366
if (Cg == 65535) { Cg = 65536; lz = 0; }
2367
if (Cb == 65535) { Cb = 65536; lz = 0; }
2368
Cr <<= lz; Cg <<= lz; Cb <<= lz;
2369
Cr = (Cr >> 8) & 0x1FF;
2370
Cg = (Cg >> 8) & 0x1FF;
2371
Cb = (Cb >> 8) & 0x1FF;
2372
uint32_t exponent = 16 - lz;
2373
uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
2374
return texel;
2375
}
2376
2377
// Packs three half-float bit patterns into an RGB9E5 texel using only integer operations.
// Inputs above 0x7c00 are replaced by 0, and 0x7c00 itself is replaced by 0x7bff.
// The shared exponent comes from the channel with the largest half exponent; the other
// channels' mantissas are shifted right by the exponent difference.
// Returned layout: bits 0-8 red mantissa, bits 9-17 green, bits 18-26 blue, bits 27-31 exponent.
static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
{
	int half_bits[3] = { Cr, Cg, Cb };
	int raw_exp[3];  // 5-bit half exponent field
	int eff_exp[3];  // same, but an exponent field of 0 is treated as 1

	for (int c = 0; c < 3; c++)
	{
		if (half_bits[c] > 0x7c00)
			half_bits[c] = 0;
		else if (half_bits[c] == 0x7c00)
			half_bits[c] = 0x7bff;

		raw_exp[c] = (half_bits[c] >> 10) & 0x1F;
		eff_exp[c] = (raw_exp[c] == 0) ? 1 : raw_exp[c];
	}

	// Bit 9 of any channel, and the OR of all exponent fields.
	const int any_bit9 = ((half_bits[0] | half_bits[1] | half_bits[2]) & 0x200) >> 9;
	const int any_exp = raw_exp[0] | raw_exp[1] | raw_exp[2];

	uint32_t expo;
	uint32_t shifts[3];

	if (any_exp == 0)
	{
		// All three exponent fields are zero: exponent and shifts are just bit 9 of the mantissas.
		expo = shifts[0] = shifts[1] = shifts[2] = any_bit9;
	}
	else
	{
		// Pick the dominant channel (ties resolved red, then green, then blue).
		int dom;
		if ((raw_exp[0] >= raw_exp[1]) && (raw_exp[0] >= raw_exp[2]))
			dom = 0;
		else if (raw_exp[1] >= raw_exp[2])
			dom = 1;
		else
			dom = 2;

		expo = eff_exp[dom] + 1;
		for (int c = 0; c < 3; c++)
			shifts[c] = eff_exp[dom] - eff_exp[c] + 2;
	}

	int mant[3];
	for (int c = 0; c < 3; c++)
	{
		// 10 stored mantissa bits, plus the implicit leading one when the exponent field is non-zero.
		const int full_mant = (half_bits[c] & 0x3FF) | (raw_exp[c] == 0 ? 0 : 0x400);
		mant[c] = (full_mant >> shifts[c]) & 0x1FF;
	}

	return (expo << 27) | (mant[2] << 18) | (mant[1] << 9) | (mant[0] << 0);
}
2428
2429
// Important: pPixels is either 32-bit/texel or 64-bit/texel.
2430
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
2431
{
2432
assert(is_valid_block_size(blk_width, blk_height));
2433
2434
assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
2435
if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
2436
return false;
2437
2438
const uint32_t num_blk_pixels = blk_width * blk_height;
2439
2440
// Write block error color
2441
if (dec_mode == cDecodeModeHDR16)
2442
{
2443
// NaN's
2444
memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
2445
}
2446
else if (dec_mode == cDecodeModeRGB9E5)
2447
{
2448
const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
2449
2450
for (uint32_t i = 0; i < num_blk_pixels; i++)
2451
((uint32_t*)pPixels)[i] = purple_9e5;
2452
}
2453
else
2454
{
2455
for (uint32_t i = 0; i < num_blk_pixels; i++)
2456
((uint32_t*)pPixels)[i] = 0xFFFF00FF;
2457
}
2458
2459
if (log_blk.m_error_flag)
2460
{
2461
// Should this return false? It's not an invalid logical block config, though.
2462
return false;
2463
}
2464
2465
// Handle solid color blocks
2466
if (log_blk.m_solid_color_flag_ldr)
2467
{
2468
// LDR solid block
2469
if (dec_mode == cDecodeModeHDR16)
2470
{
2471
// Convert LDR pixels to half-float
2472
half_float h[4];
2473
for (uint32_t c = 0; c < 4; c++)
2474
h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
2475
2476
for (uint32_t i = 0; i < num_blk_pixels; i++)
2477
memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
2478
}
2479
else if (dec_mode == cDecodeModeRGB9E5)
2480
{
2481
float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
2482
float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
2483
float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
2484
2485
const uint32_t packed = pack_rgb9e5(r, g, b);
2486
2487
for (uint32_t i = 0; i < num_blk_pixels; i++)
2488
((uint32_t*)pPixels)[i] = packed;
2489
}
2490
else
2491
{
2492
// Convert LDR pixels to 8-bits
2493
for (uint32_t i = 0; i < num_blk_pixels; i++)
2494
for (uint32_t c = 0; c < 4; c++)
2495
((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
2496
}
2497
2498
return true;
2499
}
2500
else if (log_blk.m_solid_color_flag_hdr)
2501
{
2502
// HDR solid block, decode mode must be half-float or RGB9E5
2503
if (dec_mode == cDecodeModeHDR16)
2504
{
2505
for (uint32_t i = 0; i < num_blk_pixels; i++)
2506
memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
2507
}
2508
else if (dec_mode == cDecodeModeRGB9E5)
2509
{
2510
float r = half_to_float(log_blk.m_solid_color[0]);
2511
float g = half_to_float(log_blk.m_solid_color[1]);
2512
float b = half_to_float(log_blk.m_solid_color[2]);
2513
2514
const uint32_t packed = pack_rgb9e5(r, g, b);
2515
2516
for (uint32_t i = 0; i < num_blk_pixels; i++)
2517
((uint32_t*)pPixels)[i] = packed;
2518
}
2519
else
2520
{
2521
return false;
2522
}
2523
2524
return true;
2525
}
2526
2527
// Sanity check block's config
2528
if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
2529
return false;
2530
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
2531
return false;
2532
2533
if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
2534
return false;
2535
if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
2536
return false;
2537
if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
2538
return false;
2539
if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
2540
return false;
2541
if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
2542
return false;
2543
if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
2544
return false;
2545
if (log_blk.m_color_component_selector > 3)
2546
return false;
2547
2548
const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
2549
const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
2550
2551
bool is_ldr_endpoints[MAX_PARTITIONS];
2552
2553
// Check CEM's
2554
uint32_t total_cem_vals = 0;
2555
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
2556
{
2557
if (log_blk.m_color_endpoint_modes[i] > 15)
2558
return false;
2559
2560
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
2561
2562
is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
2563
}
2564
2565
if (total_cem_vals > MAX_ENDPOINTS)
2566
return false;
2567
2568
const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
2569
const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
2570
2571
// Dequantized endpoints to [0,255]
2572
uint8_t dequantized_endpoints[MAX_ENDPOINTS];
2573
for (uint32_t i = 0; i < total_cem_vals; i++)
2574
{
2575
if (log_blk.m_endpoints[i] >= total_endpoint_levels)
2576
return false;
2577
dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
2578
}
2579
2580
// Dequantize weights to [0,64]
2581
uint8_t dequantized_weights[2][12 * 12];
2582
2583
const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
2584
const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
2585
2586
const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
2587
for (uint32_t i = 0; i < total_weight_vals; i++)
2588
{
2589
if (log_blk.m_weights[i] >= total_weight_levels)
2590
return false;
2591
2592
const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
2593
const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
2594
2595
dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
2596
}
2597
2598
// Upsample weight grid. [0,64] weights
2599
uint8_t upsampled_weights[2][12 * 12];
2600
2601
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
2602
if (log_blk.m_dual_plane)
2603
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
2604
2605
// Decode CEM's
2606
int endpoints[4][4][2]; // [subset][comp][l/h]
2607
2608
uint32_t endpoint_val_index = 0;
2609
for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
2610
{
2611
const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
2612
2613
decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
2614
2615
endpoint_val_index += get_num_cem_values(cem_index);
2616
}
2617
2618
// Decode texels
2619
const bool small_block = num_blk_pixels < 31;
2620
const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
2621
const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
2622
const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
2623
2624
bool success = true;
2625
2626
if (dec_mode == cDecodeModeRGB9E5)
2627
{
2628
// returns uint32_t's
2629
for (uint32_t y = 0; y < blk_height; y++)
2630
{
2631
for (uint32_t x = 0; x < blk_width; x++)
2632
{
2633
const uint32_t pixel_index = x + y * blk_width;
2634
2635
uint32_t subset = 0;
2636
if (log_blk.m_num_partitions > 1)
2637
{
2638
if (use_precomputed_texel_partitions_4x4)
2639
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2640
else if (use_precomputed_texel_partitions_6x6)
2641
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2642
else
2643
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2644
}
2645
2646
int comp[3];
2647
2648
for (uint32_t c = 0; c < 3; c++)
2649
{
2650
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2651
2652
if (is_ldr_endpoints[subset])
2653
{
2654
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
2655
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
2656
2657
int le = endpoints[subset][c][0];
2658
int he = endpoints[subset][c][1];
2659
2660
le = (le << 8) | le;
2661
he = (he << 8) | he;
2662
2663
int k = weight_interpolate(le, he, w);
2664
assert((k >= 0) && (k <= 0xFFFF));
2665
2666
comp[c] = k; // 1.0
2667
}
2668
else
2669
{
2670
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
2671
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
2672
2673
int le = endpoints[subset][c][0] << 4;
2674
int he = endpoints[subset][c][1] << 4;
2675
2676
int qlog16 = weight_interpolate(le, he, w);
2677
2678
comp[c] = qlog16_to_half(qlog16);
2679
2680
if (is_half_inf_or_nan((half_float)comp[c]))
2681
comp[c] = 0x7BFF;
2682
}
2683
2684
} // c
2685
2686
uint32_t packed;
2687
if (is_ldr_endpoints[subset])
2688
packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
2689
else
2690
packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
2691
2692
((uint32_t*)pPixels)[pixel_index] = packed;
2693
2694
} // x
2695
} // y
2696
}
2697
else if (dec_mode == cDecodeModeHDR16)
2698
{
2699
// Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
2700
2701
// returns half floats
2702
for (uint32_t y = 0; y < blk_height; y++)
2703
{
2704
for (uint32_t x = 0; x < blk_width; x++)
2705
{
2706
const uint32_t pixel_index = x + y * blk_width;
2707
2708
uint32_t subset = 0;
2709
if (log_blk.m_num_partitions > 1)
2710
{
2711
if (use_precomputed_texel_partitions_4x4)
2712
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2713
else if (use_precomputed_texel_partitions_6x6)
2714
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2715
else
2716
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2717
}
2718
2719
for (uint32_t c = 0; c < 4; c++)
2720
{
2721
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2722
2723
half_float o;
2724
2725
if ( (is_ldr_endpoints[subset]) ||
2726
((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
2727
{
2728
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
2729
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
2730
2731
int le = endpoints[subset][c][0];
2732
int he = endpoints[subset][c][1];
2733
2734
le = (le << 8) | le;
2735
he = (he << 8) | he;
2736
2737
int k = weight_interpolate(le, he, w);
2738
assert((k >= 0) && (k <= 0xFFFF));
2739
2740
if (k == 0xFFFF)
2741
o = 0x3C00; // 1.0
2742
else
2743
o = float_to_half((float)k * (1.0f / 65536.0f), true);
2744
}
2745
else
2746
{
2747
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
2748
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
2749
2750
int le = endpoints[subset][c][0] << 4;
2751
int he = endpoints[subset][c][1] << 4;
2752
2753
int qlog16 = weight_interpolate(le, he, w);
2754
2755
o = qlog16_to_half(qlog16);
2756
2757
if (is_half_inf_or_nan(o))
2758
o = 0x7BFF;
2759
}
2760
2761
((half_float*)pPixels)[pixel_index * 4 + c] = o;
2762
}
2763
2764
} // x
2765
} // y
2766
}
2767
else
2768
{
2769
// returns uint8_t's
2770
for (uint32_t y = 0; y < blk_height; y++)
2771
{
2772
for (uint32_t x = 0; x < blk_width; x++)
2773
{
2774
const uint32_t pixel_index = x + y * blk_width;
2775
2776
uint32_t subset = 0;
2777
if (log_blk.m_num_partitions > 1)
2778
{
2779
if (use_precomputed_texel_partitions_4x4)
2780
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2781
else if (use_precomputed_texel_partitions_6x6)
2782
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2783
else
2784
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2785
}
2786
2787
if (!is_ldr_endpoints[subset])
2788
{
2789
((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;
2790
success = false;
2791
}
2792
else
2793
{
2794
for (uint32_t c = 0; c < 4; c++)
2795
{
2796
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2797
2798
int le = endpoints[subset][c][0];
2799
int he = endpoints[subset][c][1];
2800
2801
// FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
2802
//if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
2803
// See https://github.com/ARM-software/astc-encoder/issues/447
2804
if (dec_mode == cDecodeModeSRGB8)
2805
{
2806
le = (le << 8) | 0x80;
2807
he = (he << 8) | 0x80;
2808
}
2809
else
2810
{
2811
le = (le << 8) | le;
2812
he = (he << 8) | he;
2813
}
2814
2815
uint32_t k = weight_interpolate(le, he, w);
2816
2817
// FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
2818
// See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
2819
// It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
2820
((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
2821
}
2822
}
2823
2824
} // x
2825
} // y
2826
}
2827
2828
return success;
2829
}
2830
2831
//------------------------------------------------
2832
// Physical to logical block decoding
2833
2834
// unsigned 128-bit int, with some signed helpers
2835
class uint128
2836
{
2837
uint64_t m_lo, m_hi;
2838
2839
public:
2840
uint128() = default;
2841
inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
2842
inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
2843
inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
2844
2845
inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
2846
inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
2847
2848
inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
2849
inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
2850
inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
2851
inline explicit operator uint64_t () const { return m_lo; }
2852
2853
inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
2854
inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
2855
2856
inline uint64_t get_low() const { return m_lo; }
2857
inline uint64_t& get_low() { return m_lo; }
2858
2859
inline uint64_t get_high() const { return m_hi; }
2860
inline uint64_t& get_high() { return m_hi; }
2861
2862
inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
2863
inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
2864
2865
inline bool operator< (const uint128& rhs) const
2866
{
2867
if (m_hi < rhs.m_hi)
2868
return true;
2869
2870
if (m_hi == rhs.m_hi)
2871
{
2872
if (m_lo < rhs.m_lo)
2873
return true;
2874
}
2875
2876
return false;
2877
}
2878
2879
inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
2880
2881
inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
2882
inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
2883
2884
inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
2885
inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
2886
inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
2887
inline explicit operator bool() const { return is_non_zero(); }
2888
inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
2889
2890
inline bool signed_less(const uint128& rhs) const
2891
{
2892
const bool l_signed = is_signed(), r_signed = rhs.is_signed();
2893
2894
if (l_signed == r_signed)
2895
return *this < rhs;
2896
2897
if (l_signed && !r_signed)
2898
return true;
2899
2900
assert(!l_signed && r_signed);
2901
return false;
2902
}
2903
2904
inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
2905
inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
2906
inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
2907
2908
double get_double() const
2909
{
2910
double res = 0;
2911
2912
if (m_hi)
2913
res = (double)m_hi * pow(2.0f, 64.0f);
2914
2915
res += (double)m_lo;
2916
2917
return res;
2918
}
2919
2920
double get_signed_double() const
2921
{
2922
if (is_signed())
2923
return -(uint128(*this).abs().get_double());
2924
else
2925
return get_double();
2926
}
2927
2928
inline uint128 abs() const
2929
{
2930
uint128 res(*this);
2931
if (res.is_signed())
2932
res = -res;
2933
return res;
2934
}
2935
2936
inline uint128& operator<<= (int shift)
2937
{
2938
assert(shift >= 0);
2939
if (shift < 0)
2940
return *this;
2941
2942
m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
2943
2944
if ((shift) && (shift < 64))
2945
m_hi |= (m_lo >> (64 - shift));
2946
2947
m_lo = (shift >= 64) ? 0 : (m_lo << shift);
2948
2949
return *this;
2950
}
2951
2952
inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
2953
2954
inline uint128& operator>>= (int shift)
2955
{
2956
assert(shift >= 0);
2957
if (shift < 0)
2958
return *this;
2959
2960
m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
2961
2962
if ((shift) && (shift < 64))
2963
m_lo |= (m_hi << (64 - shift));
2964
2965
m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
2966
2967
return *this;
2968
}
2969
2970
inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
2971
2972
inline uint128 signed_shift_right(int shift) const
2973
{
2974
uint128 res(*this);
2975
res >>= shift;
2976
2977
if (is_signed())
2978
{
2979
uint128 x(0U);
2980
x = ~x;
2981
x >>= shift;
2982
res |= (~x);
2983
}
2984
2985
return res;
2986
}
2987
2988
inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
2989
inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
2990
2991
inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
2992
inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
2993
2994
inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
2995
inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
2996
2997
inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
2998
2999
inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
3000
3001
// prefix
3002
inline uint128 operator ++()
3003
{
3004
if (++m_lo == 0)
3005
++m_hi;
3006
return *this;
3007
}
3008
3009
// postfix
3010
inline uint128 operator ++(int)
3011
{
3012
uint128 res(*this);
3013
if (++m_lo == 0)
3014
++m_hi;
3015
return res;
3016
}
3017
3018
// prefix
3019
inline uint128 operator --()
3020
{
3021
const uint64_t t = m_lo;
3022
if (--m_lo > t)
3023
--m_hi;
3024
return *this;
3025
}
3026
3027
// postfix
3028
inline uint128 operator --(int)
3029
{
3030
const uint64_t t = m_lo;
3031
uint128 res(*this);
3032
if (--m_lo > t)
3033
--m_hi;
3034
return res;
3035
}
3036
3037
inline uint128& operator+= (const uint128& rhs)
3038
{
3039
const uint64_t t = m_lo + rhs.m_lo;
3040
m_hi = m_hi + rhs.m_hi + (t < m_lo);
3041
m_lo = t;
3042
return *this;
3043
}
3044
3045
inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
3046
3047
inline uint128& operator-= (const uint128& rhs)
3048
{
3049
const uint64_t t = m_lo - rhs.m_lo;
3050
m_hi = m_hi - rhs.m_hi - (t > m_lo);
3051
m_lo = t;
3052
return *this;
3053
}
3054
3055
inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
3056
3057
// computes bit by bit, very slow
3058
uint128& operator*=(const uint128& rhs)
3059
{
3060
uint128 temp(*this), result(0U);
3061
3062
for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
3063
if (bitmask.get_low() & 1)
3064
result += temp;
3065
3066
*this = result;
3067
return *this;
3068
}
3069
3070
uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
3071
3072
// computes bit by bit, very slow
3073
friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
3074
{
3075
remainder = 0;
3076
3077
if (!divisor)
3078
{
3079
assert(0);
3080
return ~uint128(0U);
3081
}
3082
3083
uint128 quotient(0), one(1);
3084
3085
for (int i = 127; i >= 0; i--)
3086
{
3087
remainder = (remainder << 1) | ((dividend >> i) & one);
3088
if (remainder >= divisor)
3089
{
3090
remainder -= divisor;
3091
quotient |= (one << i);
3092
}
3093
}
3094
3095
return quotient;
3096
}
3097
3098
uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }
3099
uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
3100
3101
uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
3102
uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
3103
3104
void print_hex(FILE* pFile) const
3105
{
3106
fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
3107
}
3108
3109
void format_unsigned(std::string& res) const
3110
{
3111
basisu::vector<uint8_t> digits;
3112
digits.reserve(39 + 1);
3113
3114
uint128 k(*this), ten(10);
3115
do
3116
{
3117
uint128 r;
3118
k = divide(k, ten, r);
3119
digits.push_back((uint8_t)r);
3120
} while (k);
3121
3122
for (int i = (int)digits.size() - 1; i >= 0; i--)
3123
res += ('0' + digits[i]);
3124
}
3125
3126
void format_signed(std::string& res) const
3127
{
3128
uint128 val(*this);
3129
3130
if (val.is_signed())
3131
{
3132
res.push_back('-');
3133
val = -val;
3134
}
3135
3136
val.format_unsigned(res);
3137
}
3138
3139
void print_unsigned(FILE* pFile)
3140
{
3141
std::string str;
3142
format_unsigned(str);
3143
fprintf(pFile, "%s", str.c_str());
3144
}
3145
3146
void print_signed(FILE* pFile)
3147
{
3148
std::string str;
3149
format_signed(str);
3150
fprintf(pFile, "%s", str.c_str());
3151
}
3152
3153
// Returns this value with all 128 bits in reverse order (bit 0 <-> bit 127, bit 1 <-> bit 126, ...).
// Works on the m_lo/m_hi values directly instead of reinterpreting the object as uint32_t[4],
// which avoids a strict-aliasing violation and any dependence on host byte order or member layout.
// NOTE(review): relies on rev_dword() reversing the bits of a 32-bit word, as the previous code did.
uint128 get_reversed_bits() const
{
	const uint32_t lo_dword0 = (uint32_t)(m_lo & 0xFFFFFFFFu), lo_dword1 = (uint32_t)(m_lo >> 32);
	const uint32_t hi_dword0 = (uint32_t)(m_hi & 0xFFFFFFFFu), hi_dword1 = (uint32_t)(m_hi >> 32);

	uint128 res;

	// The most significant dword becomes the least significant one (bit-reversed), and so on.
	res.m_lo = (uint64_t)rev_dword(hi_dword1) | (((uint64_t)rev_dword(hi_dword0)) << 32);
	res.m_hi = (uint64_t)rev_dword(lo_dword1) | (((uint64_t)rev_dword(lo_dword0)) << 32);

	return res;
}
3167
3168
// Returns a copy of this object whose 16 bytes are stored in the opposite order in memory.
uint128 get_byteswapped() const
{
	uint128 swapped;

	const uint8_t* pIn = reinterpret_cast<const uint8_t*>(this);
	uint8_t* pOut = reinterpret_cast<uint8_t*>(&swapped);

	// Source byte i lands at destination byte (15 - i).
	for (uint32_t i = 0; i < 16; i++)
		pOut[15 - i] = pIn[i];

	return swapped;
}
3180
3181
inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
3182
{
3183
assert(bit_ofs < 128);
3184
assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
3185
3186
uint128 res(*this);
3187
res >>= bit_ofs;
3188
3189
const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
3190
return res.get_low() & bitmask;
3191
}
3192
3193
inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
3194
{
3195
assert(bit_len <= 32);
3196
return (uint32_t)get_bits64(bit_ofs, bit_len);
3197
}
3198
3199
inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
3200
{
3201
assert(len && (len <= 32));
3202
uint32_t x = get_bits(bit_ofs, len);
3203
bit_ofs += len;
3204
return x;
3205
}
3206
3207
// Overwrites the num_bits-wide field (1..64 bits) at bit position bit_ofs with val.
// val must fit within num_bits, and the field must lie within the 128-bit value (asserts only).
// Returns a reference to *this.
inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
{
	assert(bit_ofs < 128);
	assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));

	// Build a mask of num_bits ones at bit 0.
	uint128 field_mask(1);
	field_mask = (field_mask << num_bits) - 1;
	assert(uint128(val) <= field_mask);

	// Move the mask to the field's position and clear the old contents.
	field_mask <<= bit_ofs;
	*this &= ~field_mask;

	// Merge in the new field contents.
	uint128 field(val);
	*this = *this | (field << bit_ofs);

	return *this;
}
3222
};
3223
3224
static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
3225
{
3226
if (bits.get_bits(10, 2) != 0b11)
3227
return false;
3228
3229
uint32_t bit_ofs = 12;
3230
const uint32_t min_s = bits.next_bits(bit_ofs, 13);
3231
const uint32_t max_s = bits.next_bits(bit_ofs, 13);
3232
const uint32_t min_t = bits.next_bits(bit_ofs, 13);
3233
const uint32_t max_t = bits.next_bits(bit_ofs, 13);
3234
assert(bit_ofs == 64);
3235
3236
const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);
3237
3238
if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))
3239
return false;
3240
3241
const bool hdr_flag = bits.get_bits(9, 1) != 0;
3242
3243
if (hdr_flag)
3244
log_blk.m_solid_color_flag_hdr = true;
3245
else
3246
log_blk.m_solid_color_flag_ldr = true;
3247
3248
log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);
3249
log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);
3250
log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);
3251
log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);
3252
3253
if (log_blk.m_solid_color_flag_hdr)
3254
{
3255
for (uint32_t c = 0; c < 4; c++)
3256
if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
3257
return false;
3258
}
3259
3260
return true;
3261
}
3262
3263
// One row of the block mode decode table (see s_dec_rows / decode_config()).
// *_ofs members are bit positions within the block, *_size members are field widths in bits.
struct astc_dec_row
{
	int8_t Dp_ofs;	// bit position of the dual plane flag, or negative if the row has none
	int8_t P_ofs;	// bit position of the P flag (selects the upper weight ISE ranges), or negative if none
	int8_t W_ofs;	// bit position of the grid width field
	int8_t W_size;	// size in bits of the grid width field (0 = width is just W_bias)
	int8_t H_ofs;	// bit position of the grid height field
	int8_t H_size;	// size in bits of the grid height field (0 = height is just H_bias)
	int8_t W_bias;	// value added to the grid width field
	int8_t H_bias;	// value added to the grid height field
	int8_t p0_ofs;	// bit position of bit 0 of the 3-bit weight range selector
	int8_t p1_ofs;	// bit position of bit 1 of the 3-bit weight range selector
	int8_t p2_ofs;	// bit position of bit 2 of the 3-bit weight range selector
};
3267
3268
// Block mode decode rows, indexed by the row_index computed in decode_config().
// The trailing comment on each row gives its (W_bias, H_bias) pair. A negative Dp_ofs/P_ofs
// means the row has no such flag (decode_config() then leaves it false).
static const astc_dec_row s_dec_rows[10] =
{
	// Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs
	{ 10,  9, 7, 2, 5, 2,  4,  2, 4, 0, 1 },	// 0:  4  2
	{ 10,  9, 7, 2, 5, 2,  8,  2, 4, 0, 1 },	// 1:  8  2
	{ 10,  9, 5, 2, 7, 2,  2,  8, 4, 0, 1 },	// 2:  2  8
	{ 10,  9, 5, 2, 7, 1,  2,  6, 4, 0, 1 },	// 3:  2  6

	{ 10,  9, 7, 1, 5, 2,  2,  2, 4, 0, 1 },	// 4:  2  2
	{ 10,  9, 0, 0, 5, 2, 12,  2, 4, 2, 3 },	// 5: 12  2
	{ 10,  9, 5, 2, 0, 0,  2, 12, 4, 2, 3 },	// 6:  2 12
	{ 10,  9, 0, 0, 0, 0,  6, 10, 4, 2, 3 },	// 7:  6 10

	{ 10,  9, 0, 0, 0, 0, 10,  6, 4, 2, 3 },	// 8: 10  6
	{ -1, -1, 5, 2, 9, 2,  6,  6, 4, 2, 3 },	// 9:  6  6
};
3284
3285
static bool decode_config(const uint128& bits, log_astc_block& log_blk)
3286
{
3287
// Reserved
3288
if (bits.get_bits(0, 4) == 0)
3289
return false;
3290
3291
// Reserved
3292
if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
3293
{
3294
if (bits.get_bits(2, 4) != 0b1111)
3295
return false;
3296
}
3297
3298
// Void extent
3299
if (bits.get_bits(0, 9) == 0b111111100)
3300
return decode_void_extent(bits, log_blk);
3301
3302
// Check rows
3303
const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
3304
const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
3305
const uint32_t x7_2 = bits.get_bits(7, 2);
3306
3307
int row_index = -1;
3308
if (x0_2 == 0)
3309
{
3310
if (x7_2 == 0b00)
3311
row_index = 5;
3312
else if (x7_2 == 0b01)
3313
row_index = 6;
3314
else if (x5_4 == 0b1100)
3315
row_index = 7;
3316
else if (x5_4 == 0b1101)
3317
row_index = 8;
3318
else if (x7_2 == 0b10)
3319
row_index = 9;
3320
}
3321
else
3322
{
3323
if (x2_2 == 0b00)
3324
row_index = 0;
3325
else if (x2_2 == 0b01)
3326
row_index = 1;
3327
else if (x2_2 == 0b10)
3328
row_index = 2;
3329
else if ((x2_2 == 0b11) && (x8_1 == 0))
3330
row_index = 3;
3331
else if ((x2_2 == 0b11) && (x8_1 == 1))
3332
row_index = 4;
3333
}
3334
if (row_index < 0)
3335
return false;
3336
3337
const astc_dec_row& r = s_dec_rows[row_index];
3338
3339
bool P = false, Dp = false;
3340
uint32_t W = r.W_bias, H = r.H_bias;
3341
3342
if (r.P_ofs >= 0)
3343
P = bits.get_bits(r.P_ofs, 1) != 0;
3344
3345
if (r.Dp_ofs >= 0)
3346
Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
3347
3348
if (r.W_size)
3349
W += bits.get_bits(r.W_ofs, r.W_size);
3350
3351
if (r.H_size)
3352
H += bits.get_bits(r.H_ofs, r.H_size);
3353
3354
assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
3355
assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
3356
3357
int p0 = bits.get_bits(r.p0_ofs, 1);
3358
int p1 = bits.get_bits(r.p1_ofs, 1);
3359
int p2 = bits.get_bits(r.p2_ofs, 1);
3360
3361
uint32_t p = p0 | (p1 << 1) | (p2 << 2);
3362
if (p < 2)
3363
return false;
3364
3365
log_blk.m_grid_width = (uint8_t)W;
3366
log_blk.m_grid_height = (uint8_t)H;
3367
3368
log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS));
3369
assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
3370
3371
log_blk.m_dual_plane = Dp;
3372
3373
return true;
3374
}
3375
3376
// Reads a 32-bit little-endian value from the 4 bytes at pBytes (no alignment requirement).
// Each byte is widened to uint32_t before shifting: a plain uint8_t promotes to (signed) int,
// and shifting a value >= 0x80 left by 24 would overflow into int's sign bit.
static inline uint32_t read_le_dword(const uint8_t* pBytes)
{
	return ((uint32_t)pBytes[0]) |
		(((uint32_t)pBytes[1]) << 8U) |
		(((uint32_t)pBytes[2]) << 16U) |
		(((uint32_t)pBytes[3]) << 24U);
}
3380
3381
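// See section 18.12, "Integer Sequence Encoding". The two tables below were computed by executing the decoder functions
// with every possible input: all 8-bit inputs for s_trit_decode (5 trits each) and all 7-bit inputs for s_quint_decode (3 quints each).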
3382
static const uint8_t s_trit_decode[256][5] =
3383
{
3384
{0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
3385
{0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
3386
{0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
3387
{0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
3388
{0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
3389
{0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
3390
{0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
3391
{0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
3392
{0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
3393
{0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
3394
{0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
3395
{0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
3396
{0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
3397
{0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
3398
{0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
3399
{0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
3400
{0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
3401
{0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
3402
{0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
3403
{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
3404
{0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
3405
{0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
3406
{0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
3407
{0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
3408
{0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
3409
{0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
3410
{0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
3411
{0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
3412
{0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
3413
{0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
3414
{0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
3415
{0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
3416
};
3417
3418
static const uint8_t s_quint_decode[128][3] =
3419
{
3420
{0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
3421
{0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
3422
{0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
3423
{0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
3424
{0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
3425
{0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
3426
{0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
3427
{0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
3428
{0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
3429
{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
3430
{0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
3431
{0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
3432
{0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
3433
{0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
3434
{0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
3435
{0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
3436
};
3437
3438
static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
3439
{
3440
assert((num_vals >= 1) && (num_vals <= 5));
3441
uint32_t m[5] = { 0 }, T = 0;
3442
3443
static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
3444
3445
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
3446
{
3447
if (bits_per_val)
3448
m[c] = bits.next_bits(bit_ofs, bits_per_val);
3449
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
3450
T_ofs += s_t_bits[c];
3451
}
3452
3453
const uint8_t (&p_trits)[5] = s_trit_decode[T];
3454
3455
for (uint32_t i = 0; i < num_vals; i++)
3456
pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
3457
}
3458
3459
static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
3460
{
3461
assert((num_vals >= 1) && (num_vals <= 3));
3462
uint32_t m[3] = { 0 }, T = 0;
3463
3464
static const uint8_t s_t_bits[3] = { 3, 2, 2 };
3465
3466
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
3467
{
3468
if (bits_per_val)
3469
m[c] = bits.next_bits(bit_ofs, bits_per_val);
3470
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
3471
T_ofs += s_t_bits[c];
3472
}
3473
3474
const uint8_t (&p_quints)[3] = s_quint_decode[T];
3475
3476
for (uint32_t i = 0; i < num_vals; i++)
3477
pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
3478
}
3479
3480
static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
3481
{
3482
assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
3483
3484
const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
3485
3486
if (g_ise_range_table[ise_range][1])
3487
{
3488
// Trits+bits, 5 vals per block, 7 bits extra per block
3489
const uint32_t total_blocks = (num_vals + 4) / 5;
3490
for (uint32_t b = 0; b < total_blocks; b++)
3491
{
3492
const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
3493
decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
3494
}
3495
}
3496
else if (g_ise_range_table[ise_range][2])
3497
{
3498
// Quints+bits, 3 vals per block, 8 bits extra per block
3499
const uint32_t total_blocks = (num_vals + 2) / 3;
3500
for (uint32_t b = 0; b < total_blocks; b++)
3501
{
3502
const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
3503
decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
3504
}
3505
}
3506
else
3507
{
3508
assert(bits_per_val);
3509
3510
// Only bits
3511
for (uint32_t i = 0; i < num_vals; i++)
3512
pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
3513
}
3514
}
3515
3516
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
3517
{
3518
const uint128 bits(
3519
(uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
3520
(uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
3521
3522
return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
3523
}
3524
3525
// Decodes a physical ASTC block to a logical ASTC block.
3526
// blk_width/blk_height are only used to validate the weight grid's dimensions.
3527
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
3528
{
3529
assert(is_valid_block_size(blk_width, blk_height));
3530
3531
const uint8_t* pS = (uint8_t*)pASTC_block;
3532
3533
log_blk.clear();
3534
log_blk.m_error_flag = true;
3535
3536
const uint128 bits(
3537
(uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
3538
(uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
3539
3540
const uint128 rev_bits(bits.get_reversed_bits());
3541
3542
if (!decode_config(bits, log_blk))
3543
return false;
3544
3545
if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
3546
{
3547
// Void extent
3548
log_blk.m_error_flag = false;
3549
return true;
3550
}
3551
3552
// Check grid dimensions
3553
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
3554
return false;
3555
3556
// Now we have the grid width/height, dual plane, weight ISE range
3557
3558
const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
3559
const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
3560
3561
// 18.24 Illegal Encodings
3562
if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
3563
return false;
3564
3565
const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
3566
3567
uint32_t total_extra_bits = 0;
3568
3569
// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
3570
3571
log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1);
3572
if (log_blk.m_num_partitions == 1)
3573
log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits
3574
else
3575
{
3576
// 2 or more partitions
3577
if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
3578
return false;
3579
3580
log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10);
3581
3582
uint32_t cem_bits = bits.get_bits(23, 6);
3583
3584
if ((cem_bits & 3) == 0)
3585
{
3586
// All CEM's the same
3587
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
3588
log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2);
3589
}
3590
else
3591
{
3592
// CEM's different, but within up to 2 adjacent classes
3593
const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
3594
3595
total_extra_bits = 3 * log_blk.m_num_partitions - 4;
3596
3597
if ((total_weight_bits + total_extra_bits) > 128)
3598
return false;
3599
3600
uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
3601
3602
uint32_t c[4] = { 0 }, m[4] = { 0 };
3603
3604
cem_bits >>= 2;
3605
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
3606
c[i] = cem_bits & 1;
3607
3608
switch (log_blk.m_num_partitions)
3609
{
3610
case 2:
3611
{
3612
m[0] = cem_bits & 3;
3613
m[1] = bits.next_bits(cem_bit_pos, 2);
3614
break;
3615
}
3616
case 3:
3617
{
3618
m[0] = cem_bits & 1;
3619
m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
3620
m[1] = bits.next_bits(cem_bit_pos, 2);
3621
m[2] = bits.next_bits(cem_bit_pos, 2);
3622
break;
3623
}
3624
case 4:
3625
{
3626
for (uint32_t i = 0; i < 4; i++)
3627
m[i] = bits.next_bits(cem_bit_pos, 2);
3628
break;
3629
}
3630
default:
3631
{
3632
assert(0);
3633
break;
3634
}
3635
}
3636
3637
assert(cem_bit_pos == end_of_weight_bit_ofs);
3638
3639
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
3640
{
3641
log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]);
3642
assert(log_blk.m_color_endpoint_modes[i] <= 15);
3643
}
3644
}
3645
}
3646
3647
// Now we have all the CEM indices.
3648
3649
if (log_blk.m_dual_plane)
3650
{
3651
// Read CCS bits, beneath any CEM bits
3652
total_extra_bits += 2;
3653
3654
if (total_extra_bits > end_of_weight_bit_ofs)
3655
return false;
3656
3657
uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
3658
log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2));
3659
}
3660
3661
uint32_t config_bit_pos = 11 + 2; // config+num_parts
3662
if (log_blk.m_num_partitions == 1)
3663
config_bit_pos += 4; // CEM bits
3664
else
3665
config_bit_pos += 10 + 6; // part_id+CEM bits
3666
3667
// config+num_parts+total_extra_bits (CEM extra+CCS)
3668
uint32_t total_config_bits = config_bit_pos + total_extra_bits;
3669
3670
// Compute number of remaining bits in block
3671
const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
3672
if (num_remaining_bits < 0)
3673
return false;
3674
3675
// Compute total number of ISE encoded color endpoint mode values
3676
uint32_t total_cem_vals = 0;
3677
for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
3678
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
3679
3680
if (total_cem_vals > MAX_ENDPOINTS)
3681
return false;
3682
3683
// Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
3684
int endpoint_ise_range = -1;
3685
for (int k = 20; k > 0; k--)
3686
{
3687
int b = get_ise_sequence_bits(total_cem_vals, k);
3688
if (b <= num_remaining_bits)
3689
{
3690
endpoint_ise_range = k;
3691
break;
3692
}
3693
}
3694
3695
// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
3696
if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
3697
return false;
3698
3699
log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;
3700
3701
// Decode endpoints forwards in block
3702
decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
3703
3704
// Decode grid weights backwards in block
3705
decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
3706
3707
log_blk.m_error_flag = false;
3708
3709
return true;
3710
}
3711
3712
} // namespace astc_helpers
3713
3714
#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
3715
3716