// Source: GitHub repository godotengine/godot, path blob/master/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
// basisu_astc_helpers.h
// Be sure to define BASISU_ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.
#pragma once
#ifndef BASISU_ASTC_HELPERS_HEADER
#define BASISU_ASTC_HELPERS_HEADER

#include <assert.h>
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

namespace astc_helpers
{
// Core ASTC limits used throughout these helpers.
const uint32_t MAX_WEIGHT_VALUE = 64; // grid texel weights must range from [0,64]
const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; // # of endpoint values for CEM 11 and CEM 7, i.e. 2 + 2 * (cem >> 2)

// Table of the supported block sizes (defined in the implementation section).
// Each entry is a pair of dimensions - presumably { width, height }, TODO confirm against callers.
static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];

// The Color Endpoint Modes (CEM's).
// The enumerator value is the CEM index; get_num_cem_values() derives the number of
// endpoint values from it as 2 + 2 * (cem >> 2).
enum cems
{
	CEM_LDR_LUM_DIRECT = 0,
	CEM_LDR_LUM_BASE_PLUS_OFS = 1,
	CEM_HDR_LUM_LARGE_RANGE = 2,
	CEM_HDR_LUM_SMALL_RANGE = 3,
	CEM_LDR_LUM_ALPHA_DIRECT = 4,
	CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
	CEM_LDR_RGB_BASE_SCALE = 6,
	CEM_HDR_RGB_BASE_SCALE = 7,
	CEM_LDR_RGB_DIRECT = 8,
	CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
	CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
	CEM_HDR_RGB = 11,
	CEM_LDR_RGBA_DIRECT = 12,
	CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
	CEM_HDR_RGB_LDR_ALPHA = 14,
	CEM_HDR_RGB_HDR_ALPHA = 15
};

// All Bounded Integer Sequence Coding (BISE or ISE) ranges.
// The enumerator value is the ISE range index used to index g_ise_range_table.
// Weights: Ranges [0,11] are valid.
// Endpoints: Ranges [4,20] are valid.
enum bise_levels
{
	BISE_2_LEVELS = 0,
	BISE_3_LEVELS = 1,
	BISE_4_LEVELS = 2,
	BISE_5_LEVELS = 3,
	BISE_6_LEVELS = 4,
	BISE_8_LEVELS = 5,
	BISE_10_LEVELS = 6,
	BISE_12_LEVELS = 7,
	BISE_16_LEVELS = 8,
	BISE_20_LEVELS = 9,
	BISE_24_LEVELS = 10,
	BISE_32_LEVELS = 11,
	BISE_40_LEVELS = 12,
	BISE_48_LEVELS = 13,
	BISE_64_LEVELS = 14,
	BISE_80_LEVELS = 15,
	BISE_96_LEVELS = 16,
	BISE_128_LEVELS = 17,
	BISE_160_LEVELS = 18,
	BISE_192_LEVELS = 19,
	BISE_256_LEVELS = 20
};
71
72
const uint32_t TOTAL_ISE_RANGES = 21;
73
74
// Valid endpoint ISE ranges
75
const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
76
const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
77
const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
78
79
// Valid weight ISE ranges
80
const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
81
const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
82
const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
83
84
// The ISE range table.
85
extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)

// Possible Color Component Select values, used in dual plane mode.
// The CCS component will be interpolated using the 2nd weight plane.
enum ccs
{
	CCS_GBA_R = 0,
	CCS_RBA_G = 1,
	CCS_RGA_B = 2,
	CCS_RGB_A = 3
};

// A physical (packed) ASTC block: four 32-bit words, i.e. 128 bits.
struct astc_block
{
	uint32_t m_vals[4];
};

const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
106
107
struct log_astc_block
108
{
109
bool m_error_flag;
110
111
bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
112
113
uint8_t m_user_mode; // user defined value, not used in this module
114
115
// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
116
uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
117
118
bool m_dual_plane;
119
120
uint8_t m_weight_ise_range; // 0-11
121
uint8_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
122
123
uint8_t m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode
124
125
uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
126
uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
127
128
uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
129
130
union
131
{
132
// ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
133
uint8_t m_weights[MAX_GRID_WEIGHTS];
134
uint16_t m_solid_color[4];
135
};
136
137
// ISE endpoint values
138
// Endpoint order examples:
139
// 1 subset LA : LL0 LH0 AL0 AH0
140
// 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
141
// 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
142
// 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
143
// 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
144
// 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
145
uint8_t m_endpoints[MAX_ENDPOINTS];
146
147
void clear()
148
{
149
memset(this, 0, sizeof(*this));
150
}
151
};

// Debug-only range check: asserts that v lies in the half-open interval [l, h) and returns v unchanged.
// (The (void) casts keep the parameters "used" when assert() compiles to nothing.)
inline int bounds_check(int v, int l, int h)
{
	(void)v; (void)l; (void)h;
	assert(v >= l && v < h);
	return v;
}

inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h)
{
	(void)v; (void)l; (void)h;
	assert(v >= l && v < h);
	return v;
}

// Returns bits [low, high] (both inclusive) of val, right-aligned.
// Requires 0 <= low <= high <= 31.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
	// A negative shift count, or one of 32 or more, is undefined for a 32-bit value, so validate both ends.
	assert((low >= 0) && (low <= high) && (high <= 31));

	const int num_bits = (high - low) + 1;
	const uint32_t shifted = val >> low;

	// A full 32-bit field needs no mask (and 1u << 32 would itself be undefined).
	if (num_bits == 32)
		return shifted;

	return shifted & ((1u << num_bits) - 1u);
}
168
169
// Returns the number of levels in the given ISE range.
170
inline uint32_t get_ise_levels(uint32_t ise_range)
171
{
172
assert(ise_range < TOTAL_ISE_RANGES);
173
return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
174
}
175
176
inline int get_ise_sequence_bits(int count, int range)
177
{
178
// See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)
179
int total_bits = g_ise_range_table[range][0] * count;
180
total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
181
total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
182
return total_bits;
183
}
184
185
inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
186
{
187
assert(w <= MAX_WEIGHT_VALUE);
188
return (l * (64 - w) + h * w + 32) >> 6;
189
}

// Packs num_vals ISE symbols from pSrc_vals with ASTC's BISE, starting at bit_pos, and ORs the result into pDst[0..3].
// If pStats is non-null, the number of bits written is added to *pStats.
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);

// Bit-count statistics accumulated by the packing functions.
struct pack_stats
{
	uint32_t m_header_bits;
	uint32_t m_endpoint_bits;
	uint32_t m_weight_bits;

	// Starts with all counters at zero.
	inline pack_stats() { clear(); }

	// Resets all counters to zero.
	inline void clear()
	{
		m_header_bits = 0;
		m_endpoint_bits = 0;
		m_weight_bits = 0;
	}
};
202
203
// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
204
bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr);
205
206
// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
207
void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);
208
209
// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
210
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);

// These helpers are all quite slow, but are useful for table preparation.

// Dequantizes ISE encoded endpoint val to [0,255]
// NOTE(review): this was annotated "ISE ranges 4-11", but this header declares endpoint ranges [4,20] as valid - confirm against the implementation.
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range);

// Dequantizes ISE encoded weight val to [0,64]
// NOTE(review): this was annotated "ISE ranges 0-10", but this header declares weight ranges [0,11] as valid - confirm against the implementation.
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range);

uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);

void create_quant_tables(
	uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
	uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
	uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
	uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
	uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
	bool weight_flag); // false if block endpoints, true if weights
230
231
// True if the CEM is LDR.
232
bool is_cem_ldr(uint32_t mode);
233
inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
234
235
// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
236
bool is_valid_block_size(uint32_t w, uint32_t h);
237
238
bool block_has_any_hdr_cems(const log_astc_block& log_blk);
239
bool block_has_any_ldr_cems(const log_astc_block& log_blk);

// Returns the # of endpoint values for the given CEM.
// CEMs 0-3 use 2 values, 4-7 use 4, 8-11 use 6 and 12-15 use 8.
inline uint32_t get_num_cem_values(uint32_t cem)
{
	assert(cem <= 15);

	const uint32_t cem_class = cem >> 2;
	return 2 + 2 * cem_class;
}
243
244
struct dequant_table
245
{
246
basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
247
basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
248
basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
249
basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
250
251
void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs)
252
{
253
m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256);
254
m_ISE_to_val.resize(num_levels);
255
if (init_rank_tabs)
256
{
257
m_ISE_to_rank.resize(num_levels);
258
m_rank_to_ISE.resize(num_levels);
259
}
260
}
261
};
262
263
struct dequant_tables
264
{
265
dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
266
dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
267
268
const dequant_table& get_weight_tab(uint32_t range) const
269
{
270
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
271
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
272
}
273
274
dequant_table& get_weight_tab(uint32_t range)
275
{
276
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
277
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
278
}
279
280
const dequant_table& get_endpoint_tab(uint32_t range) const
281
{
282
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
283
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
284
}
285
286
dequant_table& get_endpoint_tab(uint32_t range)
287
{
288
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
289
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
290
}
291
292
void init(bool init_rank_tabs)
293
{
294
for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
295
{
296
const uint32_t num_levels = get_ise_levels(range);
297
dequant_table& tab = get_weight_tab(range);
298
299
tab.init(true, num_levels, init_rank_tabs);
300
301
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true);
302
}
303
304
for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
305
{
306
const uint32_t num_levels = get_ise_levels(range);
307
dequant_table& tab = get_endpoint_tab(range);
308
309
tab.init(false, num_levels, init_rank_tabs);
310
311
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false);
312
}
313
}
314
};
315
316
extern dequant_tables g_dequant_tables;
317
void init_tables(bool init_rank_tabs);

// One bilinear sample description, as produced by compute_upsample_weights().
struct weighted_sample
{
	uint8_t m_src_x;
	uint8_t m_src_y;
	uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
};
325
326
void compute_upsample_weights(
327
int block_width, int block_height,
328
int weight_grid_width, int weight_grid_height,
329
weighted_sample* pWeights); // there will be block_width * block_height bilinear samples
330
331
void upsample_weight_grid(
332
uint32_t bx, uint32_t by, // destination/to dimension
333
uint32_t wx, uint32_t wy, // source/from dimension
334
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
335
uint8_t* pDst_weights); // [by][bx]
336
337
// Procedurally returns the texel partition/subset index given the block coordinate and config.
338
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
339
340
void blue_contract(
341
int r, int g, int b, int a,
342
int& dr, int& dg, int& db, int& da);
343
344
void bit_transfer_signed(int& a, int& b);
345
346
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);

// A half float stored as its raw 16-bit pattern.
typedef uint16_t half_float;
half_float float_to_half(float val, bool toward_zero);
float half_to_float(half_float hval);

// Converts a 16-bit qlog16 value (5-bit exponent in bits 15..11, 11-bit mantissa in bits 10..0) to half float bits.
// Notes:
// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
// However, this is not lossless in the general sense.
inline half_float qlog16_to_half(int k)
{
	assert((k >= 0) && (k <= 0xFFFF));

	const int exponent = (k & 0xF800) >> 11;
	const int mantissa = k & 0x7FF;

	// Piecewise-linear remap of the mantissa; the segment boundaries are 512 and 1536.
	int remapped;
	if (mantissa >= 1536)
		remapped = 5 * mantissa - 2048;
	else if (mantissa >= 512)
		remapped = 4 * mantissa - 512;
	else
		remapped = 3 * mantissa;

	return (half_float)((exponent << 10) + (remapped >> 3));
}

// RGB 9E5 (shared exponent) helpers.
const int MAX_RGB9E5 = 0xff80;
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
uint32_t pack_rgb9e5(float r, float g, float b);

// Output pixel format selector for decode_block().
enum decode_mode
{
	cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks
	cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks
	cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks
	cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
};
384
385
// Decodes logical block to output pixels.
386
// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
387
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
388
389
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
390
391
// Unpack a physical ASTC encoded GPU texture block to a logical block description.
392
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);

} // namespace astc_helpers

#endif // BASISU_ASTC_HELPERS_HEADER

//------------------------------------------------------------------

#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION

namespace astc_helpers
{
// Minimal min/max helpers. On ties my_min returns b and my_max returns b (the comparison is strict).
template<typename T> inline T my_min(T a, T b)
{
	return (a < b) ? a : b;
}

template<typename T> inline T my_max(T a, T b)
{
	return (a > b) ? a : b;
}
406
407
const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {
408
{ 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },
409
{ 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },
410
{ 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },
411
{ 12, 10 }, { 12, 12 }
412
};
413
414
const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
415
{
416
//b t q
417
//2 3 5 // rng ise_index notes
418
{ 1, 0, 0 }, // 0..1 0
419
{ 0, 1, 0 }, // 0..2 1
420
{ 2, 0, 0 }, // 0..3 2
421
{ 0, 0, 1 }, // 0..4 3
422
{ 1, 1, 0 }, // 0..5 4 min endpoint ISE index
423
{ 3, 0, 0 }, // 0..7 5
424
{ 1, 0, 1 }, // 0..9 6
425
{ 2, 1, 0 }, // 0..11 7
426
{ 4, 0, 0 }, // 0..15 8
427
{ 2, 0, 1 }, // 0..19 9
428
{ 3, 1, 0 }, // 0..23 10
429
{ 5, 0, 0 }, // 0..31 11 max weight ISE index
430
{ 3, 0, 1 }, // 0..39 12
431
{ 4, 1, 0 }, // 0..47 13
432
{ 6, 0, 0 }, // 0..63 14
433
{ 4, 0, 1 }, // 0..79 15
434
{ 5, 1, 0 }, // 0..95 16
435
{ 7, 0, 0 }, // 0..127 17
436
{ 5, 0, 1 }, // 0..159 18
437
{ 6, 1, 0 }, // 0..191 19
438
{ 8, 0, 0 }, // 0..255 20
439
};

// ORs the low codesize bits of code into the byte buffer at bit_offset, then advances bit_offset by codesize.
// codesize may be 0 (no-op) to 9. At most two bytes are touched, and the buffer is addressed bytewise.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
{
	assert(codesize <= 9);
	// Bits of code above codesize would be ORed into the output too, so require a clean value.
	assert(code < (1u << codesize));

	if (!codesize)
		return;

	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst);

	const uint32_t shift = bit_offset & 7;
	const uint32_t byte_index = bit_offset >> 3;
	const uint32_t shifted_code = code << shift;

	pBytes[byte_index] |= (uint8_t)shifted_code;

	// Only touch the following byte when the code actually straddles the byte boundary.
	if (codesize > (8 - shift))
		pBytes[byte_index + 1] |= (uint8_t)(shifted_code >> 8);

	bit_offset += codesize;
}

// Returns bits [low, high] (both inclusive) of bits, right-aligned.
// Requires 0 <= low <= high <= 31.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
	assert((low >= 0) && (low <= high) && (high <= 31));

	const int width = high - low + 1;

	// Build the mask in unsigned arithmetic: a signed (1 << 31) overflows, and a shift by 32 is undefined.
	const uint32_t mask = (width >= 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);

	return (bits >> low) & mask;
}

// Writes bits to output in an endian safe way
// ORs the low total_bits (at most 31) bits of value into the buffer at bit_pos, one byte at a time,
// and advances bit_pos by total_bits.
static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
{
	assert(total_bits <= 31);
	assert(value < (1u << total_bits));

	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

	while (total_bits)
	{
		const uint32_t bit_ofs = bit_pos & 7;
		const uint32_t room_in_byte = 8 - bit_ofs;
		const uint32_t bits_to_write = (total_bits < room_in_byte) ? total_bits : room_in_byte;

		// Mask the chunk so that an out-of-range value cannot spill into bits beyond the requested field in release builds.
		const uint32_t chunk = value & ((1u << bits_to_write) - 1u);
		pBytes[bit_pos >> 3] |= static_cast<uint8_t>(chunk << bit_ofs);

		bit_pos += bits_to_write;
		total_bits -= bits_to_write;
		value >>= bits_to_write;
	}
}

// Maps three packed quints (q0 + 5*q1 + 25*q2, range [0,124]) to the 7-bit code stored in the block.
static const uint8_t g_astc_quint_encode[125] =
{
	0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
	58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
	105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
	126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
};
493
494
// Encodes 3 values to output, usable for any range that uses quints and bits
495
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats)
496
{
497
// First extract the quints and the bits from the 3 input values
498
int quints = 0, bits[3];
499
const uint32_t bit_mask = (1 << n) - 1;
500
for (int i = 0; i < 3; i++)
501
{
502
static const int s_muls[3] = { 1, 5, 25 };
503
504
const int t = pValues[i] >> n;
505
506
quints += t * s_muls[i];
507
bits[i] = pValues[i] & bit_mask;
508
}
509
510
// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
511
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
512
513
assert(quints < 125);
514
const int T = g_astc_quint_encode[quints];
515
516
// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
517
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
518
(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
519
520
if (pStats)
521
*pStats += n * 3 + 7;
522
}

// Maps five packed trits (t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4, range [0,242]) to the 8-bit code stored in the block.
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
	43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
	131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
	208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
	225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
	191, 223, 124, 125, 126 };
530
531
// Encodes 5 values to output, usable for any range that uses trits and bits
532
static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats)
533
{
534
// First extract the trits and the bits from the 5 input values
535
int trits = 0, bits[5];
536
const uint32_t bit_mask = (1 << n) - 1;
537
for (int i = 0; i < 5; i++)
538
{
539
static const int s_muls[5] = { 1, 3, 9, 27, 81 };
540
541
const int t = pValues[i] >> n;
542
543
trits += t * s_muls[i];
544
bits[i] = pValues[i] & bit_mask;
545
}
546
547
// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
548
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
549
550
assert(trits < 243);
551
const int T = g_astc_trit_encode[trits];
552
553
// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
554
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
555
556
astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
557
(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
558
559
if (pStats)
560
*pStats += n * 5 + 8;
561
}
562
563
// Packs values using ASTC's BISE to output buffer.
564
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats)
565
{
566
uint32_t temp[5] = { 0 };
567
568
const int num_bits = g_ise_range_table[range][0];
569
570
int group_size = 0;
571
if (g_ise_range_table[range][1])
572
group_size = 5;
573
else if (g_ise_range_table[range][2])
574
group_size = 3;
575
576
#ifndef NDEBUG
577
const uint32_t num_levels = get_ise_levels(range);
578
for (int i = 0; i < num_vals; i++)
579
{
580
assert(pSrc_vals[i] < num_levels);
581
}
582
#endif
583
584
if (group_size)
585
{
586
// Range has trits or quints - pack each group of 5 or 3 values
587
const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
588
589
for (int group_index = 0; group_index < total_groups; group_index++)
590
{
591
uint8_t vals[5] = { 0 };
592
593
const int limit = my_min(group_size, num_vals - group_index * group_size);
594
for (int i = 0; i < limit; i++)
595
vals[i] = pSrc_vals[group_index * group_size + i];
596
597
// Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed.
598
// get_ise_sequence_bits() returns the # of bits that must be written for proper decoding.
599
if (group_size == 5)
600
astc_encode_trits(temp, vals, bit_pos, num_bits, pStats);
601
else
602
astc_encode_quints(temp, vals, bit_pos, num_bits, pStats);
603
}
604
}
605
else
606
{
607
for (int i = 0; i < num_vals; i++)
608
astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
609
610
if (pStats)
611
*pStats += num_vals * num_bits;
612
}
613
614
pDst[0] |= temp[0]; pDst[1] |= temp[1];
615
pDst[2] |= temp[2]; pDst[3] |= temp[3];
616
}

// Reverses the order of all 32 bits (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
inline uint32_t rev_dword(uint32_t bits)
{
	uint32_t v = (bits << 16) | (bits >> 16); // swap the 16-bit halves
	v = ((v & 0x00ff00ffu) << 8) | ((v & 0xff00ff00u) >> 8); // swap bytes within each half
	v = ((v & 0x0f0f0f0fu) << 4) | ((v & 0xf0f0f0f0u) >> 4); // swap nibbles within each byte
	v = ((v & 0x33333333u) << 2) | ((v & 0xccccccccu) >> 2); // swap bit pairs within each nibble
	v = ((v & 0x55555555u) << 1) | ((v & 0xaaaaaaaau) >> 1); // swap adjacent bits
	return v;
}

// True if value is non-negative and fits in num_bits unsigned bits, i.e. 0 <= value < 2^num_bits.
static inline bool is_packable(int value, int num_bits)
{
	assert((num_bits >= 1) && (num_bits < 31));

	if (value < 0)
		return false;

	return value < (1 << num_bits);
}
627
628
static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
629
{
630
config_bits = 0;
631
632
const int W = log_block.m_grid_width, H = log_block.m_grid_height;
633
634
const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
635
const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
636
637
// See Tables 81-82
638
// Compute p from weight range
639
uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
640
641
// Rearrange p's bits to p0 p2 p1
642
p = (p >> 1) + ((p & 1) << 2);
643
644
// Try encoding each row of table 82.
645
646
// W+4 H+2
647
if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
648
{
649
config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
650
return true;
651
}
652
653
// W+8 H+2
654
if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
655
{
656
config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
657
return true;
658
}
659
660
// W+2 H+8
661
if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
662
{
663
config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
664
return true;
665
}
666
667
// W+2 H+6
668
if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
669
{
670
config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
671
return true;
672
}
673
674
// W+2 H+2
675
if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
676
{
677
config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
678
return true;
679
}
680
681
// 12 H+2
682
if ((W == 12) && is_packable(H - 2, 2))
683
{
684
config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
685
return true;
686
}
687
688
// W+2 12
689
if ((H == 12) && is_packable(W - 2, 2))
690
{
691
config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
692
return true;
693
}
694
695
// 6 10
696
if ((W == 6) && (H == 10))
697
{
698
config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
699
return true;
700
}
701
702
// 10 6
703
if ((W == 10) && (H == 6))
704
{
705
config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
706
return true;
707
}
708
709
// W+6 H+6 (no dual plane or high prec)
710
if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
711
{
712
config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);
713
return true;
714
}
715
716
// Failed: unsupported weight grid dimensions or config.
717
return false;
718
}
719
720
bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats)
721
{
722
memset(&phys_block, 0, sizeof(phys_block));
723
724
if (pExpected_endpoint_range)
725
*pExpected_endpoint_range = -1;
726
727
assert(!log_block.m_error_flag);
728
if (log_block.m_error_flag)
729
return false;
730
731
if (log_block.m_solid_color_flag_ldr)
732
{
733
pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
734
return true;
735
}
736
else if (log_block.m_solid_color_flag_hdr)
737
{
738
pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
739
return true;
740
}
741
742
if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS))
743
return false;
744
745
// Max usable weight range is 11
746
if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE)
747
return false;
748
749
// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
750
if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
751
return false;
752
753
if (log_block.m_color_component_selector > 3)
754
return false;
755
756
// TODO: sanity check grid width/height vs. block's physical width/height
757
758
uint32_t config_bits = 0;
759
if (!get_config_bits(log_block, config_bits))
760
return false;
761
762
uint32_t bit_pos = 0;
763
astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
764
if (pStats)
765
pStats->m_header_bits += 11;
766
767
const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
768
const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
769
770
// 18.24 Illegal Encodings
771
if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
772
return false;
773
774
uint32_t total_extra_bits = 0;
775
776
astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
777
if (pStats)
778
pStats->m_header_bits += 2;
779
780
if (log_block.m_num_partitions > 1)
781
{
782
if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
783
return false;
784
785
astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
786
if (pStats)
787
pStats->m_header_bits += 10;
788
789
uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
790
for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
791
{
792
highest_cem = my_max<uint32_t>(highest_cem, log_block.m_color_endpoint_modes[j]);
793
lowest_cem = my_min<uint32_t>(lowest_cem, log_block.m_color_endpoint_modes[j]);
794
}
795
796
if (highest_cem > 15)
797
return false;
798
799
// Ensure CEM range is contiguous
800
if (((highest_cem >> 2) > (1 + (lowest_cem >> 2))))
801
return false;
802
803
// See tables 79/80
804
uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2;
805
if (lowest_cem != highest_cem)
806
{
807
encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2));
808
809
// See tables at 23.11 Color Endpoint Mode
810
for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
811
{
812
const int M = log_block.m_color_endpoint_modes[j] & 3;
813
814
const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1);
815
if ((C & 1) != C)
816
return false;
817
818
encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j));
819
}
820
821
total_extra_bits = 3 * log_block.m_num_partitions - 4;
822
823
if ((total_weight_bits + total_extra_bits) > 128)
824
return false;
825
826
uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
827
astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
828
if (pStats)
829
pStats->m_header_bits += total_extra_bits;
830
}
831
832
astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
833
if (pStats)
834
pStats->m_header_bits += 6;
835
}
836
else
837
{
838
if (log_block.m_partition_id)
839
return false;
840
if (log_block.m_color_endpoint_modes[0] > 15)
841
return false;
842
843
astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
844
if (pStats)
845
pStats->m_header_bits += 4;
846
}
847
848
if (log_block.m_dual_plane)
849
{
850
if (log_block.m_num_partitions > 3)
851
return false;
852
853
total_extra_bits += 2;
854
855
uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
856
astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
857
if (pStats)
858
pStats->m_header_bits += 2;
859
}
860
861
const uint32_t total_config_bits = bit_pos + total_extra_bits;
862
const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
863
if (num_remaining_bits < 0)
864
return false;
865
866
uint32_t total_cem_vals = 0;
867
for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
868
total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2);
869
870
if (total_cem_vals > MAX_ENDPOINTS)
871
return false;
872
873
int endpoint_ise_range = -1;
874
for (int k = 20; k > 0; k--)
875
{
876
int bits = get_ise_sequence_bits(total_cem_vals, k);
877
if (bits <= num_remaining_bits)
878
{
879
endpoint_ise_range = k;
880
break;
881
}
882
}
883
884
// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
885
if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
886
return false;
887
888
// Ensure the caller utilized the right endpoint ISE range.
889
if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
890
{
891
if (pExpected_endpoint_range)
892
*pExpected_endpoint_range = endpoint_ise_range;
893
return false;
894
}
895
896
if (pStats)
897
{
898
pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range);
899
pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
900
}
901
902
// Pack endpoints forwards
903
encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
904
905
// Pack weights backwards
906
uint32_t weight_data[4] = { 0 };
907
encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);
908
909
for (uint32_t i = 0; i < 4; i++)
910
phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]);
911
912
return true;
913
}
914
915
// Expands a num_src_bits-wide value to num_dst_bits by repeating its bit pattern from the
// most significant end downwards; the final (partial) copy keeps only the source's top bits.
//   src          - value to expand, must fit in num_src_bits
//   num_src_bits - width of src in bits
//   num_dst_bits - width of the result in bits, must be >= num_src_bits
// Returns the expanded value.
static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
{
	assert(num_src_bits <= num_dst_bits);
	assert((src & ((1 << num_src_bits) - 1)) == src);

	uint32_t result = 0;

	// pos is the bit position of the LSB of the next copy; it goes negative for the last, truncated copy.
	int pos = num_dst_bits - num_src_bits;
	while (pos > -num_src_bits)
	{
		if (pos >= 0)
			result |= (src << pos);
		else
			result |= (src >> -pos);

		pos -= num_src_bits;
	}

	return result;
}
926
927
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
928
{
929
assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
930
assert(val < get_ise_levels(ise_range));
931
932
uint32_t u = 0;
933
934
switch (ise_range)
935
{
936
case 5:
937
{
938
u = bit_replication_scale(val, 3, 8);
939
break;
940
}
941
case 8:
942
{
943
u = bit_replication_scale(val, 4, 8);
944
break;
945
}
946
case 11:
947
{
948
u = bit_replication_scale(val, 5, 8);
949
break;
950
}
951
case 14:
952
{
953
u = bit_replication_scale(val, 6, 8);
954
break;
955
}
956
case 17:
957
{
958
u = bit_replication_scale(val, 7, 8);
959
break;
960
}
961
case 20:
962
{
963
u = val;
964
break;
965
}
966
case 4:
967
case 6:
968
case 7:
969
case 9:
970
case 10:
971
case 12:
972
case 13:
973
case 15:
974
case 16:
975
case 18:
976
case 19:
977
{
978
const uint32_t num_bits = g_ise_range_table[ise_range][0];
979
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
980
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
981
982
// compute Table 103 row index
983
const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
984
985
assert(range_index >= 0 && range_index <= 10);
986
987
uint32_t bits = val & ((1 << num_bits) - 1);
988
uint32_t tval = val >> num_bits;
989
990
assert(tval < (num_trits ? 3U : 5U));
991
992
uint32_t a = bits & 1;
993
uint32_t b = (bits >> 1) & 1;
994
uint32_t c = (bits >> 2) & 1;
995
uint32_t d = (bits >> 3) & 1;
996
uint32_t e = (bits >> 4) & 1;
997
uint32_t f = (bits >> 5) & 1;
998
999
uint32_t A = a ? 511 : 0;
1000
uint32_t B = 0;
1001
1002
switch (range_index)
1003
{
1004
case 2:
1005
{
1006
// 876543210
1007
// b000b0bb0
1008
B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
1009
break;
1010
}
1011
case 3:
1012
{
1013
// 876543210
1014
// b0000bb00
1015
B = (b << 2) | (b << 3) | (b << 8);
1016
break;
1017
}
1018
case 4:
1019
{
1020
// 876543210
1021
// cb000cbcb
1022
B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
1023
break;
1024
}
1025
case 5:
1026
{
1027
// 876543210
1028
// cb0000cbc
1029
B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
1030
break;
1031
}
1032
case 6:
1033
{
1034
// 876543210
1035
// dcb000dcb
1036
B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
1037
break;
1038
}
1039
case 7:
1040
{
1041
// 876543210
1042
// dcb0000dc
1043
B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
1044
break;
1045
}
1046
case 8:
1047
{
1048
// 876543210
1049
// edcb000ed
1050
B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
1051
break;
1052
}
1053
case 9:
1054
{
1055
// 876543210
1056
// edcb0000e
1057
B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
1058
break;
1059
}
1060
case 10:
1061
{
1062
// 876543210
1063
// fedcb000f
1064
B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
1065
break;
1066
}
1067
default:
1068
break;
1069
}
1070
1071
static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
1072
uint32_t C = C_vals[range_index];
1073
uint32_t D = tval;
1074
1075
u = D * C + B;
1076
u = u ^ A;
1077
u = (A & 0x80) | (u >> 2);
1078
1079
break;
1080
}
1081
default:
1082
{
1083
assert(0);
1084
break;
1085
}
1086
}
1087
1088
return u;
1089
}
1090
1091
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
1092
{
1093
assert(val < get_ise_levels(ise_range));
1094
1095
uint32_t u = 0;
1096
switch (ise_range)
1097
{
1098
case 0:
1099
{
1100
u = val ? 63 : 0;
1101
break;
1102
}
1103
case 1: // 0-2
1104
{
1105
const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
1106
u = s_tab_0_2[val];
1107
break;
1108
}
1109
case 2: // 0-3
1110
{
1111
u = bit_replication_scale(val, 2, 6);
1112
break;
1113
}
1114
case 3: // 0-4
1115
{
1116
const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
1117
u = s_tab_0_4[val];
1118
break;
1119
}
1120
case 5: // 0-7
1121
{
1122
u = bit_replication_scale(val, 3, 6);
1123
break;
1124
}
1125
case 8: // 0-15
1126
{
1127
u = bit_replication_scale(val, 4, 6);
1128
break;
1129
}
1130
case 11: // 0-31
1131
{
1132
u = bit_replication_scale(val, 5, 6);
1133
break;
1134
}
1135
case 4: // 0-5
1136
case 6: // 0-9
1137
case 7: // 0-11
1138
case 9: // 0-19
1139
case 10: // 0-23
1140
{
1141
const uint32_t num_bits = g_ise_range_table[ise_range][0];
1142
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
1143
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
1144
1145
// compute Table 103 row index
1146
const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
1147
1148
// Extract bits and tris/quints from value
1149
const uint32_t bits = val & ((1u << num_bits) - 1);
1150
const uint32_t D = val >> num_bits;
1151
1152
assert(D < (num_trits ? 3U : 5U));
1153
1154
// Now dequantize
1155
// See Table 103. ASTC weight unquantization parameters
1156
static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
1157
1158
const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
1159
1160
const uint32_t A = (a == 0) ? 0 : 0x7F;
1161
1162
uint32_t B = 0;
1163
if (range_index == 4)
1164
B = ((b << 6) | (b << 2) | (b << 0));
1165
else if (range_index == 5)
1166
B = ((b << 6) | (b << 1));
1167
else if (range_index == 6)
1168
B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
1169
1170
const uint32_t C = C_table[range_index - 2];
1171
1172
u = D * C + B;
1173
u = u ^ A;
1174
u = (A & 0x20) | (u >> 2);
1175
break;
1176
}
1177
default:
1178
assert(0);
1179
break;
1180
}
1181
1182
if (u > 32)
1183
u++;
1184
1185
return u;
1186
}
1187
1188
// Returns the nearest ISE symbol given a [0,255] endpoint value.
1189
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
1190
{
1191
assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
1192
1193
const uint32_t total_levels = get_ise_levels(ise_range);
1194
int best_e = INT_MAX, best_index = 0;
1195
for (uint32_t i = 0; i < total_levels; i++)
1196
{
1197
const int qv = dequant_bise_endpoint(i, ise_range);
1198
int e = labs(v - qv);
1199
if (e < best_e)
1200
{
1201
best_e = e;
1202
best_index = i;
1203
if (!best_e)
1204
break;
1205
}
1206
}
1207
return best_index;
1208
}
1209
1210
// Returns the nearest ISE weight given a [0,64] endpoint value.
1211
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
1212
{
1213
assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
1214
assert(v <= (int)MAX_WEIGHT_VALUE);
1215
1216
const uint32_t total_levels = get_ise_levels(ise_range);
1217
int best_e = INT_MAX, best_index = 0;
1218
for (uint32_t i = 0; i < total_levels; i++)
1219
{
1220
const int qv = dequant_bise_weight(i, ise_range);
1221
int e = labs(v - qv);
1222
if (e < best_e)
1223
{
1224
best_e = e;
1225
best_index = i;
1226
if (!best_e)
1227
break;
1228
}
1229
}
1230
return best_index;
1231
}
1232
1233
void create_quant_tables(
1234
uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
1235
uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
1236
uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
1237
uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
1238
uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
1239
bool weight_flag) // false if block endpoints, true if weights
1240
{
1241
const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256;
1242
1243
for (uint32_t i = 0; i < num_dequant_vals; i++)
1244
{
1245
uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
1246
1247
if (pVal_to_ise)
1248
pVal_to_ise[i] = (uint8_t)bise_index;
1249
1250
if (pISE_to_val)
1251
pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
1252
}
1253
1254
if (pISE_to_rank || pRank_to_ISE)
1255
{
1256
const uint32_t num_levels = get_ise_levels(ise_range);
1257
1258
if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
1259
{
1260
// Only bits
1261
for (uint32_t i = 0; i < num_levels; i++)
1262
{
1263
if (pISE_to_rank)
1264
pISE_to_rank[i] = (uint8_t)i;
1265
1266
if (pRank_to_ISE)
1267
pRank_to_ISE[i] = (uint8_t)i;
1268
}
1269
}
1270
else
1271
{
1272
// Range has trits or quints
1273
uint32_t vals[256];
1274
for (uint32_t i = 0; i < num_levels; i++)
1275
{
1276
uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
1277
1278
// Low=ISE value
1279
// High=dequantized value
1280
vals[i] = (v << 16) | i;
1281
}
1282
1283
// Sorts by dequantized value
1284
std::sort(vals, vals + num_levels);
1285
1286
for (uint32_t rank = 0; rank < num_levels; rank++)
1287
{
1288
uint32_t ise_val = (uint8_t)vals[rank];
1289
1290
if (pISE_to_rank)
1291
pISE_to_rank[ise_val] = (uint8_t)rank;
1292
1293
if (pRank_to_ISE)
1294
pRank_to_ISE[rank] = (uint8_t)ise_val;
1295
}
1296
}
1297
}
1298
}
1299
1300
// Writes a 16-byte void-extent (constant color) block with the LDR header pattern.
//   blk            - destination block; all 16 bytes are overwritten
//   rh, gh, bh, ah - 16-bit R, G, B, A values, stored little-endian in bytes 8..15
//   pStats         - optional; m_header_bits is increased by 128
// Bytes 2..7 are left as all ones.
void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats)
{
	uint8_t* pBytes = (uint8_t*)&blk.m_vals[0];
	memset(pBytes, 0xFF, 16);

	// Header: differs from the HDR variant only in bit 1 of byte 1, which is clear here.
	pBytes[0] = 0xFC;
	pBytes[1] = 0xFD;

	const uint16_t comps[4] = { rh, gh, bh, ah };
	for (uint32_t c = 0; c < 4; c++)
	{
		pBytes[8 + c * 2] = (uint8_t)comps[c];
		pBytes[9 + c * 2] = (uint8_t)(comps[c] >> 8);
	}

	if (pStats)
		pStats->m_header_bits += 128;
}
1320
1321
// rh-ah are half-floats
1322
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats)
1323
{
1324
uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
1325
memset(pDst, 0xFF, 16);
1326
1327
pDst[0] = 0b11111100;
1328
1329
pDst[8] = (uint8_t)rh;
1330
pDst[9] = (uint8_t)(rh >> 8);
1331
pDst[10] = (uint8_t)gh;
1332
pDst[11] = (uint8_t)(gh >> 8);
1333
pDst[12] = (uint8_t)bh;
1334
pDst[13] = (uint8_t)(bh >> 8);
1335
pDst[14] = (uint8_t)ah;
1336
pDst[15] = (uint8_t)(ah >> 8);
1337
1338
if (pStats)
1339
pStats->m_header_bits += 128;
1340
}
1341
1342
bool is_cem_ldr(uint32_t mode)
1343
{
1344
switch (mode)
1345
{
1346
case CEM_LDR_LUM_DIRECT:
1347
case CEM_LDR_LUM_BASE_PLUS_OFS:
1348
case CEM_LDR_LUM_ALPHA_DIRECT:
1349
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
1350
case CEM_LDR_RGB_BASE_SCALE:
1351
case CEM_LDR_RGB_DIRECT:
1352
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
1353
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
1354
case CEM_LDR_RGBA_DIRECT:
1355
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
1356
return true;
1357
default:
1358
break;
1359
}
1360
1361
return false;
1362
}
1363
1364
bool is_valid_block_size(uint32_t w, uint32_t h)
1365
{
1366
assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM));
1367
assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM));
1368
1369
#define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true;
1370
SIZECHK(4, 4);
1371
SIZECHK(5, 4);
1372
1373
SIZECHK(5, 5);
1374
1375
SIZECHK(6, 5);
1376
SIZECHK(6, 6);
1377
1378
SIZECHK(8, 5);
1379
SIZECHK(8, 6);
1380
SIZECHK(10, 5);
1381
SIZECHK(10, 6);
1382
1383
SIZECHK(8, 8);
1384
SIZECHK(10, 8);
1385
SIZECHK(10, 10);
1386
1387
SIZECHK(12, 10);
1388
SIZECHK(12, 12);
1389
#undef SIZECHK
1390
1391
return false;
1392
}
1393
1394
bool block_has_any_hdr_cems(const log_astc_block& log_blk)
1395
{
1396
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
1397
1398
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
1399
if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
1400
return true;
1401
1402
return false;
1403
}
1404
1405
bool block_has_any_ldr_cems(const log_astc_block& log_blk)
1406
{
1407
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
1408
1409
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
1410
if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
1411
return true;
1412
1413
return false;
1414
}
1415
1416
dequant_tables g_dequant_tables;
1417
1418
void precompute_texel_partitions_4x4();
1419
void precompute_texel_partitions_6x6();
1420
1421
void init_tables(bool init_rank_tabs)
1422
{
1423
g_dequant_tables.init(init_rank_tabs);
1424
1425
precompute_texel_partitions_4x4();
1426
precompute_texel_partitions_6x6();
1427
}
1428
1429
void compute_upsample_weights(
1430
int block_width, int block_height,
1431
int weight_grid_width, int weight_grid_height,
1432
weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
1433
{
1434
const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
1435
const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
1436
1437
for (int texelY = 0; texelY < block_height; texelY++)
1438
{
1439
for (int texelX = 0; texelX < block_width; texelX++)
1440
{
1441
const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
1442
const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
1443
const uint32_t jX = gX >> 4;
1444
const uint32_t jY = gY >> 4;
1445
const uint32_t fX = gX & 0xf;
1446
const uint32_t fY = gY & 0xf;
1447
const uint32_t w11 = (fX * fY + 8) >> 4;
1448
const uint32_t w10 = fY - w11;
1449
const uint32_t w01 = fX - w11;
1450
const uint32_t w00 = 16 - fX - fY + w11;
1451
1452
weighted_sample& s = pWeights[texelX + texelY * block_width];
1453
s.m_src_x = (uint8_t)jX;
1454
s.m_src_y = (uint8_t)jY;
1455
s.m_weights[0][0] = (uint8_t)w00;
1456
s.m_weights[0][1] = (uint8_t)w01;
1457
s.m_weights[1][0] = (uint8_t)w10;
1458
s.m_weights[1][1] = (uint8_t)w11;
1459
}
1460
}
1461
}
1462
1463
// Should be dequantized [0,64] weights
1464
void upsample_weight_grid(
1465
uint32_t bx, uint32_t by, // destination/to dimension
1466
uint32_t wx, uint32_t wy, // source/from dimension
1467
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
1468
uint8_t* pDst_weights) // [by][bx]
1469
{
1470
assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
1471
assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
1472
1473
const uint32_t total_src_weights = wx * wy;
1474
const uint32_t total_dst_weights = bx * by;
1475
1476
if (total_src_weights == total_dst_weights)
1477
{
1478
memcpy(pDst_weights, pSrc_weights, total_src_weights);
1479
return;
1480
}
1481
1482
weighted_sample weights[12 * 12];
1483
compute_upsample_weights(bx, by, wx, wy, weights);
1484
1485
const weighted_sample* pS = weights;
1486
1487
for (uint32_t y = 0; y < by; y++)
1488
{
1489
for (uint32_t x = 0; x < bx; x++, ++pS)
1490
{
1491
const uint32_t w00 = pS->m_weights[0][0];
1492
const uint32_t w01 = pS->m_weights[0][1];
1493
const uint32_t w10 = pS->m_weights[1][0];
1494
const uint32_t w11 = pS->m_weights[1][1];
1495
1496
assert(w00 || w01 || w10 || w11);
1497
1498
const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
1499
1500
uint32_t total = 8;
1501
if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
1502
if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
1503
if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
1504
if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
1505
1506
pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
1507
}
1508
}
1509
}
1510
1511
// 32-bit integer mixing function; compute_texel_partition() uses it to turn a partition
// seed into pseudo-random bits. All arithmetic wraps modulo 2^32.
inline uint32_t hash52(uint32_t v)
{
	uint32_t h = v;

	h ^= h >> 15;
	h -= h << 17;
	h += h << 7;
	h += h << 4;

	h ^= h >> 5;
	h += h << 16;
	h ^= h >> 7;
	h ^= h >> 3;

	h ^= h << 6;
	h ^= h >> 17;

	return h;
}
1519
1520
// small_block = num_blk_pixels < 31
1521
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
1522
{
1523
assert(zIn == 0);
1524
1525
const uint32_t x = small_block ? xIn << 1 : xIn;
1526
const uint32_t y = small_block ? yIn << 1 : yIn;
1527
const uint32_t z = small_block ? zIn << 1 : zIn;
1528
const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
1529
const uint32_t rnum = hash52(seed);
1530
1531
uint8_t seed1 = (uint8_t)(rnum & 0xf);
1532
uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
1533
uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
1534
uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
1535
uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
1536
uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
1537
uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
1538
uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
1539
uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
1540
uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
1541
uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
1542
uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
1543
1544
seed1 = (uint8_t)(seed1 * seed1);
1545
seed2 = (uint8_t)(seed2 * seed2);
1546
seed3 = (uint8_t)(seed3 * seed3);
1547
seed4 = (uint8_t)(seed4 * seed4);
1548
seed5 = (uint8_t)(seed5 * seed5);
1549
seed6 = (uint8_t)(seed6 * seed6);
1550
seed7 = (uint8_t)(seed7 * seed7);
1551
seed8 = (uint8_t)(seed8 * seed8);
1552
seed9 = (uint8_t)(seed9 * seed9);
1553
seed10 = (uint8_t)(seed10 * seed10);
1554
seed11 = (uint8_t)(seed11 * seed11);
1555
seed12 = (uint8_t)(seed12 * seed12);
1556
1557
const int shA = (seed & 2) != 0 ? 4 : 5;
1558
const int shB = (num_partitions == 3) ? 6 : 5;
1559
const int sh1 = (seed & 1) != 0 ? shA : shB;
1560
const int sh2 = (seed & 1) != 0 ? shB : shA;
1561
const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
1562
1563
seed1 = (uint8_t)(seed1 >> sh1);
1564
seed2 = (uint8_t)(seed2 >> sh2);
1565
seed3 = (uint8_t)(seed3 >> sh1);
1566
seed4 = (uint8_t)(seed4 >> sh2);
1567
seed5 = (uint8_t)(seed5 >> sh1);
1568
seed6 = (uint8_t)(seed6 >> sh2);
1569
seed7 = (uint8_t)(seed7 >> sh1);
1570
seed8 = (uint8_t)(seed8 >> sh2);
1571
seed9 = (uint8_t)(seed9 >> sh3);
1572
seed10 = (uint8_t)(seed10 >> sh3);
1573
seed11 = (uint8_t)(seed11 >> sh3);
1574
seed12 = (uint8_t)(seed12 >> sh3);
1575
1576
const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
1577
const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
1578
const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
1579
const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
1580
1581
return (a >= b && a >= c && a >= d) ? 0
1582
: (b >= c && b >= d) ? 1
1583
: (c >= d) ? 2
1584
: 3;
1585
}
1586
1587
// Precomputed 4x4 partition patterns: [seed][0] is for 2 subsets, [seed][1] for 3 subsets.
// Each texel's subset index takes 2 bits, at bit position (x * 2 + y * 8).
static uint32_t g_texel_partitions_4x4[1024][2];

// Precomputed 6x6 partition patterns: [seed][x + y * 6].
// The low 4 bits hold the 2-subset index, the high 4 bits the 3-subset index.
static uint8_t g_texel_partitions_6x6[1024][6 * 6];
1592
1593
void precompute_texel_partitions_4x4()
1594
{
1595
for (uint32_t p = 0; p < 1024; p++)
1596
{
1597
uint32_t v2 = 0, v3 = 0;
1598
1599
for (uint32_t y = 0; y < 4; y++)
1600
{
1601
for (uint32_t x = 0; x < 4; x++)
1602
{
1603
const uint32_t shift = x * 2 + y * 8;
1604
v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift);
1605
v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift);
1606
}
1607
}
1608
1609
g_texel_partitions_4x4[p][0] = v2;
1610
g_texel_partitions_4x4[p][1] = v3;
1611
}
1612
}
1613
1614
void precompute_texel_partitions_6x6()
1615
{
1616
for (uint32_t p = 0; p < 1024; p++)
1617
{
1618
for (uint32_t y = 0; y < 6; y++)
1619
{
1620
for (uint32_t x = 0; x < 6; x++)
1621
{
1622
const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false);
1623
const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false);
1624
1625
assert((p2 <= 1) && (p3 <= 2));
1626
g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2);
1627
}
1628
}
1629
}
1630
}
1631
1632
static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
1633
{
1634
assert(g_texel_partitions_4x4[1][0]);
1635
assert(seed < 1024);
1636
assert((x <= 3) && (y <= 3));
1637
assert((num_partitions >= 2) && (num_partitions <= 3));
1638
1639
const uint32_t shift = x * 2 + y * 8;
1640
return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
1641
}
1642
1643
static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
1644
{
1645
assert(g_texel_partitions_6x6[0][0]);
1646
assert(seed < 1024);
1647
assert((x <= 5) && (y <= 5));
1648
assert((num_partitions >= 2) && (num_partitions <= 3));
1649
1650
const uint32_t shift = (num_partitions == 3) ? 4 : 0;
1651
return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3;
1652
}
1653
1654
// Blue contraction: red and green are each averaged with blue, blue and alpha pass through.
//   r, g, b, a     - input color
//   dr, dg, db, da - receive the contracted color
void blue_contract(
	int r, int g, int b, int a,
	int &dr, int &dg, int &db, int &da)
{
	const int red_blue_sum = r + b;
	const int green_blue_sum = g + b;

	dr = red_blue_sum >> 1;
	dg = green_blue_sum >> 1;
	db = b;
	da = a;
}
1663
1664
// Moves the top bit (0x80) of a into the top bit of b, then reinterprets what is left of a
// as a signed 6-bit offset. Both arguments are modified in place.
//   a - in: 8-bit value; out: signed value in [-32, 31]
//   b - in: 8-bit value; out: (b >> 1) with a's former bit 7 as its bit 7
inline void bit_transfer_signed(int& a, int& b)
{
	b = (b >> 1) | (a & 0x80);

	a = (a >> 1) & 0x3F;

	// Bit 5 is the sign bit of the 6-bit result.
	if (a & 0x20)
		a -= 0x40;
}
1673
1674
// Limits a to the range [l, h]. The lower bound is tested first.
static inline int clamp(int a, int l, int h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
1682
1683
// Limits a to the range [l, h]. The lower bound is tested first.
static inline float clampf(float a, float l, float h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
1691
1692
// Sign-extends the low num_src_bits bits of src to a full int.
//   src          - value whose low num_src_bits bits hold a two's complement number
//   num_src_bits - width of that number, in [2,31]
// If bit (num_src_bits - 1) is set, all higher bits of the result are set; otherwise
// they are cleared.
inline int sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// Build the masks in unsigned arithmetic: for num_src_bits == 31 a signed (1 << 31)
	// overflows int. Both results fit in a non-negative int.
	const int value_mask = (int)((1u << num_src_bits) - 1u);
	const int sign_bit = (int)(1u << (num_src_bits - 1));

	if ((src & sign_bit) != 0)
		return src | ~value_mask;

	return src & value_mask;
}
1702
1703
// endpoints is [4][2]
1704
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
1705
{
1706
assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
1707
1708
int v0 = pE[0], v1 = pE[1];
1709
1710
int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
1711
int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
1712
1713
switch (cem_index)
1714
{
1715
case CEM_LDR_LUM_DIRECT:
1716
{
1717
e0_r = v0; e1_r = v1;
1718
e0_g = v0; e1_g = v1;
1719
e0_b = v0; e1_b = v1;
1720
e0_a = 0xFF; e1_a = 0xFF;
1721
break;
1722
}
1723
case CEM_LDR_LUM_BASE_PLUS_OFS:
1724
{
1725
int l0 = (v0 >> 2) | (v1 & 0xc0);
1726
int l1 = l0 + (v1 & 0x3f);
1727
1728
if (l1 > 0xFF)
1729
l1 = 0xFF;
1730
1731
e0_r = l0; e1_r = l1;
1732
e0_g = l0; e1_g = l1;
1733
e0_b = l0; e1_b = l1;
1734
e0_a = 0xFF; e1_a = 0xFF;
1735
break;
1736
}
1737
case CEM_LDR_LUM_ALPHA_DIRECT:
1738
{
1739
int v2 = pE[2], v3 = pE[3];
1740
1741
e0_r = v0; e1_r = v1;
1742
e0_g = v0; e1_g = v1;
1743
e0_b = v0; e1_b = v1;
1744
e0_a = v2; e1_a = v3;
1745
break;
1746
}
1747
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
1748
{
1749
int v2 = pE[2], v3 = pE[3];
1750
1751
bit_transfer_signed(v1, v0);
1752
bit_transfer_signed(v3, v2);
1753
1754
e0_r = v0; e1_r = v0 + v1;
1755
e0_g = v0; e1_g = v0 + v1;
1756
e0_b = v0; e1_b = v0 + v1;
1757
e0_a = v2; e1_a = v2 + v3;
1758
1759
for (uint32_t c = 0; c < 4; c++)
1760
{
1761
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1762
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1763
}
1764
1765
break;
1766
}
1767
case CEM_LDR_RGB_BASE_SCALE:
1768
{
1769
int v2 = pE[2], v3 = pE[3];
1770
1771
e0_r = (v0 * v3) >> 8; e1_r = v0;
1772
e0_g = (v1 * v3) >> 8; e1_g = v1;
1773
e0_b = (v2 * v3) >> 8; e1_b = v2;
1774
e0_a = 0xFF; e1_a = 0xFF;
1775
1776
break;
1777
}
1778
case CEM_LDR_RGB_DIRECT:
1779
{
1780
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1781
1782
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
1783
{
1784
e0_r = v0; e1_r = v1;
1785
e0_g = v2; e1_g = v3;
1786
e0_b = v4; e1_b = v5;
1787
e0_a = 0xFF; e1_a = 0xFF;
1788
}
1789
else
1790
{
1791
blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
1792
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
1793
}
1794
1795
break;
1796
}
1797
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
1798
{
1799
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1800
1801
bit_transfer_signed(v1, v0);
1802
bit_transfer_signed(v3, v2);
1803
bit_transfer_signed(v5, v4);
1804
1805
if ((v1 + v3 + v5) >= 0)
1806
{
1807
e0_r = v0; e1_r = v0 + v1;
1808
e0_g = v2; e1_g = v2 + v3;
1809
e0_b = v4; e1_b = v4 + v5;
1810
e0_a = 0xFF; e1_a = 0xFF;
1811
}
1812
else
1813
{
1814
blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
1815
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
1816
}
1817
1818
for (uint32_t c = 0; c < 4; c++)
1819
{
1820
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1821
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1822
}
1823
1824
break;
1825
}
1826
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
1827
{
1828
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
1829
1830
e0_r = (v0 * v3) >> 8; e1_r = v0;
1831
e0_g = (v1 * v3) >> 8; e1_g = v1;
1832
e0_b = (v2 * v3) >> 8; e1_b = v2;
1833
e0_a = v4; e1_a = v5;
1834
1835
break;
1836
}
1837
case CEM_LDR_RGBA_DIRECT:
1838
{
1839
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
1840
1841
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
1842
{
1843
e0_r = v0; e1_r = v1;
1844
e0_g = v2; e1_g = v3;
1845
e0_b = v4; e1_b = v5;
1846
e0_a = v6; e1_a = v7;
1847
}
1848
else
1849
{
1850
blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
1851
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
1852
}
1853
1854
break;
1855
}
1856
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
1857
{
1858
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
1859
1860
bit_transfer_signed(v1, v0);
1861
bit_transfer_signed(v3, v2);
1862
bit_transfer_signed(v5, v4);
1863
bit_transfer_signed(v7, v6);
1864
1865
if ((v1 + v3 + v5) >= 0)
1866
{
1867
e0_r = v0; e1_r = v0 + v1;
1868
e0_g = v2; e1_g = v2 + v3;
1869
e0_b = v4; e1_b = v4 + v5;
1870
e0_a = v6; e1_a = v6 + v7;
1871
}
1872
else
1873
{
1874
blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
1875
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
1876
}
1877
1878
for (uint32_t c = 0; c < 4; c++)
1879
{
1880
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
1881
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
1882
}
1883
1884
break;
1885
}
1886
case CEM_HDR_LUM_LARGE_RANGE:
1887
{
1888
int y0, y1;
1889
if (v1 >= v0)
1890
{
1891
y0 = (v0 << 4);
1892
y1 = (v1 << 4);
1893
}
1894
else
1895
{
1896
y0 = (v1 << 4) + 8;
1897
y1 = (v0 << 4) - 8;
1898
}
1899
1900
e0_r = y0; e1_r = y1;
1901
e0_g = y0; e1_g = y1;
1902
e0_b = y0; e1_b = y1;
1903
e0_a = 0x780; e1_a = 0x780;
1904
1905
break;
1906
}
1907
case CEM_HDR_LUM_SMALL_RANGE:
1908
{
1909
int y0, y1, d;
1910
1911
if ((v0 & 0x80) != 0)
1912
{
1913
y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
1914
d = (v1 & 0x1F) << 2;
1915
}
1916
else
1917
{
1918
y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
1919
d = (v1 & 0x0F) << 1;
1920
}
1921
1922
y1 = y0 + d;
1923
if (y1 > 0xFFF)
1924
y1 = 0xFFF;
1925
1926
e0_r = y0; e1_r = y1;
1927
e0_g = y0; e1_g = y1;
1928
e0_b = y0; e1_b = y1;
1929
e0_a = 0x780; e1_a = 0x780;
1930
1931
break;
1932
}
1933
case CEM_HDR_RGB_BASE_SCALE:
1934
{
1935
int v2 = pE[2], v3 = pE[3];
1936
1937
int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
1938
1939
int majcomp, mode;
1940
if ((modeval & 0xC) != 0xC)
1941
{
1942
majcomp = modeval >> 2;
1943
mode = modeval & 3;
1944
}
1945
else if (modeval != 0xF)
1946
{
1947
majcomp = modeval & 3;
1948
mode = 4;
1949
}
1950
else
1951
{
1952
majcomp = 0;
1953
mode = 5;
1954
}
1955
1956
int red = v0 & 0x3f;
1957
int green = v1 & 0x1f;
1958
int blue = v2 & 0x1f;
1959
int scale = v3 & 0x1f;
1960
1961
int x0 = (v1 >> 6) & 1;
1962
int x1 = (v1 >> 5) & 1;
1963
int x2 = (v2 >> 6) & 1;
1964
int x3 = (v2 >> 5) & 1;
1965
int x4 = (v3 >> 7) & 1;
1966
int x5 = (v3 >> 6) & 1;
1967
int x6 = (v3 >> 5) & 1;
1968
1969
int ohm = 1 << mode;
1970
if (ohm & 0x30) green |= x0 << 6;
1971
if (ohm & 0x3A) green |= x1 << 5;
1972
if (ohm & 0x30) blue |= x2 << 6;
1973
if (ohm & 0x3A) blue |= x3 << 5;
1974
if (ohm & 0x3D) scale |= x6 << 5;
1975
if (ohm & 0x2D) scale |= x5 << 6;
1976
if (ohm & 0x04) scale |= x4 << 7;
1977
if (ohm & 0x3B) red |= x4 << 6;
1978
if (ohm & 0x04) red |= x3 << 6;
1979
if (ohm & 0x10) red |= x5 << 7;
1980
if (ohm & 0x0F) red |= x2 << 7;
1981
if (ohm & 0x05) red |= x1 << 8;
1982
if (ohm & 0x0A) red |= x0 << 8;
1983
if (ohm & 0x05) red |= x0 << 9;
1984
if (ohm & 0x02) red |= x6 << 9;
1985
if (ohm & 0x01) red |= x3 << 10;
1986
if (ohm & 0x02) red |= x5 << 10;
1987
1988
static const int s_shamts[6] = { 1,1,2,3,4,5 };
1989
1990
const int shamt = s_shamts[mode];
1991
red <<= shamt;
1992
green <<= shamt;
1993
blue <<= shamt;
1994
scale <<= shamt;
1995
1996
if (mode != 5)
1997
{
1998
green = red - green;
1999
blue = red - blue;
2000
}
2001
2002
if (majcomp == 1)
2003
std::swap(red, green);
2004
2005
if (majcomp == 2)
2006
std::swap(red, blue);
2007
2008
e1_r = clamp(red, 0, 0xFFF);
2009
e1_g = clamp(green, 0, 0xFFF);
2010
e1_b = clamp(blue, 0, 0xFFF);
2011
e1_a = 0x780;
2012
2013
e0_r = clamp(red - scale, 0, 0xFFF);
2014
e0_g = clamp(green - scale, 0, 0xFFF);
2015
e0_b = clamp(blue - scale, 0, 0xFFF);
2016
e0_a = 0x780;
2017
2018
break;
2019
}
2020
case CEM_HDR_RGB_HDR_ALPHA:
2021
case CEM_HDR_RGB_LDR_ALPHA:
2022
case CEM_HDR_RGB:
2023
{
2024
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
2025
2026
int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
2027
2028
e0_a = 0x780;
2029
e1_a = 0x780;
2030
2031
if (majcomp == 3)
2032
{
2033
e0_r = v0 << 4;
2034
e0_g = v2 << 4;
2035
e0_b = (v4 & 0x7f) << 5;
2036
2037
e1_r = v1 << 4;
2038
e1_g = v3 << 4;
2039
e1_b = (v5 & 0x7f) << 5;
2040
}
2041
else
2042
{
2043
int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
2044
int va = v0 | ((v1 & 0x40) << 2);
2045
int vb0 = v2 & 0x3f;
2046
int vb1 = v3 & 0x3f;
2047
int vc = v1 & 0x3f;
2048
int vd0 = v4 & 0x7f;
2049
int vd1 = v5 & 0x7f;
2050
2051
static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
2052
vd0 = sign_extend(vd0, s_dbitstab[mode]);
2053
vd1 = sign_extend(vd1, s_dbitstab[mode]);
2054
2055
int x0 = (v2 >> 6) & 1;
2056
int x1 = (v3 >> 6) & 1;
2057
int x2 = (v4 >> 6) & 1;
2058
int x3 = (v5 >> 6) & 1;
2059
int x4 = (v4 >> 5) & 1;
2060
int x5 = (v5 >> 5) & 1;
2061
2062
int ohm = 1 << mode;
2063
if (ohm & 0xA4) va |= x0 << 9;
2064
if (ohm & 0x08) va |= x2 << 9;
2065
if (ohm & 0x50) va |= x4 << 9;
2066
if (ohm & 0x50) va |= x5 << 10;
2067
if (ohm & 0xA0) va |= x1 << 10;
2068
if (ohm & 0xC0) va |= x2 << 11;
2069
if (ohm & 0x04) vc |= x1 << 6;
2070
if (ohm & 0xE8) vc |= x3 << 6;
2071
if (ohm & 0x20) vc |= x2 << 7;
2072
if (ohm & 0x5B) vb0 |= x0 << 6;
2073
if (ohm & 0x5B) vb1 |= x1 << 6;
2074
if (ohm & 0x12) vb0 |= x2 << 7;
2075
if (ohm & 0x12) vb1 |= x3 << 7;
2076
2077
int shamt = (mode >> 1) ^ 3;
2078
va = (uint32_t)va << shamt;
2079
vb0 = (uint32_t)vb0 << shamt;
2080
vb1 = (uint32_t)vb1 << shamt;
2081
vc = (uint32_t)vc << shamt;
2082
vd0 = (uint32_t)vd0 << shamt;
2083
vd1 = (uint32_t)vd1 << shamt;
2084
2085
e1_r = clamp(va, 0, 0xFFF);
2086
e1_g = clamp(va - vb0, 0, 0xFFF);
2087
e1_b = clamp(va - vb1, 0, 0xFFF);
2088
2089
e0_r = clamp(va - vc, 0, 0xFFF);
2090
e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
2091
e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
2092
2093
if (majcomp == 1)
2094
{
2095
std::swap(e0_r, e0_g);
2096
std::swap(e1_r, e1_g);
2097
}
2098
else if (majcomp == 2)
2099
{
2100
std::swap(e0_r, e0_b);
2101
std::swap(e1_r, e1_b);
2102
}
2103
}
2104
2105
if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
2106
{
2107
int v6 = pE[6], v7 = pE[7];
2108
2109
e0_a = v6;
2110
e1_a = v7;
2111
}
2112
else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
2113
{
2114
int v6 = pE[6], v7 = pE[7];
2115
2116
// Extract mode bits
2117
int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
2118
v6 &= 0x7F;
2119
v7 &= 0x7F;
2120
2121
if (mode == 3)
2122
{
2123
e0_a = v6 << 5;
2124
e1_a = v7 << 5;
2125
}
2126
else
2127
{
2128
v6 |= (v7 << (mode + 1)) & 0x780;
2129
v7 &= (0x3F >> mode);
2130
v7 ^= (0x20 >> mode);
2131
v7 -= (0x20 >> mode);
2132
v6 <<= (4 - mode);
2133
v7 <<= (4 - mode);
2134
2135
v7 += v6;
2136
v7 = clamp(v7, 0, 0xFFF);
2137
e0_a = v6;
2138
e1_a = v7;
2139
}
2140
}
2141
2142
break;
2143
}
2144
default:
2145
{
2146
assert(0);
2147
for (uint32_t c = 0; c < 4; c++)
2148
{
2149
pEndpoints[c][0] = 0;
2150
pEndpoints[c][1] = 0;
2151
}
2152
break;
2153
}
2154
}
2155
}
2156
2157
// Returns true when get_bits(v, 10, 14) equals 31, i.e. when the half-float's exponent field is all ones (the infinity/NaN encodings).
// NOTE(review): get_bits() is defined elsewhere; it is presumed to extract the inclusive bit range [10,14] - confirm.
static inline bool is_half_inf_or_nan(half_float v)
{
	const bool exponent_all_ones = (get_bits(v, 10, 14) == 31);
	return exponent_all_ones;
}
2161
2162
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
2163
half_float float_to_half(float val, bool toward_zero)
2164
{
2165
union { float f; int32_t i; uint32_t u; } fi = { val };
2166
const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
2167
int s = flt_s, e = 0, m = 0;
2168
2169
// inf/NaN
2170
if (flt_e == 0xff)
2171
{
2172
e = 31;
2173
if (flt_m != 0) // NaN
2174
m = 1;
2175
}
2176
// not zero or denormal
2177
else if (flt_e != 0)
2178
{
2179
int new_exp = flt_e - 127;
2180
if (new_exp > 15)
2181
e = 31;
2182
else if (new_exp < -14)
2183
{
2184
if (toward_zero)
2185
m = (int)truncf((1 << 24) * fabsf(fi.f));
2186
else
2187
m = lrintf((1 << 24) * fabsf(fi.f));
2188
}
2189
else
2190
{
2191
e = new_exp + 15;
2192
if (toward_zero)
2193
m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13)));
2194
else
2195
m = lrintf((float)flt_m * (1.0f / (float)(1 << 13)));
2196
}
2197
}
2198
2199
assert((0 <= m) && (m <= 1024));
2200
if (m == 1024)
2201
{
2202
e++;
2203
m = 0;
2204
}
2205
2206
assert((s >= 0) && (s <= 1));
2207
assert((e >= 0) && (e <= 31));
2208
assert((m >= 0) && (m <= 1023));
2209
2210
half_float result = (half_float)((s << 15) | (e << 10) | m);
2211
return result;
2212
}
2213
2214
// Expands a 16-bit half-float bit pattern to a 32-bit float.
// Signed zeros, infinities and NaNs map to their float counterparts (a NaN's 10 mantissa bits land in the top of the float mantissa);
// half denormals are renormalized into normal floats.
float half_to_float(half_float hval)
{
	union { float f; uint32_t u; } out = { 0 };

	const uint32_t h = (uint32_t)hval;
	const uint32_t sign_bit = (h >> 15) & 1;
	uint32_t exponent = (h >> 10) & 0x1F;
	uint32_t mantissa = h & 0x3FF;

	if (exponent == 31)
	{
		// +/- INF (mantissa == 0) or +/- NaN (mantissa != 0); one expression covers both
		out.u = (sign_bit << 31) | 0x7f800000 | (mantissa << 13);
		return out.f;
	}

	if (exponent == 0)
	{
		if (mantissa == 0)
		{
			// +- 0
			out.u = sign_bit << 31;
			return out.f;
		}

		// denormalized: shift left until the implicit one (bit 10) appears.
		// The exponent deliberately wraps below zero as an unsigned value; the rebias below brings it back into range.
		do
		{
			mantissa <<= 1;
			--exponent;
		} while (!(mantissa & 0x00000400));

		++exponent;
		mantissa &= ~0x00000400;
	}

	// Rebias from the half bias (15) to the float bias (127) and widen the mantissa from 10 to 23 bits.
	exponent += (127 - 15);
	mantissa <<= 13;

	assert(sign_bit <= 1);
	assert(mantissa <= 0x7FFFFF);
	assert(exponent <= 255);

	out.u = mantissa | (exponent << 23) | (sign_bit << 31);
	return out.f;
}
2269
2270
// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
// Layout constants for the RGB9E5 shared-exponent format: 5 exponent bits, 9 mantissa bits per channel, exponent bias 15, largest valid biased exponent 31.
2271
const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;
2272
// Largest unbiased shared exponent (31 - 15 = 16).
const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
2273
// Number of distinct mantissa values (1 << 9 = 512).
const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
2274
// Largest mantissa value (511).
const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);
2275
// MAX_RGB9E5 is not defined here (the line below is commented out), yet pack_rgb9e5() uses it, so it must be declared elsewhere.
//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));
2276
// NOTE(review): the quotient (1/512)/32768 is far below 1, so the (int) cast makes this constant evaluate to 0 - confirm whether a float constant was intended.
const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
2277
2278
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
2279
{
2280
int x = packed & 511;
2281
int y = (packed >> 9) & 511;
2282
int z = (packed >> 18) & 511;
2283
int w = (packed >> 27) & 31;
2284
2285
const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
2286
2287
r = x * scale;
2288
g = y * scale;
2289
b = z * scale;
2290
}
2291
2292
// Returns the unbiased IEEE-754 exponent of x, i.e. (bits 23..30 of the float) - 127.
// floor_log2 is not correct for the denorm and zero values (they yield -127), but we are going to do a max of this value
// with the minimum rgb9e5 exponent that will hide these problem cases.
// The sign bit is ignored; infinities and NaNs yield 128.
static inline int floor_log2(float x)
{
	// Copy the bit pattern with memcpy: reading the inactive member of a union is not defined behavior in C++.
	uint32_t raw_bits;
	memcpy(&raw_bits, &x, sizeof(raw_bits));

	// Extract float exponent. Convert to int before subtracting the bias so the result is computed in signed arithmetic.
	const int biased_exp = (int)((raw_bits >> 23) & 0xFF);
	return biased_exp - 127;
}
2306
2307
// Returns the larger of two ints (b when they compare equal).
static inline int maximumi(int a, int b)
{
	if (a > b)
		return a;
	return b;
}
2308
// Returns a when a > b, otherwise b (so b is returned whenever the comparison is false, including when either value is NaN).
static inline float maximumf(float a, float b)
{
	if (a > b)
		return a;
	return b;
}
2309
2310
uint32_t pack_rgb9e5(float r, float g, float b)
2311
{
2312
r = clampf(r, 0.0f, MAX_RGB9E5);
2313
g = clampf(g, 0.0f, MAX_RGB9E5);
2314
b = clampf(b, 0.0f, MAX_RGB9E5);
2315
2316
float maxrgb = maximumf(maximumf(r, g), b);
2317
int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
2318
assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
2319
2320
float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
2321
2322
int maxm = (int)floorf((maxrgb / denom) + 0.5f);
2323
if (maxm == (MAX_RGB9E5_MANTISSA + 1))
2324
{
2325
denom *= 2;
2326
exp_shared += 1;
2327
assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
2328
}
2329
else
2330
{
2331
assert(maxm <= MAX_RGB9E5_MANTISSA);
2332
}
2333
2334
int rm = (int)floorf((r / denom) + 0.5f);
2335
int gm = (int)floorf((g / denom) + 0.5f);
2336
int bm = (int)floorf((b / denom) + 0.5f);
2337
2338
assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
2339
assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
2340
assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
2341
2342
return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
2343
}
2344
2345
// Counts the leading zero bits of x when viewed as a 17-bit value (bit 16 is the most significant bit).
// x must be <= 0x1FFFF (asserted; higher bits are masked off otherwise). Returns 17 when x is 0.
static inline int clz17(uint32_t x)
{
	assert(x <= 0x1FFFF);
	x &= 0x1FFFF;

	if (x == 0)
		return 17;

	// Walk a probe bit down from bit 16 until it hits the highest set bit.
	int leading_zeros = 0;
	for (uint32_t probe = 0x10000; (x & probe) == 0; probe >>= 1)
		leading_zeros++;

	return leading_zeros;
}
2362
2363
static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
2364
{
2365
int lz = clz17(Cr | Cg | Cb | 1);
2366
if (Cr == 65535) { Cr = 65536; lz = 0; }
2367
if (Cg == 65535) { Cg = 65536; lz = 0; }
2368
if (Cb == 65535) { Cb = 65536; lz = 0; }
2369
Cr <<= lz; Cg <<= lz; Cb <<= lz;
2370
Cr = (Cr >> 8) & 0x1FF;
2371
Cg = (Cg >> 8) & 0x1FF;
2372
Cb = (Cb >> 8) & 0x1FF;
2373
uint32_t exponent = 16 - lz;
2374
uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
2375
return texel;
2376
}
2377
2378
// Packs three half-float bit patterns into an RGB9E5 texel using integer operations only.
// Inputs above 0x7c00 (NaNs and anything with the sign bit set) become 0; 0x7c00 (+inf) becomes 0x7bff (largest finite half).
// The shared exponent comes from the channel with the largest half exponent; the other mantissas are shifted right to match.
static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
{
	if (Cr > 0x7c00) Cr = 0; else if (Cr == 0x7c00) Cr = 0x7bff;
	if (Cg > 0x7c00) Cg = 0; else if (Cg == 0x7c00) Cg = 0x7bff;
	if (Cb > 0x7c00) Cb = 0; else if (Cb == 0x7c00) Cb = 0x7bff;

	// Raw 5-bit half exponents.
	const int Re = (Cr >> 10) & 0x1F;
	const int Ge = (Cg >> 10) & 0x1F;
	const int Be = (Cb >> 10) & 0x1F;

	uint32_t expo, rshift, gshift, bshift;

	if ((Re | Ge | Be) == 0)
	{
		// All three channels are zero/denormal: the shift and exponent are just bit 9 of the combined mantissas.
		const int top_mant_bit = ((Cr | Cg | Cb) & 0x200) >> 9;
		expo = rshift = gshift = bshift = top_mant_bit;
	}
	else
	{
		// Effective exponents (denormals behave as exponent 1).
		const int Rex = Re ? Re : 1;
		const int Gex = Ge ? Ge : 1;
		const int Bex = Be ? Be : 1;

		// At least one raw exponent is non-zero here, so the largest effective exponent belongs to the dominant channel.
		int max_ex = Rex;
		if (Gex > max_ex) max_ex = Gex;
		if (Bex > max_ex) max_ex = Bex;

		expo = max_ex + 1;
		rshift = max_ex - Rex + 2;
		gshift = max_ex - Gex + 2;
		bshift = max_ex - Bex + 2;
	}

	// 11-bit significands: the implicit leading one is present only for normal values.
	const int r_sig = (Cr & 0x3FF) | (Re == 0 ? 0 : 0x400);
	const int g_sig = (Cg & 0x3FF) | (Ge == 0 ? 0 : 0x400);
	const int b_sig = (Cb & 0x3FF) | (Be == 0 ? 0 : 0x400);

	const int Rm = (r_sig >> rshift) & 0x1FF;
	const int Gm = (g_sig >> gshift) & 0x1FF;
	const int Bm = (b_sig >> bshift) & 0x1FF;

	return (expo << 27) | (Bm << 18) | (Gm << 9) | (Rm << 0);
}
2429
2430
// Important: pPixels is either 32-bit/texel or 64-bit/texel.
2431
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
2432
{
2433
assert(is_valid_block_size(blk_width, blk_height));
2434
2435
assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
2436
if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
2437
return false;
2438
2439
const uint32_t num_blk_pixels = blk_width * blk_height;
2440
2441
// Write block error color
2442
if (dec_mode == cDecodeModeHDR16)
2443
{
2444
// NaN's
2445
memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
2446
}
2447
else if (dec_mode == cDecodeModeRGB9E5)
2448
{
2449
const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
2450
2451
for (uint32_t i = 0; i < num_blk_pixels; i++)
2452
((uint32_t*)pPixels)[i] = purple_9e5;
2453
}
2454
else
2455
{
2456
for (uint32_t i = 0; i < num_blk_pixels; i++)
2457
((uint32_t*)pPixels)[i] = 0xFFFF00FF;
2458
}
2459
2460
if (log_blk.m_error_flag)
2461
{
2462
// Should this return false? It's not an invalid logical block config, though.
2463
return false;
2464
}
2465
2466
// Handle solid color blocks
2467
if (log_blk.m_solid_color_flag_ldr)
2468
{
2469
// LDR solid block
2470
if (dec_mode == cDecodeModeHDR16)
2471
{
2472
// Convert LDR pixels to half-float
2473
half_float h[4];
2474
for (uint32_t c = 0; c < 4; c++)
2475
h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
2476
2477
for (uint32_t i = 0; i < num_blk_pixels; i++)
2478
memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
2479
}
2480
else if (dec_mode == cDecodeModeRGB9E5)
2481
{
2482
float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
2483
float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
2484
float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
2485
2486
const uint32_t packed = pack_rgb9e5(r, g, b);
2487
2488
for (uint32_t i = 0; i < num_blk_pixels; i++)
2489
((uint32_t*)pPixels)[i] = packed;
2490
}
2491
else
2492
{
2493
// Convert LDR pixels to 8-bits
2494
for (uint32_t i = 0; i < num_blk_pixels; i++)
2495
for (uint32_t c = 0; c < 4; c++)
2496
((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
2497
}
2498
2499
return true;
2500
}
2501
else if (log_blk.m_solid_color_flag_hdr)
2502
{
2503
// HDR solid block, decode mode must be half-float or RGB9E5
2504
if (dec_mode == cDecodeModeHDR16)
2505
{
2506
for (uint32_t i = 0; i < num_blk_pixels; i++)
2507
memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
2508
}
2509
else if (dec_mode == cDecodeModeRGB9E5)
2510
{
2511
float r = half_to_float(log_blk.m_solid_color[0]);
2512
float g = half_to_float(log_blk.m_solid_color[1]);
2513
float b = half_to_float(log_blk.m_solid_color[2]);
2514
2515
const uint32_t packed = pack_rgb9e5(r, g, b);
2516
2517
for (uint32_t i = 0; i < num_blk_pixels; i++)
2518
((uint32_t*)pPixels)[i] = packed;
2519
}
2520
else
2521
{
2522
return false;
2523
}
2524
2525
return true;
2526
}
2527
2528
// Sanity check block's config
2529
if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
2530
return false;
2531
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
2532
return false;
2533
2534
if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
2535
return false;
2536
if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
2537
return false;
2538
if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
2539
return false;
2540
if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
2541
return false;
2542
if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
2543
return false;
2544
if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
2545
return false;
2546
if (log_blk.m_color_component_selector > 3)
2547
return false;
2548
2549
const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
2550
const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
2551
2552
bool is_ldr_endpoints[MAX_PARTITIONS];
2553
2554
// Check CEM's
2555
uint32_t total_cem_vals = 0;
2556
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
2557
{
2558
if (log_blk.m_color_endpoint_modes[i] > 15)
2559
return false;
2560
2561
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
2562
2563
is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
2564
}
2565
2566
if (total_cem_vals > MAX_ENDPOINTS)
2567
return false;
2568
2569
const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
2570
const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
2571
2572
// Dequantized endpoints to [0,255]
2573
uint8_t dequantized_endpoints[MAX_ENDPOINTS];
2574
for (uint32_t i = 0; i < total_cem_vals; i++)
2575
{
2576
if (log_blk.m_endpoints[i] >= total_endpoint_levels)
2577
return false;
2578
dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
2579
}
2580
2581
// Dequantize weights to [0,64]
2582
uint8_t dequantized_weights[2][12 * 12];
2583
2584
const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
2585
const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
2586
2587
const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
2588
for (uint32_t i = 0; i < total_weight_vals; i++)
2589
{
2590
if (log_blk.m_weights[i] >= total_weight_levels)
2591
return false;
2592
2593
const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
2594
const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
2595
2596
dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
2597
}
2598
2599
// Upsample weight grid. [0,64] weights
2600
uint8_t upsampled_weights[2][12 * 12];
2601
2602
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
2603
if (log_blk.m_dual_plane)
2604
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
2605
2606
// Decode CEM's
2607
int endpoints[4][4][2]; // [subset][comp][l/h]
2608
2609
uint32_t endpoint_val_index = 0;
2610
for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
2611
{
2612
const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
2613
2614
decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
2615
2616
endpoint_val_index += get_num_cem_values(cem_index);
2617
}
2618
2619
// Decode texels
2620
const bool small_block = num_blk_pixels < 31;
2621
const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
2622
const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
2623
const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
2624
2625
bool success = true;
2626
2627
if (dec_mode == cDecodeModeRGB9E5)
2628
{
2629
// returns uint32_t's
2630
for (uint32_t y = 0; y < blk_height; y++)
2631
{
2632
for (uint32_t x = 0; x < blk_width; x++)
2633
{
2634
const uint32_t pixel_index = x + y * blk_width;
2635
2636
uint32_t subset = 0;
2637
if (log_blk.m_num_partitions > 1)
2638
{
2639
if (use_precomputed_texel_partitions_4x4)
2640
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2641
else if (use_precomputed_texel_partitions_6x6)
2642
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2643
else
2644
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2645
}
2646
2647
int comp[3];
2648
2649
for (uint32_t c = 0; c < 3; c++)
2650
{
2651
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2652
2653
if (is_ldr_endpoints[subset])
2654
{
2655
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
2656
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
2657
2658
int le = endpoints[subset][c][0];
2659
int he = endpoints[subset][c][1];
2660
2661
le = (le << 8) | le;
2662
he = (he << 8) | he;
2663
2664
int k = weight_interpolate(le, he, w);
2665
assert((k >= 0) && (k <= 0xFFFF));
2666
2667
comp[c] = k; // 1.0
2668
}
2669
else
2670
{
2671
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
2672
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
2673
2674
int le = endpoints[subset][c][0] << 4;
2675
int he = endpoints[subset][c][1] << 4;
2676
2677
int qlog16 = weight_interpolate(le, he, w);
2678
2679
comp[c] = qlog16_to_half(qlog16);
2680
2681
if (is_half_inf_or_nan((half_float)comp[c]))
2682
comp[c] = 0x7BFF;
2683
}
2684
2685
} // c
2686
2687
uint32_t packed;
2688
if (is_ldr_endpoints[subset])
2689
packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
2690
else
2691
packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
2692
2693
((uint32_t*)pPixels)[pixel_index] = packed;
2694
2695
} // x
2696
} // y
2697
}
2698
else if (dec_mode == cDecodeModeHDR16)
2699
{
2700
// Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
2701
2702
// returns half floats
2703
for (uint32_t y = 0; y < blk_height; y++)
2704
{
2705
for (uint32_t x = 0; x < blk_width; x++)
2706
{
2707
const uint32_t pixel_index = x + y * blk_width;
2708
2709
uint32_t subset = 0;
2710
if (log_blk.m_num_partitions > 1)
2711
{
2712
if (use_precomputed_texel_partitions_4x4)
2713
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2714
else if (use_precomputed_texel_partitions_6x6)
2715
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2716
else
2717
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2718
}
2719
2720
for (uint32_t c = 0; c < 4; c++)
2721
{
2722
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2723
2724
half_float o;
2725
2726
if ( (is_ldr_endpoints[subset]) ||
2727
((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
2728
{
2729
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
2730
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
2731
2732
int le = endpoints[subset][c][0];
2733
int he = endpoints[subset][c][1];
2734
2735
le = (le << 8) | le;
2736
he = (he << 8) | he;
2737
2738
int k = weight_interpolate(le, he, w);
2739
assert((k >= 0) && (k <= 0xFFFF));
2740
2741
if (k == 0xFFFF)
2742
o = 0x3C00; // 1.0
2743
else
2744
o = float_to_half((float)k * (1.0f / 65536.0f), true);
2745
}
2746
else
2747
{
2748
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
2749
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
2750
2751
int le = endpoints[subset][c][0] << 4;
2752
int he = endpoints[subset][c][1] << 4;
2753
2754
int qlog16 = weight_interpolate(le, he, w);
2755
2756
o = qlog16_to_half(qlog16);
2757
2758
if (is_half_inf_or_nan(o))
2759
o = 0x7BFF;
2760
}
2761
2762
((half_float*)pPixels)[pixel_index * 4 + c] = o;
2763
}
2764
2765
} // x
2766
} // y
2767
}
2768
else
2769
{
2770
// returns uint8_t's
2771
for (uint32_t y = 0; y < blk_height; y++)
2772
{
2773
for (uint32_t x = 0; x < blk_width; x++)
2774
{
2775
const uint32_t pixel_index = x + y * blk_width;
2776
2777
uint32_t subset = 0;
2778
if (log_blk.m_num_partitions > 1)
2779
{
2780
if (use_precomputed_texel_partitions_4x4)
2781
subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2782
else if (use_precomputed_texel_partitions_6x6)
2783
subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
2784
else
2785
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
2786
}
2787
2788
if (!is_ldr_endpoints[subset])
2789
{
2790
((uint32_t*)pPixels)[pixel_index * 4] = 0xFFFF00FF;
2791
success = false;
2792
}
2793
else
2794
{
2795
for (uint32_t c = 0; c < 4; c++)
2796
{
2797
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
2798
2799
int le = endpoints[subset][c][0];
2800
int he = endpoints[subset][c][1];
2801
2802
// FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
2803
//if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
2804
// See https://github.com/ARM-software/astc-encoder/issues/447
2805
if (dec_mode == cDecodeModeSRGB8)
2806
{
2807
le = (le << 8) | 0x80;
2808
he = (he << 8) | 0x80;
2809
}
2810
else
2811
{
2812
le = (le << 8) | le;
2813
he = (he << 8) | he;
2814
}
2815
2816
uint32_t k = weight_interpolate(le, he, w);
2817
2818
// FIXME: This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
2819
// See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
2820
// It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
2821
((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
2822
}
2823
}
2824
2825
} // x
2826
} // y
2827
}
2828
2829
return success;
2830
}
2831
2832
//------------------------------------------------
2833
// Physical to logical block decoding
2834
2835
// unsigned 128-bit int, with some signed helpers
2836
class uint128
2837
{
2838
uint64_t m_lo, m_hi;
2839
2840
public:
2841
uint128() = default;
2842
inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
2843
inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
2844
inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
2845
2846
inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
2847
inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
2848
2849
inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
2850
inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
2851
inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
2852
inline explicit operator uint64_t () const { return m_lo; }
2853
2854
inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
2855
inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
2856
2857
inline uint64_t get_low() const { return m_lo; }
2858
inline uint64_t& get_low() { return m_lo; }
2859
2860
inline uint64_t get_high() const { return m_hi; }
2861
inline uint64_t& get_high() { return m_hi; }
2862
2863
inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
2864
inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
2865
2866
inline bool operator< (const uint128& rhs) const
2867
{
2868
if (m_hi < rhs.m_hi)
2869
return true;
2870
2871
if (m_hi == rhs.m_hi)
2872
{
2873
if (m_lo < rhs.m_lo)
2874
return true;
2875
}
2876
2877
return false;
2878
}
2879
2880
inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
2881
2882
inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
2883
inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
2884
2885
inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
2886
inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
2887
inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
2888
inline explicit operator bool() const { return is_non_zero(); }
2889
inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
2890
2891
inline bool signed_less(const uint128& rhs) const
2892
{
2893
const bool l_signed = is_signed(), r_signed = rhs.is_signed();
2894
2895
if (l_signed == r_signed)
2896
return *this < rhs;
2897
2898
if (l_signed && !r_signed)
2899
return true;
2900
2901
assert(!l_signed && r_signed);
2902
return false;
2903
}
2904
2905
inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
2906
inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
2907
inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
2908
2909
double get_double() const
2910
{
2911
double res = 0;
2912
2913
if (m_hi)
2914
res = (double)m_hi * pow(2.0f, 64.0f);
2915
2916
res += (double)m_lo;
2917
2918
return res;
2919
}
2920
2921
double get_signed_double() const
2922
{
2923
if (is_signed())
2924
return -(uint128(*this).abs().get_double());
2925
else
2926
return get_double();
2927
}
2928
2929
inline uint128 abs() const
2930
{
2931
uint128 res(*this);
2932
if (res.is_signed())
2933
res = -res;
2934
return res;
2935
}
2936
2937
inline uint128& operator<<= (int shift)
2938
{
2939
assert(shift >= 0);
2940
if (shift < 0)
2941
return *this;
2942
2943
m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
2944
2945
if ((shift) && (shift < 64))
2946
m_hi |= (m_lo >> (64 - shift));
2947
2948
m_lo = (shift >= 64) ? 0 : (m_lo << shift);
2949
2950
return *this;
2951
}
2952
2953
inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
2954
2955
inline uint128& operator>>= (int shift)
2956
{
2957
assert(shift >= 0);
2958
if (shift < 0)
2959
return *this;
2960
2961
m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
2962
2963
if ((shift) && (shift < 64))
2964
m_lo |= (m_hi << (64 - shift));
2965
2966
m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
2967
2968
return *this;
2969
}
2970
2971
inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
2972
2973
inline uint128 signed_shift_right(int shift) const
2974
{
2975
uint128 res(*this);
2976
res >>= shift;
2977
2978
if (is_signed())
2979
{
2980
uint128 x(0U);
2981
x = ~x;
2982
x >>= shift;
2983
res |= (~x);
2984
}
2985
2986
return res;
2987
}
2988
2989
inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
2990
inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
2991
2992
inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
2993
inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
2994
2995
inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
2996
inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
2997
2998
inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
2999
3000
inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
3001
3002
// prefix
3003
inline uint128 operator ++()
3004
{
3005
if (++m_lo == 0)
3006
++m_hi;
3007
return *this;
3008
}
3009
3010
// postfix
3011
inline uint128 operator ++(int)
3012
{
3013
uint128 res(*this);
3014
if (++m_lo == 0)
3015
++m_hi;
3016
return res;
3017
}
3018
3019
// prefix
3020
inline uint128 operator --()
3021
{
3022
const uint64_t t = m_lo;
3023
if (--m_lo > t)
3024
--m_hi;
3025
return *this;
3026
}
3027
3028
// postfix
3029
inline uint128 operator --(int)
3030
{
3031
const uint64_t t = m_lo;
3032
uint128 res(*this);
3033
if (--m_lo > t)
3034
--m_hi;
3035
return res;
3036
}
3037
3038
inline uint128& operator+= (const uint128& rhs)
3039
{
3040
const uint64_t t = m_lo + rhs.m_lo;
3041
m_hi = m_hi + rhs.m_hi + (t < m_lo);
3042
m_lo = t;
3043
return *this;
3044
}
3045
3046
inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
3047
3048
inline uint128& operator-= (const uint128& rhs)
3049
{
3050
const uint64_t t = m_lo - rhs.m_lo;
3051
m_hi = m_hi - rhs.m_hi - (t > m_lo);
3052
m_lo = t;
3053
return *this;
3054
}
3055
3056
inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
3057
3058
// computes bit by bit, very slow
3059
uint128& operator*=(const uint128& rhs)
3060
{
3061
uint128 temp(*this), result(0U);
3062
3063
for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
3064
if (bitmask.get_low() & 1)
3065
result += temp;
3066
3067
*this = result;
3068
return *this;
3069
}
3070
3071
uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
3072
3073
// computes bit by bit, very slow
3074
friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
3075
{
3076
remainder = 0;
3077
3078
if (!divisor)
3079
{
3080
assert(0);
3081
return ~uint128(0U);
3082
}
3083
3084
uint128 quotient(0), one(1);
3085
3086
for (int i = 127; i >= 0; i--)
3087
{
3088
remainder = (remainder << 1) | ((dividend >> i) & one);
3089
if (remainder >= divisor)
3090
{
3091
remainder -= divisor;
3092
quotient |= (one << i);
3093
}
3094
}
3095
3096
return quotient;
3097
}
3098
3099
uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }
3100
uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
3101
3102
uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
3103
uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
3104
3105
void print_hex(FILE* pFile) const
3106
{
3107
fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
3108
}
3109
3110
void format_unsigned(std::string& res) const
3111
{
3112
basisu::vector<uint8_t> digits;
3113
digits.reserve(39 + 1);
3114
3115
uint128 k(*this), ten(10);
3116
do
3117
{
3118
uint128 r;
3119
k = divide(k, ten, r);
3120
digits.push_back((uint8_t)r);
3121
} while (k);
3122
3123
for (int i = (int)digits.size() - 1; i >= 0; i--)
3124
res += ('0' + digits[i]);
3125
}
3126
3127
void format_signed(std::string& res) const
3128
{
3129
uint128 val(*this);
3130
3131
if (val.is_signed())
3132
{
3133
res.push_back('-');
3134
val = -val;
3135
}
3136
3137
val.format_unsigned(res);
3138
}
3139
3140
void print_unsigned(FILE* pFile)
3141
{
3142
std::string str;
3143
format_unsigned(str);
3144
fprintf(pFile, "%s", str.c_str());
3145
}
3146
3147
void print_signed(FILE* pFile)
3148
{
3149
std::string str;
3150
format_signed(str);
3151
fprintf(pFile, "%s", str.c_str());
3152
}
3153
3154
// Returns this value with its 128 bits in reverse order (bit 0 swaps with bit 127, etc.).
// Works on m_lo/m_hi directly instead of reinterpreting the object as an array of dwords,
// so the result does not depend on byte order or member layout and does not break aliasing rules.
uint128 get_reversed_bits() const
{
	uint128 res;

	// Each 32-bit quarter is bit-reversed and moved to the mirrored position:
	// the reversed high half becomes the new low half and vice versa.
	res.m_lo = (uint64_t)rev_dword((uint32_t)(m_hi >> 32)) | ((uint64_t)rev_dword((uint32_t)m_hi) << 32);
	res.m_hi = (uint64_t)rev_dword((uint32_t)(m_lo >> 32)) | ((uint64_t)rev_dword((uint32_t)m_lo) << 32);

	return res;
}
3168
3169
// Returns a copy of this value whose 16 object bytes are in reverse order.
uint128 get_byteswapped() const
{
	uint128 swapped;

	const uint8_t* pFrom = (const uint8_t*)this;
	uint8_t* pTo = (uint8_t*)&swapped;

	// Read the source front to back while filling the destination back to front.
	for (uint32_t n = 0; n < 16; n++)
		pTo[15 - n] = pFrom[n];

	return swapped;
}
3181
3182
inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
3183
{
3184
assert(bit_ofs < 128);
3185
assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
3186
3187
uint128 res(*this);
3188
res >>= bit_ofs;
3189
3190
const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
3191
return res.get_low() & bitmask;
3192
}
3193
3194
inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
3195
{
3196
assert(bit_len <= 32);
3197
return (uint32_t)get_bits64(bit_ofs, bit_len);
3198
}
3199
3200
inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
3201
{
3202
assert(len && (len <= 32));
3203
uint32_t x = get_bits(bit_ofs, len);
3204
bit_ofs += len;
3205
return x;
3206
}
3207
3208
// Overwrites the num_bits-wide field (1..64 bits) starting at bit_ofs with val and returns *this.
// val must fit in num_bits, and the field must lie inside the 128-bit value (both checked by assert only).
inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
{
	assert(bit_ofs < 128);
	assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));

	// Mask with the low num_bits bits set.
	uint128 field_mask(1);
	field_mask <<= num_bits;
	field_mask -= uint128(1);

	uint128 wide_val(val);
	assert(wide_val <= field_mask);

	// Move the mask over the destination field, clear the field, then merge in the new value.
	field_mask <<= bit_ofs;
	*this &= ~field_mask;
	*this |= (wide_val << bit_ofs);

	return *this;
}
3223
};
3224
3225
static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
3226
{
3227
if (bits.get_bits(10, 2) != 0b11)
3228
return false;
3229
3230
uint32_t bit_ofs = 12;
3231
const uint32_t min_s = bits.next_bits(bit_ofs, 13);
3232
const uint32_t max_s = bits.next_bits(bit_ofs, 13);
3233
const uint32_t min_t = bits.next_bits(bit_ofs, 13);
3234
const uint32_t max_t = bits.next_bits(bit_ofs, 13);
3235
assert(bit_ofs == 64);
3236
3237
const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);
3238
3239
if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))
3240
return false;
3241
3242
const bool hdr_flag = bits.get_bits(9, 1) != 0;
3243
3244
if (hdr_flag)
3245
log_blk.m_solid_color_flag_hdr = true;
3246
else
3247
log_blk.m_solid_color_flag_ldr = true;
3248
3249
log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);
3250
log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);
3251
log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);
3252
log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);
3253
3254
if (log_blk.m_solid_color_flag_hdr)
3255
{
3256
for (uint32_t c = 0; c < 4; c++)
3257
if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
3258
return false;
3259
}
3260
3261
return true;
3262
}
3263
3264
// One row of the block mode decoding table used by decode_config().
// All *_ofs members are bit offsets into the 128-bit physical block.
struct astc_dec_row
{
	int8_t Dp_ofs;  // offset of the dual plane flag bit, or negative if the row has none
	int8_t P_ofs;   // offset of the weight precision flag bit, or negative if the row has none
	int8_t W_ofs;   // offset of the grid width field
	int8_t W_size;  // size in bits of the grid width field (0 = no field, width is W_bias)
	int8_t H_ofs;   // offset of the grid height field
	int8_t H_size;  // size in bits of the grid height field (0 = no field, height is H_bias)
	int8_t W_bias;  // value added to the width field to get the grid width
	int8_t H_bias;  // value added to the height field to get the grid height
	int8_t p0_ofs;  // offset of bit 0 of the weight range selector p
	int8_t p1_ofs;  // offset of bit 1 of the weight range selector p
	int8_t p2_ofs;  // offset of bit 2 of the weight range selector p
};
3268
3269
// Block mode decoding table, indexed by the row_index selected in decode_config().
// Rows 0-4 are selected when the low two block bits are non-zero, rows 5-9 when they are zero.
// Each row gives the bit positions of the dual plane (Dp) and precision (P) flags, the position,
// size and bias of the grid width/height fields, and the positions of the three bits forming
// the weight range selector p. The trailing comment on each row is its "W_bias H_bias" pair.
// A negative offset means the flag is absent for that row; a zero size means the dimension is fixed at its bias.
static const astc_dec_row s_dec_rows[10] =
3270
{
3271
// Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs;
3272
{ 10, 9, 7, 2, 5, 2, 4, 2, 4, 0, 1 }, // 4 2
3273
{ 10, 9, 7, 2, 5, 2, 8, 2, 4, 0, 1 }, // 8 2
3274
{ 10, 9, 5, 2, 7, 2, 2, 8, 4, 0, 1 }, // 2 8
3275
{ 10, 9, 5, 2, 7, 1, 2, 6, 4, 0, 1 }, // 2 6
3276
3277
{ 10, 9, 7, 1, 5, 2, 2, 2, 4, 0, 1 }, // 2 2
3278
{ 10, 9, 0, 0, 5, 2, 12, 2, 4, 2, 3 }, // 12 2
3279
{ 10, 9, 5, 2, 0, 0, 2, 12, 4, 2, 3 }, // 2 12
3280
{ 10, 9, 0, 0, 0, 0, 6, 10, 4, 2, 3 }, // 6 10
3281
3282
{ 10, 9, 0, 0, 0, 0, 10, 6, 4, 2, 3 }, // 10 6
3283
{ -1, -1, 5, 2, 9, 2, 6, 6, 4, 2, 3 }, // 6 6
3284
};
3285
3286
static bool decode_config(const uint128& bits, log_astc_block& log_blk)
3287
{
3288
// Reserved
3289
if (bits.get_bits(0, 4) == 0)
3290
return false;
3291
3292
// Reserved
3293
if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
3294
{
3295
if (bits.get_bits(2, 4) != 0b1111)
3296
return false;
3297
}
3298
3299
// Void extent
3300
if (bits.get_bits(0, 9) == 0b111111100)
3301
return decode_void_extent(bits, log_blk);
3302
3303
// Check rows
3304
const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
3305
const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
3306
const uint32_t x7_2 = bits.get_bits(7, 2);
3307
3308
int row_index = -1;
3309
if (x0_2 == 0)
3310
{
3311
if (x7_2 == 0b00)
3312
row_index = 5;
3313
else if (x7_2 == 0b01)
3314
row_index = 6;
3315
else if (x5_4 == 0b1100)
3316
row_index = 7;
3317
else if (x5_4 == 0b1101)
3318
row_index = 8;
3319
else if (x7_2 == 0b10)
3320
row_index = 9;
3321
}
3322
else
3323
{
3324
if (x2_2 == 0b00)
3325
row_index = 0;
3326
else if (x2_2 == 0b01)
3327
row_index = 1;
3328
else if (x2_2 == 0b10)
3329
row_index = 2;
3330
else if ((x2_2 == 0b11) && (x8_1 == 0))
3331
row_index = 3;
3332
else if ((x2_2 == 0b11) && (x8_1 == 1))
3333
row_index = 4;
3334
}
3335
if (row_index < 0)
3336
return false;
3337
3338
const astc_dec_row& r = s_dec_rows[row_index];
3339
3340
bool P = false, Dp = false;
3341
uint32_t W = r.W_bias, H = r.H_bias;
3342
3343
if (r.P_ofs >= 0)
3344
P = bits.get_bits(r.P_ofs, 1) != 0;
3345
3346
if (r.Dp_ofs >= 0)
3347
Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
3348
3349
if (r.W_size)
3350
W += bits.get_bits(r.W_ofs, r.W_size);
3351
3352
if (r.H_size)
3353
H += bits.get_bits(r.H_ofs, r.H_size);
3354
3355
assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
3356
assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
3357
3358
int p0 = bits.get_bits(r.p0_ofs, 1);
3359
int p1 = bits.get_bits(r.p1_ofs, 1);
3360
int p2 = bits.get_bits(r.p2_ofs, 1);
3361
3362
uint32_t p = p0 | (p1 << 1) | (p2 << 2);
3363
if (p < 2)
3364
return false;
3365
3366
log_blk.m_grid_width = (uint8_t)W;
3367
log_blk.m_grid_height = (uint8_t)H;
3368
3369
log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS));
3370
assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
3371
3372
log_blk.m_dual_plane = Dp;
3373
3374
return true;
3375
}
3376
3377
// Reads a 32-bit little-endian value from the four bytes at pBytes (no alignment requirement).
// Each byte is widened to uint32_t before shifting: a uint8_t operand would be promoted to
// (signed) int, and shifting a byte >= 0x80 left by 24 would overflow it.
static inline uint32_t read_le_dword(const uint8_t* pBytes)
{
	return ((uint32_t)pBytes[0]) | ((uint32_t)pBytes[1] << 8U) | ((uint32_t)pBytes[2] << 16U) | ((uint32_t)pBytes[3] << 24U);
}
3381
3382
// Trit unpacking table: maps the packed 8-bit value T of a trit block to its five trits (each 0..2).
// decode_trit_block() assembles T from the 2+2+1+2+1 bits interleaved with the values and
// indexes this table with it; entry [T][i] is the trit of the i-th value in the block.
// See 18.12.Integer Sequence Encoding - tables computed by executing the decoder functions with all possible 8/7-bit inputs.
3383
static const uint8_t s_trit_decode[256][5] =
3384
{
3385
{0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},
3386
{0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},
3387
{0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},
3388
{0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},
3389
{0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},
3390
{0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},
3391
{0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},
3392
{0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},
3393
{0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},
3394
{0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},
3395
{0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},
3396
{0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},
3397
{0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},
3398
{0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},
3399
{0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},
3400
{0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},
3401
{0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},
3402
{0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},
3403
{0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},
3404
{0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},
3405
{0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},
3406
{0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},
3407
{0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},
3408
{0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},
3409
{0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},
3410
{0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},
3411
{0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},
3412
{0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},
3413
{0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},
3414
{0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},
3415
{0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},
3416
{0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}
3417
};
3418
3419
// Quint unpacking table: maps the packed 7-bit value of a quint block to its three quints (each 0..4).
// decode_quint_block() assembles the index from the 3+2+2 bits interleaved with the values;
// entry [T][i] is the quint of the i-th value in the block.
static const uint8_t s_quint_decode[128][3] =
3420
{
3421
{0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},
3422
{0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},
3423
{0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},
3424
{0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},
3425
{0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},
3426
{0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},
3427
{0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},
3428
{0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},
3429
{0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},
3430
{0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},
3431
{0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},
3432
{0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},
3433
{0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},
3434
{0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},
3435
{0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},
3436
{0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}
3437
};
3438
3439
static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
3440
{
3441
assert((num_vals >= 1) && (num_vals <= 5));
3442
uint32_t m[5] = { 0 }, T = 0;
3443
3444
static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
3445
3446
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
3447
{
3448
if (bits_per_val)
3449
m[c] = bits.next_bits(bit_ofs, bits_per_val);
3450
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
3451
T_ofs += s_t_bits[c];
3452
}
3453
3454
const uint8_t (&p_trits)[5] = s_trit_decode[T];
3455
3456
for (uint32_t i = 0; i < num_vals; i++)
3457
pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
3458
}
3459
3460
static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
3461
{
3462
assert((num_vals >= 1) && (num_vals <= 3));
3463
uint32_t m[3] = { 0 }, T = 0;
3464
3465
static const uint8_t s_t_bits[3] = { 3, 2, 2 };
3466
3467
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
3468
{
3469
if (bits_per_val)
3470
m[c] = bits.next_bits(bit_ofs, bits_per_val);
3471
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
3472
T_ofs += s_t_bits[c];
3473
}
3474
3475
const uint8_t (&p_quints)[3] = s_quint_decode[T];
3476
3477
for (uint32_t i = 0; i < num_vals; i++)
3478
pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
3479
}
3480
3481
static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
3482
{
3483
assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
3484
3485
const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
3486
3487
if (g_ise_range_table[ise_range][1])
3488
{
3489
// Trits+bits, 5 vals per block, 7 bits extra per block
3490
const uint32_t total_blocks = (num_vals + 4) / 5;
3491
for (uint32_t b = 0; b < total_blocks; b++)
3492
{
3493
const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
3494
decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
3495
}
3496
}
3497
else if (g_ise_range_table[ise_range][2])
3498
{
3499
// Quints+bits, 3 vals per block, 8 bits extra per block
3500
const uint32_t total_blocks = (num_vals + 2) / 3;
3501
for (uint32_t b = 0; b < total_blocks; b++)
3502
{
3503
const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
3504
decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
3505
}
3506
}
3507
else
3508
{
3509
assert(bits_per_val);
3510
3511
// Only bits
3512
for (uint32_t i = 0; i < num_vals; i++)
3513
pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
3514
}
3515
}
3516
3517
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
3518
{
3519
const uint128 bits(
3520
(uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
3521
(uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
3522
3523
return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
3524
}
3525
3526
// Decodes a physical ASTC block to a logical ASTC block.
3527
// blk_width/blk_height are only used to validate the weight grid's dimensions.
3528
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
3529
{
3530
assert(is_valid_block_size(blk_width, blk_height));
3531
3532
const uint8_t* pS = (uint8_t*)pASTC_block;
3533
3534
log_blk.clear();
3535
log_blk.m_error_flag = true;
3536
3537
const uint128 bits(
3538
(uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
3539
(uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
3540
3541
const uint128 rev_bits(bits.get_reversed_bits());
3542
3543
if (!decode_config(bits, log_blk))
3544
return false;
3545
3546
if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
3547
{
3548
// Void extent
3549
log_blk.m_error_flag = false;
3550
return true;
3551
}
3552
3553
// Check grid dimensions
3554
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
3555
return false;
3556
3557
// Now we have the grid width/height, dual plane, weight ISE range
3558
3559
const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
3560
const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
3561
3562
// 18.24 Illegal Encodings
3563
if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
3564
return false;
3565
3566
const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
3567
3568
uint32_t total_extra_bits = 0;
3569
3570
// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
3571
3572
log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1);
3573
if (log_blk.m_num_partitions == 1)
3574
log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits
3575
else
3576
{
3577
// 2 or more partitions
3578
if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
3579
return false;
3580
3581
log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10);
3582
3583
uint32_t cem_bits = bits.get_bits(23, 6);
3584
3585
if ((cem_bits & 3) == 0)
3586
{
3587
// All CEM's the same
3588
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
3589
log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2);
3590
}
3591
else
3592
{
3593
// CEM's different, but within up to 2 adjacent classes
3594
const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
3595
3596
total_extra_bits = 3 * log_blk.m_num_partitions - 4;
3597
3598
if ((total_weight_bits + total_extra_bits) > 128)
3599
return false;
3600
3601
uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
3602
3603
uint32_t c[4] = { 0 }, m[4] = { 0 };
3604
3605
cem_bits >>= 2;
3606
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
3607
c[i] = cem_bits & 1;
3608
3609
switch (log_blk.m_num_partitions)
3610
{
3611
case 2:
3612
{
3613
m[0] = cem_bits & 3;
3614
m[1] = bits.next_bits(cem_bit_pos, 2);
3615
break;
3616
}
3617
case 3:
3618
{
3619
m[0] = cem_bits & 1;
3620
m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
3621
m[1] = bits.next_bits(cem_bit_pos, 2);
3622
m[2] = bits.next_bits(cem_bit_pos, 2);
3623
break;
3624
}
3625
case 4:
3626
{
3627
for (uint32_t i = 0; i < 4; i++)
3628
m[i] = bits.next_bits(cem_bit_pos, 2);
3629
break;
3630
}
3631
default:
3632
{
3633
assert(0);
3634
break;
3635
}
3636
}
3637
3638
assert(cem_bit_pos == end_of_weight_bit_ofs);
3639
3640
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
3641
{
3642
log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]);
3643
assert(log_blk.m_color_endpoint_modes[i] <= 15);
3644
}
3645
}
3646
}
3647
3648
// Now we have all the CEM indices.
3649
3650
if (log_blk.m_dual_plane)
3651
{
3652
// Read CCS bits, beneath any CEM bits
3653
total_extra_bits += 2;
3654
3655
if (total_extra_bits > end_of_weight_bit_ofs)
3656
return false;
3657
3658
uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
3659
log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2));
3660
}
3661
3662
uint32_t config_bit_pos = 11 + 2; // config+num_parts
3663
if (log_blk.m_num_partitions == 1)
3664
config_bit_pos += 4; // CEM bits
3665
else
3666
config_bit_pos += 10 + 6; // part_id+CEM bits
3667
3668
// config+num_parts+total_extra_bits (CEM extra+CCS)
3669
uint32_t total_config_bits = config_bit_pos + total_extra_bits;
3670
3671
// Compute number of remaining bits in block
3672
const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
3673
if (num_remaining_bits < 0)
3674
return false;
3675
3676
// Compute total number of ISE encoded color endpoint mode values
3677
uint32_t total_cem_vals = 0;
3678
for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
3679
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
3680
3681
if (total_cem_vals > MAX_ENDPOINTS)
3682
return false;
3683
3684
// Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
3685
int endpoint_ise_range = -1;
3686
for (int k = 20; k > 0; k--)
3687
{
3688
int b = get_ise_sequence_bits(total_cem_vals, k);
3689
if (b <= num_remaining_bits)
3690
{
3691
endpoint_ise_range = k;
3692
break;
3693
}
3694
}
3695
3696
// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
3697
if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
3698
return false;
3699
3700
log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;
3701
3702
// Decode endpoints forwards in block
3703
decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
3704
3705
// Decode grid weights backwards in block
3706
decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
3707
3708
log_blk.m_error_flag = false;
3709
3710
return true;
3711
}
3712
3713
} // namespace astc_helpers
3714
3715
#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
3716
3717