// Source: GitHub repository godotengine/godot, path blob/master/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.h
// File: basisu_astc_hdr_common.h
2
#pragma once
3
#include "basisu_enc.h"
4
#include "basisu_gpu_texture.h"
5
#include "../transcoder/basisu_astc_helpers.h"
6
#include "../transcoder/basisu_astc_hdr_core.h"
7
8
namespace basisu
9
{
10
// Maximum ASTC HDR block width and height, in texels.
const uint32_t MAX_ASTC_HDR_BLOCK_W = 6, MAX_ASTC_HDR_BLOCK_H = 6;
// Texel count of the largest block. Derived from the two limits above so the three values cannot drift apart.
const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H;

const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec)
const uint32_t MODE7_TOTAL_SUBMODES = 6;
15
16
// [ise_range][0] = # levels
17
// [ise_range][1...] = lerp value [0,64]
18
// in ASTC order
19
// Supported ISE weight ranges: 0 to 11, 12 total
20
const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels
21
const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=16 levels
22
const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2;
23
const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32;
24
25
extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33];
26
27
// Log-domain biases; a bias of this kind is what q()/q2() add to the pixel value before reinterpreting it.
const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias
const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now

// NOTE(review): presumably the nit level that LDR 1.0 maps to when converting LDR content to HDR - confirm against the callers.
const float LDR_TO_HDR_NITS = 100.0f;
31
32
// Options shared by the ASTC HDR encoding routines declared in this header (they take it as `coptions`).
// The constructor calls init(), which is defined outside this header and is expected to establish the defaults.
struct astc_hdr_codec_base_options
{
	// NOTE(review): presumably per-channel error weights for red and green - confirm in the implementation.
	float m_r_err_scale, m_g_err_scale;
	// NOTE(review): presumably the log bias handed to q()/q2() (see Q_LOG_BIAS_4x4 / Q_LOG_BIAS_6x6) - confirm.
	float m_q_log_bias;

	bool m_ultra_quant;

	// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
	bool m_allow_uber_mode;

	bool m_mode7_full_s_optimization;

	// NOTE(review): by their names these stop the submode search at the first submode that packs without clamping - confirm.
	bool m_take_first_non_clamping_mode11_submode;
	bool m_take_first_non_clamping_mode7_submode;

	bool m_disable_weight_plane_optimization;

	astc_hdr_codec_base_options() { init(); }

	// (Re)initializes the options; definition lives outside this header.
	void init();
};
53
54
// Returns bit `src_bit` of `src_val` (bit 0 is the least significant) as 0 or 1.
// `src_bit` must be in [0, 31]; this is only checked by an assert.
inline int get_bit(int src_val, int src_bit)
{
	assert(src_bit >= 0 && src_bit <= 31);

	// Shift in the unsigned domain so the result does not depend on how the
	// implementation right-shifts negative signed values.
	return static_cast<int>((static_cast<uint32_t>(src_val) >> src_bit) & 1u);
}
61
62
inline void pack_bit(
63
int& dst, int dst_bit,
64
int src_val, int src_bit = 0)
65
{
66
assert(dst_bit >= 0 && dst_bit <= 31);
67
int bit = get_bit(src_val, src_bit);
68
dst |= (bit << dst_bit);
69
}
70
71
inline uint32_t get_max_qlog(uint32_t bits)
72
{
73
switch (bits)
74
{
75
case 7: return basist::MAX_QLOG7;
76
case 8: return basist::MAX_QLOG8;
77
case 9: return basist::MAX_QLOG9;
78
case 10: return basist::MAX_QLOG10;
79
case 11: return basist::MAX_QLOG11;
80
case 12: return basist::MAX_QLOG12;
81
case 16: return basist::MAX_QLOG16;
82
default: assert(0); break;
83
}
84
return 0;
85
}
86
87
#if 0
// Disabled: floating point counterpart of get_max_qlog(), returning MAX_QLOG<bits>_VAL for the same set of bit widths.
inline float get_max_qlog_val(uint32_t bits)
{
	switch (bits)
	{
	case 7: return MAX_QLOG7_VAL;
	case 8: return MAX_QLOG8_VAL;
	case 9: return MAX_QLOG9_VAL;
	case 10: return MAX_QLOG10_VAL;
	case 11: return MAX_QLOG11_VAL;
	case 12: return MAX_QLOG12_VAL;
	case 16: return MAX_QLOG16_VAL;
	default: assert(0); break;
	}
	return 0;
}
#endif
104
105
#if 0
// Disabled reference implementation, kept because half_float_mantissa_to_qlog11() is documented as its inverse.
// Input is the low 11 bits of the qlog
// Returns the 10-bit mantissa of the half float value
// Marked inline so that re-enabling it in this header cannot produce multiple definitions.
inline int qlog11_to_half_float_mantissa(int M)
{
	assert(M <= 0x7FF);
	int Mt;
	if (M < 512)
		Mt = 3 * M;
	else if (M >= 1536)
		Mt = 5 * M - 2048;
	else
		Mt = 4 * M - 512;
	return (Mt >> 3);
}
#endif
121
122
// Input is the 10-bit mantissa of the half float value
// Output is the 11-bit qlog value
// Inverse of qlog11_to_half_float_mantissa()
//
// The forward mapping is piecewise linear with three segments (slopes 3/8, 4/8 and 5/8).
// Each segment is inverted with a rounded-up division and the candidate is accepted only
// if it lands inside that segment's qlog range: [0,512), [1536,...) or the middle range.
inline int half_float_mantissa_to_qlog11(int hf)
{
	const int scaled = hf * 8;

	// Low segment: Mt = 3 * M, valid while M < 512.
	const int low_candidate = (scaled + 2) / 3;
	if (low_candidate < 512)
		return low_candidate;

	// High segment: Mt = 5 * M - 2048, valid while M >= 1536.
	const int high_candidate = (scaled + 2048 + 4) / 5;
	if (high_candidate >= 1536)
		return high_candidate;

	// Middle segment: Mt = 4 * M - 512.
	return (scaled + 512 + 2) / 4;
}
138
139
inline int half_to_qlog16(int hf)
140
{
141
assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf));
142
143
// extract 5 bits exponent, which is carried through to qlog16 unchanged
144
const int exp = (hf >> 10) & 0x1F;
145
146
// extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless)
147
const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);
148
assert(mantissa <= 0x7FF);
149
150
// Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights.
151
uint32_t qlog16 = (exp << 11) | mantissa;
152
153
// should be a lossless operation
154
assert(astc_helpers::qlog16_to_half(qlog16) == hf);
155
156
return qlog16;
157
}
158
159
void interpolate_qlog12_colors(
160
const int e[2][3],
161
basist::half_float* pDecoded_half,
162
vec3F* pDecoded_float,
163
uint32_t n, uint32_t ise_weight_range);
164
165
bool get_astc_hdr_mode_11_block_colors(
166
const uint8_t* pEndpoints,
167
basist::half_float* pDecoded_half,
168
vec3F* pDecoded_float,
169
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
170
171
bool get_astc_hdr_mode_7_block_colors(
172
const uint8_t* pEndpoints,
173
basist::half_float* pDecoded_half,
174
vec3F* pDecoded_float,
175
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
176
177
// Fast high precision piecewise linear approximation of log2(bias+x).
178
// Half may be zero, positive or denormal. No NaN/Inf/negative.
179
BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias)
180
{
181
union { float f; int32_t i; uint32_t u; } fi;
182
183
fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
184
185
assert(fi.f >= 0.0f);
186
187
fi.f += log_bias;
188
189
return (double)fi.u; // approx log2f(fi.f), need to return double for the precision
190
}
191
192
// Integer variant of q(): converts `x` to float, adds `log_bias` and returns the raw 32-bit
// IEEE bit pattern of the sum. The converted value is asserted to be non-negative.
BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias)
{
	union { float f; uint32_t u; } bits;

	bits.f = fast_half_to_float_pos_not_inf_or_nan(x);

	assert(bits.f >= 0.0f);

	bits.f += log_bias;

	return bits.u;
}
204
205
double eval_selectors(
206
uint32_t num_pixels,
207
uint8_t* pWeights,
208
uint32_t ise_weight_range,
209
const basist::half_float* pBlock_pixels_half,
210
uint32_t num_weight_levels,
211
const basist::half_float* pDecoded_half,
212
const astc_hdr_codec_base_options& coptions,
213
uint32_t usable_selector_bitmask = UINT32_MAX);
214
215
double eval_selectors_dual_plane(
216
uint32_t channel_index,
217
uint32_t num_pixels,
218
uint8_t* pWeights0, uint8_t* pWeights1,
219
const basist::half_float* pBlock_pixels_half,
220
uint32_t num_weight_levels,
221
const basist::half_float* pDecoded_half,
222
const astc_hdr_codec_base_options& coptions,
223
uint32_t usable_selector_bitmask = UINT32_MAX);
224
225
// Declaration only; defined outside this header.
// NOTE(review): presumably returns the error between `num_pixels` original pixels (`pOrig_block`) and their
// packed/decoded counterparts (`pPacked_block`), weighted according to `coptions` - confirm.
double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions);
226
227
// Inclusive range of mode 7 submode indices (6 submodes: 0..5).
const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0;
const uint32_t MAX_MODE7_SUBMODE_INDEX = 5;
229
230
bool pack_mode7(
231
const vec3F& high_color_q16, const float s_q16,
232
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
233
uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing
234
const astc_hdr_codec_base_options& coptions,
235
int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
236
237
bool try_mode7(
238
uint32_t num_pixels,
239
uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
240
const vec3F& high_color_q16, const float s_q16,
241
const basist::half_float block_pixels_half[][3],
242
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,
243
uint32_t ise_endpoint_range,
244
int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX);
245
246
bool pack_mode11(
247
const vec3F& low_color_q16, const vec3F& high_color_q16,
248
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
249
const astc_hdr_codec_base_options& coptions,
250
bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
251
252
bool try_mode11(uint32_t num_pixels,
253
uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
254
const vec3F& low_color_q16, const vec3F& high_color_q16,
255
const basist::half_float block_pixels_half[][3],
256
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
257
bool constrain_ise_weight_selectors,
258
int32_t first_submode, int32_t last_submode, bool ignore_clamping);
259
260
bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,
261
uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,
262
const vec3F& low_color_q16, const vec3F& high_color_q16,
263
const basist::half_float block_pixels_half[][3],
264
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
265
bool constrain_ise_weight_selectors,
266
int32_t first_submode, int32_t last_submode, bool ignore_clamping);
267
268
// Inclusive range of mode 11 submode indices: -1..7, i.e. 9 values.
// NOTE(review): -1 presumably denotes the extra directly-encoded submode mentioned at MODE11_TOTAL_SUBMODES - confirm.
const int FIRST_MODE11_SUBMODE_INDEX = -1;
const int MAX_MODE11_SUBMODE_INDEX = 7;
270
271
// Endpoint optimization strategy passed to the block encoders and to refine_endpoints().
// Enumerators take the implicit values 0..4 in declaration order.
enum opt_mode_t
{
	cNoOpt,
	cOrdinaryLeastSquares,
	cWeightedLeastSquares,
	cWeightedLeastSquaresHeavy,
	cWeightedAverage
};
279
280
struct encode_astc_block_stats
281
{
282
uint32_t m_num_pixels;
283
vec3F m_mean_q16;
284
vec3F m_axis_q16;
285
286
void init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]);
287
};
288
289
double encode_astc_hdr_block_mode_11(
290
uint32_t num_pixels,
291
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
292
uint32_t ise_weight_range,
293
uint32_t& best_submode,
294
double cur_block_error,
295
uint8_t* blk_endpoints, uint8_t* blk_weights,
296
const astc_hdr_codec_base_options& coptions,
297
bool direct_only,
298
uint32_t ise_endpoint_range,
299
bool uber_mode,
300
bool constrain_ise_weight_selectors,
301
int32_t first_submode, int32_t last_submode, bool ignore_clamping,
302
opt_mode_t opt_mode,
303
const encode_astc_block_stats *pBlock_stats = nullptr);
304
305
double encode_astc_hdr_block_downsampled_mode_11(
306
uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,
307
uint32_t ise_weight_range, uint32_t ise_endpoint_range,
308
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
309
double cur_block_error,
310
int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
311
uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,
312
const astc_hdr_codec_base_options& coptions,
313
const encode_astc_block_stats* pBlock_stats = nullptr);
314
315
double encode_astc_hdr_block_mode_11_dual_plane(
316
uint32_t num_pixels,
317
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
318
uint32_t channel_index, // 0-2
319
uint32_t ise_weight_range,
320
uint32_t& best_submode,
321
double cur_block_error,
322
uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,
323
const astc_hdr_codec_base_options& coptions,
324
bool direct_only,
325
uint32_t ise_endpoint_range,
326
bool uber_mode,
327
bool constrain_ise_weight_selectors,
328
int32_t first_submode, int32_t last_submode,
329
bool ignore_clamping);
330
331
double encode_astc_hdr_block_mode_7(
332
uint32_t num_pixels,
333
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
334
uint32_t ise_weight_range,
335
uint32_t& best_submode,
336
double cur_block_error,
337
uint8_t* blk_endpoints, //[4]
338
uint8_t* blk_weights, // [num_pixels]
339
const astc_hdr_codec_base_options& coptions,
340
uint32_t ise_endpoint_range,
341
int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX,
342
const encode_astc_block_stats *pBlock_stats = nullptr);
343
344
//--------------------------------------------------------------------------------------------------------------------------
345
346
struct mode11_log_desc
347
{
348
int32_t m_submode;
349
int32_t m_maj_comp;
350
351
// Or R0, G0, B0 if maj_comp==3 (direct)
352
int32_t m_a; // positive
353
int32_t m_c; // positive
354
int32_t m_b0; // positive
355
356
// Or R1, G1, B1 if maj_comp==3 (direct)
357
int32_t m_b1; // positive
358
int32_t m_d0; // if not direct, is signed
359
int32_t m_d1; // if not direct, is signed
360
361
// limits if not direct
362
int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits;
363
int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val;
364
365
void clear() { clear_obj(*this); }
366
367
bool is_direct() const { return m_maj_comp == 3; }
368
};
369
370
//--------------------------------------------------------------------------------------------------------------------------
371
// Declaration only; defined outside this header.
// NOTE(review): presumably packs `rgb_q16` / `s_q16` into `pEndpoints` using one specific mode 7 `submode`
// and reports the largest clamping magnitude in `max_clamp_mag` - confirm, including what the bool result
// and `max_clamp_mag_accept_thresh` mean.
bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh);
372
373
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
374
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
375
void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16);
376
377
bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints);
378
void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc);
379
380
// Declarations only; defined outside this header.
// Read the configuration of packed CEM 11 / CEM 7 endpoints and return it through
// `submode_index` and `maj_index`.
// NOTE(review): `maj_index` is presumably the major component index (cf. mode11_log_desc::m_maj_comp) - confirm.
void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
382
383
// Declaration only; defined outside this header.
// NOTE(review): presumably converts `n` ISE weight symbols quantized with `from_ise_range` into raw
// weight values written to `pDst_raw_weights` - confirm the output range.
void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights);
384
385
// Declaration only; defined outside this header.
// NOTE(review): presumably returns the matrix that downsamples a 6x6 block to a `grid_width` x `grid_height`
// weight grid, suitable as `pMatrix_weights` for downsample_weight_grid() - confirm, including the
// result for unsupported grid sizes.
const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height);
386
387
// Declaration only; defined outside this header.
// Resamples a block-sized weight array to a smaller weight grid using `pMatrix_weights`.
// Source and destination layouts are given by the parameter comments below.
void downsample_weight_grid(
	const float* pMatrix_weights,
	uint32_t bx, uint32_t by, // source/from dimension (block size)
	uint32_t wx, uint32_t wy, // dest/to dimension (grid size)
	const uint8_t* pSrc_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]
	uint8_t* pDst_weights); // [wy][wx]
393
394
// Declaration only; defined outside this header.
// NOTE(review): presumably takes ISE weight symbols for a `block_w` x `block_h` block (range
// `weight_ise_range`), downsamples them to `grid_w` x `grid_h` and re-quantizes to
// `quant_weight_ise_range` - confirm.
void downsample_ise_weights(
	uint32_t weight_ise_range, uint32_t quant_weight_ise_range,
	uint32_t block_w, uint32_t block_h,
	uint32_t grid_w, uint32_t grid_h,
	const uint8_t* pSrc_weights, uint8_t* pDst_weights);
399
400
// Declaration only; defined outside this header.
// Dual-plane form of downsample_ise_weights(): two source weight arrays, a single destination array.
// NOTE(review): how the two planes are laid out in `pDst_weights` (e.g. interleaved) is not visible here - confirm.
void downsample_ise_weights_dual_plane(
	uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
	uint32_t block_w, uint32_t block_h,
	uint32_t grid_w, uint32_t grid_h,
	const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,
	uint8_t* pDst_weights);
406
407
bool refine_endpoints(
408
uint32_t cem,
409
uint32_t endpoint_ise_range,
410
uint8_t* pEndpoint_vals, // the endpoints to optimize
411
uint32_t block_w, uint32_t block_h, // block dimensions
412
uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
413
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
414
const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
415
astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode);
416
417
// Defined outside this header.
// NOTE(review): presumably set once astc_hdr_enc_init() has run - confirm.
extern bool g_astc_hdr_enc_initialized;

// This MUST be called before encoding any blocks.
void astc_hdr_enc_init();
421
422
} // namespace basisu
423
424
425