CoCalc -- basisu_astc_hdr

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.h
⁹⁹⁰³ views
1
// File: basisu_astc_hdr_common.h
2
#pragma once
3
#include "basisu_enc.h"
4
#include "basisu_gpu_texture.h"
5
#include "../transcoder/basisu_astc_helpers.h"
6
#include "../transcoder/basisu_astc_hdr_core.h"
7

8
namespace basisu
9
{
10
	// Largest block dimensions, in texels, covered by these limits.
	const uint32_t MAX_ASTC_HDR_BLOCK_W = 6;
	const uint32_t MAX_ASTC_HDR_BLOCK_H = 6;
	// Texel count of the largest block: 6 * 6 = 36.
	const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H;
12

13
	const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus one extra hidden submode for "direct" endpoints, which is encoded directly, so really 9 (see tables 99/100 of the ASTC spec)
14
	const uint32_t MODE7_TOTAL_SUBMODES = 6;
15
		
16
	// [ise_range][0] = # levels
17
	// [ise_range][1...] = lerp value [0,64]
18
	// in ASTC order
19
	// Supported ISE weight ranges: 0 to 11, 12 total
20
	const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels
21
	const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=32 levels
22
	const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2;
23
	const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32;
24

25
	extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33];
26

27
	const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias
28
	const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now
29

30
	const float LDR_TO_HDR_NITS = 100.0f;
31

32
	// Option block passed (by const reference) to the ASTC HDR packing/encoding
	// functions declared in this header.
	struct astc_hdr_codec_base_options
	{
		// NOTE(review): presumably per-channel error scales for red and green - confirm against init() and the error metric.
		float m_r_err_scale;
		float m_g_err_scale;

		// NOTE(review): presumably one of the Q_LOG_BIAS_* values - confirm.
		float m_q_log_bias;

		bool m_ultra_quant;

		// When true, the ASTC HDR compressor may vary weight indices more aggressively
		// for slightly higher compression in non-fastest mode. This hurts BC6H quality.
		bool m_allow_uber_mode;

		bool m_mode7_full_s_optimization;

		bool m_take_first_non_clamping_mode11_submode;
		bool m_take_first_non_clamping_mode7_submode;

		bool m_disable_weight_plane_optimization;

		// Construction delegates to init(), which is only declared here.
		astc_hdr_codec_base_options() { init(); }

		void init();
	};
53

54
	// Returns bit number src_bit (0 = least significant, valid range 0-31) of src_val as 0 or 1.
	inline int get_bit(int src_val, int src_bit)
	{
		assert(src_bit >= 0 && src_bit <= 31);
		return (src_val >> src_bit) & 1;
	}

	// ORs bit src_bit of src_val into position dst_bit of dst.
	// Bits already set in dst are never cleared; both bit positions must lie in 0-31.
	inline void pack_bit(
		int& dst, int dst_bit,
		int src_val, int src_bit = 0)
	{
		assert(dst_bit >= 0 && dst_bit <= 31);
		dst |= (get_bit(src_val, src_bit) << dst_bit);
	}
70

71
	inline uint32_t get_max_qlog(uint32_t bits)
72
	{
73
		switch (bits)
74
		{
75
		case 7: return basist::MAX_QLOG7;
76
		case 8: return basist::MAX_QLOG8;
77
		case 9: return basist::MAX_QLOG9;
78
		case 10: return basist::MAX_QLOG10;
79
		case 11: return basist::MAX_QLOG11;
80
		case 12: return basist::MAX_QLOG12;
81
		case 16: return basist::MAX_QLOG16;
82
		default: assert(0); break;
83
		}
84
		return 0;
85
	}
86

87
#if 0
88
	inline float get_max_qlog_val(uint32_t bits)
89
	{
90
		switch (bits)
91
		{
92
		case 7: return MAX_QLOG7_VAL;
93
		case 8: return MAX_QLOG8_VAL;
94
		case 9: return MAX_QLOG9_VAL;
95
		case 10: return MAX_QLOG10_VAL;
96
		case 11: return MAX_QLOG11_VAL;
97
		case 12: return MAX_QLOG12_VAL;
98
		case 16: return MAX_QLOG16_VAL;
99
		default: assert(0); break;
100
		}
101
		return 0;
102
	}
103
#endif
104

105
#if 0
106
	// Converts the low 11 bits of a qlog value into the 10-bit mantissa of the
	// corresponding half float.
	//
	// M must be in [0, 0x7FF]. The mapping is piecewise linear over three segments
	// (M < 512, 512 <= M < 1536, M >= 1536); the scaled result is shifted down by 3 bits.
	//
	// Declared `inline` because it is defined in a header: without it, enabling
	// this region in more than one translation unit would violate the ODR.
	inline int qlog11_to_half_float_mantissa(int M)
	{
		// Reject negative inputs as well as values wider than 11 bits.
		assert(M >= 0 && M <= 0x7FF);

		int Mt;
		if (M < 512)
			Mt = 3 * M;
		else if (M >= 1536)
			Mt = 5 * M - 2048;
		else
			Mt = 4 * M - 512;

		return (Mt >> 3);
	}
120
#endif
121

122
	// Maps the 10-bit mantissa of a half float to an 11-bit qlog value.
	// This is the inverse of qlog11_to_half_float_mantissa(): that mapping has three
	// linear segments, and each candidate below inverts one of them (with a small
	// rounding bias). The first candidate that lands inside its own segment wins.
	inline int half_float_mantissa_to_qlog11(int hf)
	{
		const int scaled = hf * 8;

		// Segment for qlog values below 512.
		const int low_seg = (scaled + 2) / 3;
		if (low_seg < 512)
			return low_seg;

		// Segment for qlog values of 1536 and above.
		const int high_seg = (scaled + 2048 + 4) / 5;
		if (high_seg >= 1536)
			return high_seg;

		// Otherwise the value belongs to the middle segment.
		return (scaled + 512 + 2) / 4;
	}
138

139
	inline int half_to_qlog16(int hf)
140
	{
141
		assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf));
142

143
		// extract 5 bits exponent, which is carried through to qlog16 unchanged
144
		const int exp = (hf >> 10) & 0x1F;
145

146
		// extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless)
147
		const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);
148
		assert(mantissa <= 0x7FF);
149

150
		// Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights.
151
		uint32_t qlog16 = (exp << 11) | mantissa;
152

153
		// should be a lossless operation
154
		assert(astc_helpers::qlog16_to_half(qlog16) == hf);
155

156
		return qlog16;
157
	}
158

159
	void interpolate_qlog12_colors(
160
		const int e[2][3],
161
		basist::half_float* pDecoded_half,
162
		vec3F* pDecoded_float,
163
		uint32_t n, uint32_t ise_weight_range);
164

165
	bool get_astc_hdr_mode_11_block_colors(
166
		const uint8_t* pEndpoints,
167
		basist::half_float* pDecoded_half,
168
		vec3F* pDecoded_float,
169
		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
170

171
	bool get_astc_hdr_mode_7_block_colors(
172
		const uint8_t* pEndpoints,
173
		basist::half_float* pDecoded_half,
174
		vec3F* pDecoded_float,
175
		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
176
			
177
	// Fast high precision piecewise linear approximation of log2(bias+x).
178
	// Half may be zero, positive or denormal. No NaN/Inf/negative.
179
	BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias)
180
	{
181
		union { float f; int32_t i; uint32_t u; } fi;
182

183
		fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
184

185
		assert(fi.f >= 0.0f);
186
						
187
		fi.f += log_bias;
188

189
		return (double)fi.u; // approx log2f(fi.f), need to return double for the precision
190
	}
191

192
	// Integer-returning variant of q(): widens the half to float, adds log_bias,
	// and returns the raw 32-bit pattern of the sum as a uint32_t.
	// The widened input is asserted to be non-negative.
	BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias)
	{
		union { float f; uint32_t u; } fi;

		fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
		assert(fi.f >= 0.0f);

		fi.f += log_bias;

		return fi.u;
	}
204

205
	double eval_selectors(
206
		uint32_t num_pixels,
207
		uint8_t* pWeights,
208
		uint32_t ise_weight_range,
209
		const basist::half_float* pBlock_pixels_half,
210
		uint32_t num_weight_levels,
211
		const basist::half_float* pDecoded_half,
212
		const astc_hdr_codec_base_options& coptions,
213
		uint32_t usable_selector_bitmask = UINT32_MAX);
214

215
	double eval_selectors_dual_plane(
216
		uint32_t channel_index,
217
		uint32_t num_pixels,
218
		uint8_t* pWeights0, uint8_t* pWeights1,
219
		const basist::half_float* pBlock_pixels_half,
220
		uint32_t num_weight_levels,
221
		const basist::half_float* pDecoded_half,
222
		const astc_hdr_codec_base_options& coptions,
223
		uint32_t usable_selector_bitmask = UINT32_MAX);
224

225
	double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions);
226

227
	const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0;
228
	const uint32_t MAX_MODE7_SUBMODE_INDEX = 5;
229

230
	bool pack_mode7(
231
		const vec3F& high_color_q16, const float s_q16,
232
		uint32_t ise_endpoint_range, uint8_t* pEndpoints,
233
		uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing
234
		const astc_hdr_codec_base_options& coptions,
235
		int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
236

237
	bool try_mode7(
238
		uint32_t num_pixels,
239
		uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
240
		const vec3F& high_color_q16, const float s_q16,
241
		const basist::half_float block_pixels_half[][3],
242
		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,
243
		uint32_t ise_endpoint_range,
244
		int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX);
245

246
	bool pack_mode11(
247
		const vec3F& low_color_q16, const vec3F& high_color_q16,
248
		uint32_t ise_endpoint_range, uint8_t* pEndpoints,
249
		const astc_hdr_codec_base_options& coptions,
250
		bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
251

252
	bool try_mode11(uint32_t num_pixels,
253
		uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
254
		const vec3F& low_color_q16, const vec3F& high_color_q16,
255
		const basist::half_float block_pixels_half[][3],
256
		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
257
		bool constrain_ise_weight_selectors,
258
		int32_t first_submode, int32_t last_submode, bool ignore_clamping);
259

260
	bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,
261
		uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,
262
		const vec3F& low_color_q16, const vec3F& high_color_q16,
263
		const basist::half_float block_pixels_half[][3],
264
		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
265
		bool constrain_ise_weight_selectors,
266
		int32_t first_submode, int32_t last_submode, bool ignore_clamping);
267

268
	const int FIRST_MODE11_SUBMODE_INDEX = -1;
269
	const int MAX_MODE11_SUBMODE_INDEX = 7;
270

271
	// Selector passed as the `opt_mode` argument of the encode/refine functions
	// declared in this header.
	// NOTE(review): the enumerator names suggest endpoint-fitting strategies; their
	// exact behavior is defined by the implementation - confirm there.
	enum opt_mode_t
	{
		cNoOpt = 0,
		cOrdinaryLeastSquares = 1,
		cWeightedLeastSquares = 2,
		cWeightedLeastSquaresHeavy = 3,
		cWeightedAverage = 4
	};
279

280
	struct encode_astc_block_stats
281
	{
282
		uint32_t m_num_pixels;
283
		vec3F m_mean_q16;
284
		vec3F m_axis_q16;
285

286
		void init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]);
287
	};
288

289
	double encode_astc_hdr_block_mode_11(
290
		uint32_t num_pixels,
291
		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
292
		uint32_t ise_weight_range,
293
		uint32_t& best_submode,
294
		double cur_block_error,
295
		uint8_t* blk_endpoints, uint8_t* blk_weights,
296
		const astc_hdr_codec_base_options& coptions,
297
		bool direct_only,
298
		uint32_t ise_endpoint_range,
299
		bool uber_mode,
300
		bool constrain_ise_weight_selectors,
301
		int32_t first_submode, int32_t last_submode, bool ignore_clamping, 
302
		opt_mode_t opt_mode, 
303
		const encode_astc_block_stats *pBlock_stats = nullptr);
304

305
	double encode_astc_hdr_block_downsampled_mode_11(
306
		uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,
307
		uint32_t ise_weight_range, uint32_t ise_endpoint_range,
308
		uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
309
		double cur_block_error,
310
		int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
311
		uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,
312
		const astc_hdr_codec_base_options& coptions,
313
		const encode_astc_block_stats* pBlock_stats = nullptr);
314

315
	double encode_astc_hdr_block_mode_11_dual_plane(
316
		uint32_t num_pixels,
317
		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
318
		uint32_t channel_index,		// 0-2
319
		uint32_t ise_weight_range,
320
		uint32_t& best_submode,
321
		double cur_block_error,
322
		uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,
323
		const astc_hdr_codec_base_options& coptions,
324
		bool direct_only,
325
		uint32_t ise_endpoint_range,
326
		bool uber_mode,
327
		bool constrain_ise_weight_selectors,
328
		int32_t first_submode, int32_t last_submode, 
329
		bool ignore_clamping);
330

331
	double encode_astc_hdr_block_mode_7(
332
		uint32_t num_pixels,
333
		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
334
		uint32_t ise_weight_range,
335
		uint32_t& best_submode,
336
		double cur_block_error,
337
		uint8_t* blk_endpoints,  //[4]
338
		uint8_t* blk_weights, // [num_pixels]
339
		const astc_hdr_codec_base_options& coptions,
340
		uint32_t ise_endpoint_range, 
341
		int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, 
342
		const encode_astc_block_stats *pBlock_stats = nullptr);
343

344
	//--------------------------------------------------------------------------------------------------------------------------
345

346
	struct mode11_log_desc
347
	{
348
		int32_t m_submode;
349
		int32_t m_maj_comp;
350

351
		// Or R0, G0, B0 if maj_comp==3 (direct)
352
		int32_t m_a;  // positive
353
		int32_t m_c;  // positive
354
		int32_t m_b0; // positive
355

356
		// Or R1, G1, B1 if maj_comp==3 (direct)
357
		int32_t m_b1; // positive
358
		int32_t m_d0; // if not direct, is signed
359
		int32_t m_d1; // if not direct, is signed
360

361
		// limits if not direct
362
		int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits;
363
		int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val;
364

365
		void clear() { clear_obj(*this); }
366

367
		bool is_direct() const { return m_maj_comp == 3; }
368
	};
369

370
	//--------------------------------------------------------------------------------------------------------------------------
371
	bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh);
372

373
	bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
374
	bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
375
	void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16);
376
	
377
	bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints);
378
	void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc);
379

380
	void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
381
	void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
382
		
383
	void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights);
384

385
	const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height);
386
	
387
	// Downsamples a block-sized weight array to a smaller weight grid (declaration only).
	//   pMatrix_weights : downsample matrix coefficients.
	//                     NOTE(review): presumably from get_6x6_downsample_matrix() - confirm.
	//   bx, by          : source/from dimension (block size).
	//   wx, wy          : dest/to dimension (grid size).
	//   pSrc_weights    : dequantized weights, NOT ISE symbols, laid out [by][bx].
	//   pDst_weights    : output weights, laid out [wy][wx].
	void downsample_weight_grid(
		const float* pMatrix_weights,
		uint32_t bx,
		uint32_t by,
		uint32_t wx,
		uint32_t wy,
		const uint8_t* pSrc_weights,
		uint8_t* pDst_weights);
393

394
	void downsample_ise_weights(
395
		uint32_t weight_ise_range, uint32_t quant_weight_ise_range,
396
		uint32_t block_w, uint32_t block_h,
397
		uint32_t grid_w, uint32_t grid_h,
398
		const uint8_t* pSrc_weights, uint8_t* pDst_weights);
399

400
	void downsample_ise_weights_dual_plane(
401
		uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
402
		uint32_t block_w, uint32_t block_h,
403
		uint32_t grid_w, uint32_t grid_h,
404
		const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,
405
		uint8_t* pDst_weights);
406

407
	bool refine_endpoints(
408
		uint32_t cem,
409
		uint32_t endpoint_ise_range,
410
		uint8_t* pEndpoint_vals, // the endpoints to optimize
411
		uint32_t block_w, uint32_t block_h, // block dimensions
412
		uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
413
		uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
414
		const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
415
		astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode);
416
	
417
	extern bool g_astc_hdr_enc_initialized;
418

419
	// This MUST be called before encoding any blocks.
420
	void astc_hdr_enc_init();
421

422
} // namespace basisu
423

424

425
Product

Resources

Company