CoCalc -- basisu_transcoder

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
²¹¹⁷⁹ views
1
// basisu_transcoder_internal.h - Universal texture format transcoder library.
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
5
//
6
// Licensed under the Apache License, Version 2.0 (the "License");
7
// you may not use this file except in compliance with the License.
8
// You may obtain a copy of the License at
9
//
10
//    http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
#pragma once
18

19
#ifdef _MSC_VER
20
#pragma warning (disable: 4127) //  conditional expression is constant
21
#endif
22

23
// v1.50: Added UASTC HDR 4x4 support
24
// v1.60: Added RDO ASTC HDR 6x6 and intermediate support
25
#define BASISD_LIB_VERSION 160
26
#define BASISD_VERSION_STRING "01.60"
27

28
#ifdef _DEBUG
29
#define BASISD_BUILD_DEBUG
30
#else
31
#define BASISD_BUILD_RELEASE
32
#endif
33

34
#include "basisu.h"
35

36
#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16))
37

38
namespace basisu
39
{
40
	extern bool g_debug_printf;
41
}
42

43
namespace basist
44
{
45
	// Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).
46
	// You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices.
47
	// Output formats the low-level transcoder can write directly; the other supported texture formats are combinations of these.
	// Only relevant to callers that go low-level and ask the transcoder to decode individual slices.
	// No enumerator has an explicit value, so the numeric values simply follow declaration order.
	enum class block_format
	{
		// ETC1S RGB
		cETC1,
		// Full ETC2 EAC RGBA8 block
		cETC2_RGBA,
		// DXT1 RGB
		cBC1,
		// BC4 block followed by a four color BC1 block
		cBC3,
		// DXT5A (alpha block only)
		cBC4,
		// Two BC4 blocks
		cBC5,
		// Opaque-only PVRTC1 4bpp
		cPVRTC1_4_RGB,
		// PVRTC1 4bpp RGBA
		cPVRTC1_4_RGBA,
		// Full BC7 block, any mode
		cBC7,
		// RGB BC7 mode 5 color (writes an opaque mode 5 block)
		cBC7_M5_COLOR,
		// Alpha portion of BC7 mode 5. cBC7_M5_COLOR output must already have been written to the output buffer,
		// because that pass sets the mode/rot fields etc.
		cBC7_M5_ALPHA,
		// Alpha block of ETC2 EAC (first 8 bytes of the 16-byte ETC2 EAC RGBA format)
		cETC2_EAC_A8,
		// ASTC 4x4 (either color-only or color+alpha). The transcoder currently always assumes sRGB is NOT enabled when
		// outputting ASTC data. With a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different
		// way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking.
		cASTC_4x4,

		cATC_RGB,
		cATC_RGBA_INTERPOLATED_ALPHA,
		// Opaque-only, has oddball 8x4 pixel block size
		cFXT1_RGB,

		cPVRTC2_4_RGB,
		cPVRTC2_4_RGBA,

		cETC2_EAC_R11,
		cETC2_EAC_RG11,

		// Used internally: writes 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits)
		cIndices,

		// Writes RGB components to 32bpp output pixels
		cRGB32,
		// Writes RGB255 components to 32bpp output pixels
		cRGBA32,
		// Writes alpha component to 32bpp output pixels
		cA32,

		cRGB565,
		cBGR565,

		cRGBA4444_COLOR,
		cRGBA4444_ALPHA,
		cRGBA4444_COLOR_OPAQUE,
		cRGBA4444,
		cRGBA_HALF,
		cRGB_HALF,
		cRGB_9E5,

		// LDR, universal
		cUASTC_4x4,
		// HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
		cUASTC_HDR_4x4,
		cBC6H,
		cASTC_HDR_4x4,
		cASTC_HDR_6x6,

		// Number of formats above; keep last.
		cTotalBlockFormats
	};
99

100
	// Block width in pixels for fmt: 8 for cFXT1_RGB, 6 for cASTC_HDR_6x6, 4 for every other format.
	inline uint32_t get_block_width(block_format fmt)
	{
		if (fmt == block_format::cFXT1_RGB)
			return 8;

		if (fmt == block_format::cASTC_HDR_6x6)
			return 6;

		return 4;
	}
113

114
	// Block height in pixels for fmt: 6 for cASTC_HDR_6x6, 4 for every other format.
	inline uint32_t get_block_height(block_format fmt)
	{
		return (fmt == block_format::cASTC_HDR_6x6) ? 6 : 4;
	}
125

126
	// color5 palette limits, three palettes (PAL0..PAL2).
	// NOTE(review): the PREV_HI / DELTA_LO / DELTA_HI meanings are only implied by the names; the code using them is not in this header.
	const int COLOR5_PAL0_PREV_HI = 9;
	const int COLOR5_PAL0_DELTA_LO = -9;
	const int COLOR5_PAL0_DELTA_HI = 31;

	const int COLOR5_PAL1_PREV_HI = 21;
	const int COLOR5_PAL1_DELTA_LO = -21;
	const int COLOR5_PAL1_DELTA_HI = 21;

	const int COLOR5_PAL2_PREV_HI = 31;
	const int COLOR5_PAL2_DELTA_LO = -31;
	const int COLOR5_PAL2_DELTA_HI = 9;

	const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3;
	const int COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3;

	// Endpoint predictor symbols: 4^4 regular symbols plus one extra, which is the "repeat last" symbol.
	const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1;
	const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1;
	const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3;
	const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4;

	const uint32_t NUM_ENDPOINT_PREDS = 3; // BASISU_ARRAY_SIZE(g_endpoint_preds);
	const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1;
	const uint32_t NO_ENDPOINT_PRED_INDEX = 3; // NUM_ENDPOINT_PREDS;

	// Selector history buffer limits.
	const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64;
	const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;
	const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;
	const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
143
		
144
	// Declaration only; the definition is not part of this header.
	// NOTE(review): presumably computes (or continues) a CRC-16 over `size` bytes starting at `r`, with `crc` as the running value - confirm against the definition.
	uint16_t crc16(const void *r, size_t size, uint16_t crc);
145
		
146
	class huffman_decoding_table
147
	{
148
		friend class bitwise_decoder;
149

150
	public:
151
		huffman_decoding_table()
152
		{
153
		}
154

155
		void clear()
156
		{
157
			basisu::clear_vector(m_code_sizes);
158
			basisu::clear_vector(m_lookup);
159
			basisu::clear_vector(m_tree);
160
		}
161

162
		bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)
163
		{
164
			if (!total_syms)
165
			{
166
				clear();
167
				return true;
168
			}
169

170
			m_code_sizes.resize(total_syms);
171
			memcpy(&m_code_sizes[0], pCode_sizes, total_syms);
172

173
			const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
174

175
			m_lookup.resize(0);
176
			m_lookup.resize(huffman_fast_lookup_size);
177

178
			m_tree.resize(0);
179
			m_tree.resize(total_syms * 2);
180

181
			uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
182
			basisu::clear_obj(syms_using_codesize);
183
			for (uint32_t i = 0; i < total_syms; i++)
184
			{
185
				if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize)
186
					return false;
187
				syms_using_codesize[pCode_sizes[i]]++;
188
			}
189

190
			uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1];
191
			next_code[0] = next_code[1] = 0;
192

193
			uint32_t used_syms = 0, total = 0;
194
			for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++)
195
			{
196
				used_syms += syms_using_codesize[i];
197
				next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1));
198
			}
199

200
			if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U))
201
				return false;
202

203
			for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index)
204
			{
205
				uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index];
206
				if (!code_size)
207
					continue;
208

209
				cur_code = next_code[code_size]++;
210

211
				for (l = code_size; l > 0; l--, cur_code >>= 1)
212
					rev_code = (rev_code << 1) | (cur_code & 1);
213

214
				if (code_size <= fast_lookup_bits)
215
				{
216
					uint32_t k = (code_size << 16) | sym_index;
217
					while (rev_code < huffman_fast_lookup_size)
218
					{
219
						if (m_lookup[rev_code] != 0)
220
						{
221
							// Supplied codesizes can't create a valid prefix code.
222
							return false;
223
						}
224

225
						m_lookup[rev_code] = k;
226
						rev_code += (1 << code_size);
227
					}
228
					continue;
229
				}
230

231
				int tree_cur;
232
				if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))
233
				{
234
					const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);
235
					if (m_lookup[idx] != 0)
236
					{
237
						// Supplied codesizes can't create a valid prefix code.
238
						return false;
239
					}
240

241
					m_lookup[idx] = tree_next;
242
					tree_cur = tree_next;
243
					tree_next -= 2;
244
				}
245

246
				if (tree_cur >= 0)
247
				{
248
					// Supplied codesizes can't create a valid prefix code.
249
					return false;
250
				}
251

252
				rev_code >>= (fast_lookup_bits - 1);
253

254
				for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)
255
				{
256
					tree_cur -= ((rev_code >>= 1) & 1);
257

258
					const int idx = -tree_cur - 1;
259
					if (idx < 0)
260
						return false;
261
					else if (idx >= (int)m_tree.size())
262
						m_tree.resize(idx + 1);
263
										
264
					if (!m_tree[idx])
265
					{
266
						m_tree[idx] = (int16_t)tree_next;
267
						tree_cur = tree_next;
268
						tree_next -= 2;
269
					}
270
					else
271
					{
272
						tree_cur = m_tree[idx];
273
						if (tree_cur >= 0)
274
						{
275
							// Supplied codesizes can't create a valid prefix code.
276
							return false;
277
						}
278
					}
279
				}
280

281
				tree_cur -= ((rev_code >>= 1) & 1);
282

283
				const int idx = -tree_cur - 1;
284
				if (idx < 0)
285
					return false;
286
				else if (idx >= (int)m_tree.size())
287
					m_tree.resize(idx + 1);
288

289
				if (m_tree[idx] != 0)
290
				{
291
					// Supplied codesizes can't create a valid prefix code.
292
					return false;
293
				}
294

295
				m_tree[idx] = (int16_t)sym_index;
296
			}
297

298
			return true;
299
		}
300

301
		const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
302
		const basisu::int_vec &get_lookup() const { return m_lookup; }
303
		const basisu::int16_vec &get_tree() const { return m_tree; }
304

305
		bool is_valid() const { return m_code_sizes.size() > 0; }
306

307
	private:
308
		basisu::uint8_vec m_code_sizes;
309
		basisu::int_vec m_lookup;
310
		basisu::int16_vec m_tree;
311
	};
312

313
	class bitwise_decoder
314
	{
315
	public:
316
		bitwise_decoder() :
317
			m_buf_size(0),
318
			m_pBuf(nullptr),
319
			m_pBuf_start(nullptr),
320
			m_pBuf_end(nullptr),
321
			m_bit_buf(0),
322
			m_bit_buf_size(0)
323
		{
324
		}
325

326
		void clear()
327
		{
328
			m_buf_size = 0;
329
			m_pBuf = nullptr;
330
			m_pBuf_start = nullptr;
331
			m_pBuf_end = nullptr;
332
			m_bit_buf = 0;
333
			m_bit_buf_size = 0;
334
		}
335

336
		bool init(const uint8_t *pBuf, uint32_t buf_size)
337
		{
338
			if ((!pBuf) && (buf_size))
339
				return false;
340

341
			m_buf_size = buf_size;
342
			m_pBuf = pBuf;
343
			m_pBuf_start = pBuf;
344
			m_pBuf_end = pBuf + buf_size;
345
			m_bit_buf = 0;
346
			m_bit_buf_size = 0;
347
			return true;
348
		}
349

350
		void stop()
351
		{
352
		}
353

354
		inline uint32_t peek_bits(uint32_t num_bits)
355
		{
356
			if (!num_bits)
357
				return 0;
358

359
			assert(num_bits <= 25);
360

361
			while (m_bit_buf_size < num_bits)
362
			{
363
				uint32_t c = 0;
364
				if (m_pBuf < m_pBuf_end)
365
					c = *m_pBuf++;
366

367
				m_bit_buf |= (c << m_bit_buf_size);
368
				m_bit_buf_size += 8;
369
				assert(m_bit_buf_size <= 32);
370
			}
371

372
			return m_bit_buf & ((1 << num_bits) - 1);
373
		}
374

375
		void remove_bits(uint32_t num_bits)
376
		{
377
			assert(m_bit_buf_size >= num_bits);
378

379
			m_bit_buf >>= num_bits;
380
			m_bit_buf_size -= num_bits;
381
		}
382

383
		uint32_t get_bits(uint32_t num_bits)
384
		{
385
			if (num_bits > 25)
386
			{
387
				assert(num_bits <= 32);
388

389
				const uint32_t bits0 = peek_bits(25);
390
				m_bit_buf >>= 25;
391
				m_bit_buf_size -= 25;
392
				num_bits -= 25;
393

394
				const uint32_t bits = peek_bits(num_bits);
395
				m_bit_buf >>= num_bits;
396
				m_bit_buf_size -= num_bits;
397

398
				return bits0 | (bits << 25);
399
			}
400

401
			const uint32_t bits = peek_bits(num_bits);
402

403
			m_bit_buf >>= num_bits;
404
			m_bit_buf_size -= num_bits;
405

406
			return bits;
407
		}
408

409
		uint32_t decode_truncated_binary(uint32_t n)
410
		{
411
			assert(n >= 2);
412

413
			const uint32_t k = basisu::floor_log2i(n);
414
			const uint32_t u = (1 << (k + 1)) - n;
415

416
			uint32_t result = get_bits(k);
417

418
			if (result >= u)
419
				result = ((result << 1) | get_bits(1)) - u;
420

421
			return result;
422
		}
423

424
		uint32_t decode_rice(uint32_t m)
425
		{
426
			assert(m);
427

428
			uint32_t q = 0;
429
			for (;;)
430
			{
431
				uint32_t k = peek_bits(16);
432
				
433
				uint32_t l = 0;
434
				while (k & 1)
435
				{
436
					l++;
437
					k >>= 1;
438
				}
439
				
440
				q += l;
441

442
				remove_bits(l);
443

444
				if (l < 16)
445
					break;
446
			}
447

448
			return (q << m) + (get_bits(m + 1) >> 1);
449
		}
450

451
		inline uint32_t decode_vlc(uint32_t chunk_bits)
452
		{
453
			assert(chunk_bits);
454

455
			const uint32_t chunk_size = 1 << chunk_bits;
456
			const uint32_t chunk_mask = chunk_size - 1;
457
					
458
			uint32_t v = 0;
459
			uint32_t ofs = 0;
460

461
			for ( ; ; )
462
			{
463
				uint32_t s = get_bits(chunk_bits + 1);
464
				v |= ((s & chunk_mask) << ofs);
465
				ofs += chunk_bits;
466

467
				if ((s & chunk_size) == 0)
468
					break;
469
				
470
				if (ofs >= 32)
471
				{
472
					assert(0);
473
					break;
474
				}
475
			}
476

477
			return v;
478
		}
479

480
		inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)
481
		{
482
			assert(ct.m_code_sizes.size());
483

484
			const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
485
						
486
			while (m_bit_buf_size < 16)
487
			{
488
				uint32_t c = 0;
489
				if (m_pBuf < m_pBuf_end)
490
					c = *m_pBuf++;
491

492
				m_bit_buf |= (c << m_bit_buf_size);
493
				m_bit_buf_size += 8;
494
				assert(m_bit_buf_size <= 32);
495
			}
496
						
497
			int code_len;
498

499
			int sym;
500
			if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)
501
			{
502
				code_len = sym >> 16;
503
				sym &= 0xFFFF;
504
			}
505
			else
506
			{
507
				code_len = fast_lookup_bits;
508
				do
509
				{
510
					sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1
511
				} while (sym < 0);
512
			}
513

514
			m_bit_buf >>= code_len;
515
			m_bit_buf_size -= code_len;
516

517
			return sym;
518
		}
519

520
		bool read_huffman_table(huffman_decoding_table &ct)
521
		{
522
			ct.clear();
523

524
			const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2);
525

526
			if (!total_used_syms)
527
				return true;
528
			if (total_used_syms > basisu::cHuffmanMaxSyms)
529
				return false;
530

531
			uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes];
532
			basisu::clear_obj(code_length_code_sizes);
533

534
			const uint32_t num_codelength_codes = get_bits(5);
535
			if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes))
536
				return false;
537

538
			for (uint32_t i = 0; i < num_codelength_codes; i++)
539
				code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3));
540

541
			huffman_decoding_table code_length_table;
542
			if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes))
543
				return false;
544

545
			if (!code_length_table.is_valid())
546
				return false;
547

548
			basisu::uint8_vec code_sizes(total_used_syms);
549

550
			uint32_t cur = 0;
551
			while (cur < total_used_syms)
552
			{
553
				int c = decode_huffman(code_length_table);
554

555
				if (c <= 16)
556
					code_sizes[cur++] = static_cast<uint8_t>(c);
557
				else if (c == basisu::cHuffmanSmallZeroRunCode)
558
					cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin;
559
				else if (c == basisu::cHuffmanBigZeroRunCode)
560
					cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin;
561
				else
562
				{
563
					if (!cur)
564
						return false;
565

566
					uint32_t l;
567
					if (c == basisu::cHuffmanSmallRepeatCode)
568
						l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin;
569
					else
570
						l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin;
571

572
					const uint8_t prev = code_sizes[cur - 1];
573
					if (prev == 0)
574
						return false;
575
					do
576
					{
577
						if (cur >= total_used_syms)
578
							return false;
579
						code_sizes[cur++] = prev;
580
					} while (--l > 0);
581
				}
582
			}
583

584
			if (cur != total_used_syms)
585
				return false;
586

587
			return ct.init(total_used_syms, &code_sizes[0]);
588
		}
589

590
		size_t get_bits_remaining() const
591
		{
592
			size_t total_bytes_remaining = m_pBuf_end - m_pBuf;
593
			return total_bytes_remaining * 8 + m_bit_buf_size;
594
		}
595

596
	private:
597
		uint32_t m_buf_size;
598
		const uint8_t *m_pBuf;
599
		const uint8_t *m_pBuf_start;
600
		const uint8_t *m_pBuf_end;
601

602
		uint32_t m_bit_buf;
603
		uint32_t m_bit_buf_size;
604
	};
605

606
	inline uint32_t basisd_rand(uint32_t seed)
607
	{
608
		if (!seed)
609
			seed++;
610
		uint32_t z = seed;
611
		BASISD_znew;
612
		return z;
613
	}
614

615
	// Returns random number in [0,limit). Max limit is 0xFFFF.
616
	inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit)
617
	{
618
		seed = basisd_rand(seed);
619
		return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16;
620
	}
621

622
	class approx_move_to_front
623
	{
624
	public:
625
		approx_move_to_front(uint32_t n)
626
		{
627
			init(n);
628
		}
629

630
		void init(uint32_t n)
631
		{
632
			m_values.resize(n);
633
			m_rover = n / 2;
634
		}
635

636
		const basisu::int_vec& get_values() const { return m_values; }
637
		basisu::int_vec& get_values() { return m_values; }
638

639
		uint32_t size() const { return (uint32_t)m_values.size(); }
640

641
		const int& operator[] (uint32_t index) const { return m_values[index]; }
642
		int operator[] (uint32_t index) { return m_values[index]; }
643

644
		void add(int new_value)
645
		{
646
			m_values[m_rover++] = new_value;
647
			if (m_rover == m_values.size())
648
				m_rover = (uint32_t)m_values.size() / 2;
649
		}
650

651
		void use(uint32_t index)
652
		{
653
			if (index)
654
			{
655
				//std::swap(m_values[index / 2], m_values[index]);
656
				int x = m_values[index / 2];
657
				int y = m_values[index];
658
				m_values[index / 2] = y;
659
				m_values[index] = x;
660
			}
661
		}
662

663
		// returns -1 if not found
664
		int find(int value) const
665
		{
666
			for (uint32_t i = 0; i < m_values.size(); i++)
667
				if (m_values[i] == value)
668
					return i;
669
			return -1;
670
		}
671

672
		void reset()
673
		{
674
			const uint32_t n = (uint32_t)m_values.size();
675

676
			m_values.clear();
677

678
			init(n);
679
		}
680

681
	private:
682
		basisu::int_vec m_values;
683
		uint32_t m_rover;
684
	};
685

686
	struct decoder_etc_block;
687
	
688
	// Clamps i to the range [0, 255].
	inline uint8_t clamp255(int32_t i)
	{
		if (i < 0)
			return 0;

		if (i > 255)
			return 255;

		return (uint8_t)i;
	}
692

693
	// Tag type: passing cNoClamp selects the color32 constructor overload that takes it as its first argument.
	enum eNoClamp
	{
		cNoClamp = 0
	};
697

698
	struct color32
699
	{
700
		union
701
		{
702
			struct
703
			{
704
				uint8_t r;
705
				uint8_t g;
706
				uint8_t b;
707
				uint8_t a;
708
			};
709

710
			uint8_t c[4];
711
			
712
			uint32_t m;
713
		};
714

715
		color32() { }
716

717
		color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
718
		color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); }
719

720
		void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); }
721

722
		void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); }
723
		void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
724

725
		void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg);	c[2] = clamp255(vb); c[3] = clamp255(va); }
726

727
		uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; }
728
		uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; }
729

730
		bool operator== (const color32&rhs) const { return m == rhs.m; }
731

732
		static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); }
733
		static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); }
734
	};
735

736
	struct endpoint
737
	{
738
		color32 m_color5;
739
		uint8_t m_inten5;
740
		bool operator== (const endpoint& rhs) const
741
		{
742
			return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5);
743
		}
744
		bool operator!= (const endpoint& rhs) const { return !(*this == rhs); }
745
	};
746

747
	// Selectors of one 4x4 block, kept in two layouts at once (plain 2-bit values and ETC1 bit planes), plus summary flags.
	struct selector
	{
		// Plain selectors (2-bits per value): one byte per row y, the value for column x is at bits [2x, 2x+1].
		uint8_t m_selectors[4];

		// ETC1 selectors: set_selector() writes the low bit of the ETC1 value into m_bytes[2..3] and the high bit into m_bytes[0..1].
		uint8_t m_bytes[4];

		// Filled in by init_flags().
		uint8_t m_lo_selector, m_hi_selector;
		uint8_t m_num_unique_selectors;

		// Only the plain selectors are compared.
		bool operator== (const selector& rhs) const
		{
			for (uint32_t row = 0; row < 4; row++)
			{
				if (m_selectors[row] != rhs.m_selectors[row])
					return false;
			}
			return true;
		}

		bool operator!= (const selector& rhs) const
		{
			return !(*this == rhs);
		}

		// Recomputes the lowest and highest selector value in use and the number of distinct values, from the plain selectors.
		void init_flags()
		{
			bool value_used[4] = { false, false, false, false };

			for (uint32_t y = 0; y < 4; y++)
				for (uint32_t x = 0; x < 4; x++)
					value_used[get_selector(x, y)] = true;

			uint8_t lowest = 3;
			uint8_t highest = 0;
			uint8_t num_unique = 0;

			for (uint32_t value = 0; value < 4; value++)
			{
				if (!value_used[value])
					continue;

				num_unique++;

				if (value < lowest)
					lowest = static_cast<uint8_t>(value);
				if (value > highest)
					highest = static_cast<uint8_t>(value);
			}

			m_lo_selector = lowest;
			m_hi_selector = highest;
			m_num_unique_selectors = num_unique;
		}

		// Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
		inline uint32_t get_selector(uint32_t x, uint32_t y) const
		{
			assert((x < 4) && (y < 4));

			const uint32_t row_bits = m_selectors[y];
			return (row_bits >> (x * 2)) & 3;
		}

		// Sets the selector at (x, y) to val (all three must be in 0-3), updating both the plain and the ETC1 layout.
		void set_selector(uint32_t x, uint32_t y, uint32_t val)
		{
			static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };

			assert((x | y | val) < 4);

			// Plain layout.
			const uint32_t plain_shift = x * 2;
			m_selectors[y] = static_cast<uint8_t>((m_selectors[y] & ~(3u << plain_shift)) | (val << plain_shift));

			// ETC1 layout: pixels are numbered column-major, 8 per byte; the two bit planes are two bytes apart.
			const uint32_t etc1_bit_index = x * 4 + y;

			const uint32_t lsb_byte_index = 3 - (etc1_bit_index >> 3);
			const uint32_t msb_byte_index = lsb_byte_index - 2;

			const uint32_t bit_in_byte = etc1_bit_index & 7;
			const uint32_t bit_mask = 1u << bit_in_byte;

			const uint32_t etc1_val = s_selector_index_to_etc1[val];
			const uint32_t low_bit = etc1_val & 1;
			const uint32_t high_bit = etc1_val >> 1;

			m_bytes[lsb_byte_index] = static_cast<uint8_t>((m_bytes[lsb_byte_index] & ~bit_mask) | (low_bit << bit_in_byte));
			m_bytes[msb_byte_index] = static_cast<uint8_t>((m_bytes[msb_byte_index] & ~bit_mask) | (high_bit << bit_in_byte));
		}
	};
831

832
	bool basis_block_format_is_uncompressed(block_format tex_type);
833

834
	//------------------------------------
835

836
	// A half float is carried around as its raw 16-bit pattern.
	using half_float = uint16_t;

	// smallest positive subnormal number
	const double MIN_DENORM_HALF_FLOAT = 0.000000059604645;
	// smallest positive normal number
	const double MIN_HALF_FLOAT = 0.00006103515625;
	// largest normal number
	const double MAX_HALF_FLOAT = 65504.0;
	// the half float rep for 65504.0
	const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF;
842

843
	// Extracts bits [low, high] (inclusive, bit 0 = least significant) of val, shifted down so that bit `low` lands at bit 0.
	inline uint32_t get_bits(uint32_t val, int low, int high)
	{
		const int num_bits = (high - low) + 1;
		assert((num_bits >= 1) && (num_bits <= 32));

		const uint32_t shifted = val >> low;

		// A full 32-bit field needs no mask (and 1u << 32 would not be valid anyway).
		if (num_bits == 32)
			return shifted;

		return shifted & ((1u << num_bits) - 1);
	}
854

855
	// True when the 5-bit exponent field (bits 10-14) of v is all ones.
	inline bool is_half_inf_or_nan(half_float v)
	{
		const uint32_t exponent_field = get_bits(v, 10, 14);
		return exponent_field == 31;
	}
859

860
	// True when the exponent field (bits 10-14) of v is zero. Note that this also holds for +0 and -0.
	inline bool is_half_denorm(half_float v)
	{
		return ((v >> 10) & 31) == 0;
	}
865

866
	// Returns the exponent of v with the bias of 15 removed; an exponent field of zero yields -14.
	inline int get_half_exp(half_float v)
	{
		const int biased_exp = (v >> 10) & 31;

		if (!biased_exp)
			return -14;

		return biased_exp - 15;
	}
871

872
	// Returns the 10 mantissa bits of v, with the implicit leading one (0x400) added unless the exponent field is zero.
	inline int get_half_mantissa(half_float v)
	{
		const int fraction_bits = v & 0x3FF;

		return is_half_denorm(v) ? fraction_bits : (fraction_bits | 0x400);
	}
878

879
	// get_half_mantissa(v) as a float, divided by 1024.
	inline float get_half_mantissaf(half_float v)
	{
		const float mantissa = (float)get_half_mantissa(v);
		return mantissa / 1024.0f;
	}
883

884
	// Returns 0 when all bits of v are zero, otherwise -1 if the sign bit is set and 1 if it is clear.
	// Only the all-zero pattern gives 0: negative zero (0x8000) returns -1.
	inline int get_half_sign(half_float v)
	{
		if (!v)
			return 0;

		return (v & 0x8000) ? -1 : 1;
	}
888

889
	// True when the sign bit (bit 15) of v is set.
	inline bool half_is_signed(half_float v)
	{
		return (v >> 15) != 0;
	}
893

894
#if 0
895
	int hexp = get_half_exp(Cf);
896
	float hman = get_half_mantissaf(Cf);
897
	int hsign = get_half_sign(Cf);
898
	float k = powf(2.0f, hexp) * hman * hsign;
899
	if (is_half_inf_or_nan(Cf))
900
		k = std::numeric_limits<float>::quiet_NaN();
901
#endif
902

903
	half_float float_to_half(float val);
904

905
	// Converts the half float bit pattern hval to the float with the same value.
	// Zeros keep their sign, denormals are renormalized, infinities stay infinities, and NaNs keep their
	// mantissa bits (moved up by 13) and their sign.
	inline float half_to_float(half_float hval)
	{
		static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");

		const uint32_t s = ((uint32_t)hval >> 15) & 1;
		uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
		uint32_t m = (uint32_t)hval & 0x3FF;

		uint32_t float_bits;

		if ((!e) && (!m))
		{
			// +- 0
			float_bits = s << 31;
		}
		else if (e == 31)
		{
			// +/- INF when m is 0 (the mantissa term vanishes), otherwise +/- NaN
			float_bits = (s << 31) | 0x7f800000 | (m << 13);
		}
		else
		{
			if (!e)
			{
				// denormalized: shift the mantissa up until the implicit one appears, lowering the exponent accordingly.
				// e is unsigned and wraps below zero here on purpose; adding the bias difference below brings it back into range.
				while (!(m & 0x00000400))
				{
					m <<= 1;
					--e;
				}

				++e;
				m &= ~0x00000400;
			}

			// Rebias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits).
			e = e + (127 - 15);
			m = m << 13;

			assert(s <= 1);
			assert(m <= 0x7FFFFF);
			assert(e <= 255);

			float_bits = m | (e << 23) | (s << 31);
		}

		// Copy the bits into a float with memcpy; reading a union member other than the one last written is not defined in ISO C++.
		float result;
		memcpy(&result, &float_bits, sizeof(result));
		return result;
	}
960

961
	// Originally from bc6h_enc.h
962

963
	void bc6h_enc_init();
964

965
	// Largest 16-bit value.
	const uint32_t MAX_BLOG16_VAL = 0xFFFF;

	// BC6H internals
	const uint32_t NUM_BC6H_MODES = 14;
	const uint32_t BC6H_LAST_MODE_INDEX = 13;
	// In the MS docs this is "mode 11" (where the first mode is 1): 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights.
	const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10;
	const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
972

973
	extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
974

975
	// One entry of the per-mode BC6H bit layout tables (g_bc6h_bit_layouts).
	struct bc6h_bit_layout
	{
		// Component: R=0, G=1, B=2, D=3 (D = partition index)
		int8_t m_comp;
		// Endpoint index 0-3: 0-1 = Low/High of subset 1, 2-3 = Low/High of subset 2; -1 = partition index (d)
		int8_t m_index;
		int8_t m_last_bit;
		// May be -1 if a single bit; may be > m_last_bit if reversed
		int8_t m_first_bit;
	};
982

983
	// Number of bc6h_bit_layout entries per mode: the second dimension of g_bc6h_bit_layouts.
	const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
984
	extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
985

986
	extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
987

988
	extern const uint8_t g_bc6h_weight3[8];
989
	extern const uint8_t g_bc6h_weight4[16];
990

991
	extern const int8_t g_bc6h_mode_lookup[32];
992
		
993
	// Converts b16 to half float
994
	// Converts a 16-bit blog value (comp <= 0xFFFF) to a half float bit pattern by scaling its magnitude by 31/64.
	inline half_float bc6h_blog16_to_half(uint32_t comp)
	{
		assert(comp <= 0xFFFF);

		const uint32_t scaled = (comp * 31u) >> 6u;
		return (half_float)scaled;
	}
1002

1003
	// Largest half float bit pattern accepted (asserted) by bc6h_half_to_blog16() and bc6h_half_to_blog().
	// Same value as MAX_HALF_FLOAT_AS_INT_BITS, the half float representation of 65504.0.
	const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
1004

1005
	// Inverts bc6h_blog16_to_half().
1006
	// Returns the nearest blog16 given a half value. 
1007
	// Inverse of bc6h_blog16_to_half(): returns the nearest blog16 value for the half float bit pattern h.
	// h must not exceed MAX_BC6H_HALF_FLOAT_AS_UINT.
	inline uint32_t bc6h_half_to_blog16(half_float h)
	{
		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);

		// Undo the 31/64 scale; the +30 rounds the division by 31 up.
		const int scaled = h * 64 + 30;
		return scaled / 31;
	}
1012

1013
	// Suboptimal, but very close.
1014
	// Like bc6h_half_to_blog16(), but additionally divides by 2^(16 - num_bits) to produce a num_bits wide value.
	// Suboptimal, but very close. h must not exceed MAX_BC6H_HALF_FLOAT_AS_UINT.
	inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
	{
		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);

		const int divisor = 31 * (1 << (16 - num_bits));
		return (h * 64 + 30) / divisor;
	}
1019

1020
	// Raw storage for one packed BC6H block: 16 bytes.
	struct bc6h_block
	{
		uint8_t m_bytes[16];
	};
1024

1025
	void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1026
	void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1027
	void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1028
	void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
1029
	void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
1030
	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
1031
	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
1032

1033
	struct bc6h_logical_block
1034
	{
1035
		uint32_t m_mode;
1036
		uint32_t m_partition_pattern;	// must be 0 if 1 subset
1037
		uint32_t m_endpoints[3][4];		// [comp][subset*2+lh_index] - must be already properly packed
1038
		uint8_t m_weights[16];			// weights must be of the proper size, taking into account skipped MSB's which must be 0
1039

1040
		void clear()
1041
		{
1042
			basisu::clear_obj(*this);
1043
		}
1044
	};
1045

1046
	void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk);
1047
		
1048
	namespace bc7_mode_5_encoder
1049
	{
1050
		void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode);
1051
	}
1052
		
1053
} // namespace basist
1054

1055

1056

1057

1058
Product

Resources

Company