CoCalc -- basisu_gpu

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
⁹⁹⁰² views
1
// basisu_gpu_texture.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//    http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_gpu_texture.h"
16
#include "basisu_enc.h"
17
#include "basisu_pvrtc1_4.h"
18
#include "3rdparty/android_astc_decomp.h"
19
#include "basisu_bc7enc.h"
20
#include "../transcoder/basisu_astc_hdr_core.h"
21

22
#define BASISU_USE_GOOGLE_ASTC_DECODER (1)
23

24
namespace basisu
25
{
26
	//------------------------------------------------------------------------------------------------
27
	// ETC2 EAC
28

29
	void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
30
	{
31
		static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
32

33
		const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
34

35
		const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
36
		
37
		const uint64_t selector_bits = pBlock->get_selector_bits();
38
		
39
		const int32_t base = pBlock->m_base;
40
		const int32_t mul = pBlock->m_multiplier;
41

42
		pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
43
		pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
44
		pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
45
		pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
46

47
		pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
48
		pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
49
		pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
50
		pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
51

52
		pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
53
		pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
54
		pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
55
		pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
56

57
		pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
58
		pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
59
		pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
60
		pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
61
	}
62

63
	//------------------------------------------------------------------------------------------------
64
	// BC1
65
	// Packed 8-byte BC1 block: two 16-bit 5:6:5 endpoint colors (low byte first)
	// followed by four selector bytes, one per row, holding four 2-bit selectors each.
	struct bc1_block
	{
		enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };

		uint8_t m_low_color[cTotalEndpointBytes];
		uint8_t m_high_color[cTotalEndpointBytes];
		uint8_t m_selectors[cTotalSelectorBytes];

		// Returns the second endpoint as a 16-bit packed color.
		inline uint32_t get_high_color() const
		{
			const uint32_t lo_byte = m_high_color[0];
			const uint32_t hi_byte = m_high_color[1];
			return lo_byte | (hi_byte << 8U);
		}

		// Returns the first endpoint as a 16-bit packed color.
		inline uint32_t get_low_color() const
		{
			const uint32_t lo_byte = m_low_color[0];
			const uint32_t hi_byte = m_low_color[1];
			return lo_byte | (hi_byte << 8U);
		}

		// Splits a packed 5:6:5 color and expands each channel to 8 bits by
		// replicating the top bits into the low bits.
		static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
		{
			const uint32_t r5 = (c >> 11) & 31;
			const uint32_t g6 = (c >> 5) & 63;
			const uint32_t b5 = c & 31;

			r = (r5 << 3) | (r5 >> 2);
			g = (g6 << 2) | (g6 >> 4);
			b = (b5 << 3) | (b5 >> 2);
		}

		// Returns the 2-bit selector of pixel (x, y); x and y must both be < 4.
		inline uint32_t get_selector(uint32_t x, uint32_t y) const
		{
			assert((x < 4U) && (y < 4U));
			const uint32_t row_bits = m_selectors[y];
			return (row_bits >> (x * 2)) & 3;
		}
	};
89

90
	// Returns true if the block uses 3 color punchthrough alpha mode.
91
	bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
92
	{
93
		static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
94

95
		const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
96

97
		const uint32_t l = pBlock->get_low_color();
98
		const uint32_t h = pBlock->get_high_color();
99

100
		color_rgba c[4];
101

102
		uint32_t r0, g0, b0, r1, g1, b1;
103
		bc1_block::unpack_color(l, r0, g0, b0);
104
		bc1_block::unpack_color(h, r1, g1, b1);
105

106
		c[0].set_noclamp_rgba(r0, g0, b0, 255);
107
		c[1].set_noclamp_rgba(r1, g1, b1, 255);
108

109
		bool used_punchthrough = false;
110

111
		if (l > h)
112
		{
113
			c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
114
			c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
115
		}
116
		else
117
		{
118
			c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
119
			c[3].set_noclamp_rgba(0, 0, 0, 0);
120
			used_punchthrough = true;
121
		}
122

123
		if (set_alpha)
124
		{
125
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
126
			{
127
				pPixels[0] = c[pBlock->get_selector(0, y)]; 
128
				pPixels[1] = c[pBlock->get_selector(1, y)]; 
129
				pPixels[2] = c[pBlock->get_selector(2, y)]; 
130
				pPixels[3] = c[pBlock->get_selector(3, y)];
131
			}
132
		}
133
		else
134
		{
135
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
136
			{
137
				pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); 
138
				pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); 
139
				pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); 
140
				pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
141
			}
142
		}
143

144
		return used_punchthrough;
145
	}
146

147
	bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
148
	{
149
		static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
150

151
		const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
152

153
		const uint32_t l = pBlock->get_low_color();
154
		const uint32_t h = pBlock->get_high_color();
155

156
		color_rgba c[4];
157

158
		int r0 = (l >> 11) & 31;
159
		int g0 = (l >> 5) & 63;
160
		int b0 = l & 31;
161
		int r1 = (h >> 11) & 31;
162
		int g1 = (h >> 5) & 63;
163
		int b1 = h & 31;
164

165
		c[0].b = (uint8_t)((3 * b0 * 22) / 8);
166
		c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
167
		c[0].r = (uint8_t)((3 * r0 * 22) / 8);
168
		c[0].a = 0xFF;
169

170
		c[1].r = (uint8_t)((3 * r1 * 22) / 8);
171
		c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
172
		c[1].b = (uint8_t)((3 * b1 * 22) / 8);
173
		c[1].a = 0xFF;
174

175
		int gdiff = c[1].g - c[0].g;
176

177
		bool used_punchthrough = false;
178

179
		if (l > h)
180
		{
181
			c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
182
			c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
183
			c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
184
			c[2].a = 0xFF;
185

186
			c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
187
			c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
188
			c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
189
			c[3].a = 0xFF;
190
		}
191
		else
192
		{
193
			c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
194
			c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
195
			c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
196
			c[2].a = 0xFF;
197

198
			c[3].set_noclamp_rgba(0, 0, 0, 0);
199
			used_punchthrough = true;
200
		}
201

202
		if (set_alpha)
203
		{
204
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
205
			{
206
				pPixels[0] = c[pBlock->get_selector(0, y)]; 
207
				pPixels[1] = c[pBlock->get_selector(1, y)]; 
208
				pPixels[2] = c[pBlock->get_selector(2, y)]; 
209
				pPixels[3] = c[pBlock->get_selector(3, y)];
210
			}
211
		}
212
		else
213
		{
214
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
215
			{
216
				pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); 
217
				pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); 
218
				pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); 
219
				pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
220
			}
221
		}
222

223
		return used_punchthrough;
224
	}
225

226
	// Blends two channel values with weights 43/64 (c0) and 21/64 (c1), rounding to nearest.
	static inline int interp_5_6_amd(int c0, int c1)
	{
		assert(c0 < 256 && c1 < 256);
		const int weighted_sum = c0 * 43 + c1 * 21;
		return (weighted_sum + 32) >> 6;
	}
227
	// Returns the average of two channel values, rounding halves up.
	static inline int interp_half_5_6_amd(int c0, int c1)
	{
		assert(c0 < 256 && c1 < 256);
		const int sum = c0 + c1;
		return (sum + 1) >> 1;
	}
228

229
	bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
230
	{
231
		const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
232

233
		const uint32_t l = pBlock->get_low_color();
234
		const uint32_t h = pBlock->get_high_color();
235

236
		color_rgba c[4];
237

238
		uint32_t r0, g0, b0, r1, g1, b1;
239
		bc1_block::unpack_color(l, r0, g0, b0);
240
		bc1_block::unpack_color(h, r1, g1, b1);
241

242
		c[0].set_noclamp_rgba(r0, g0, b0, 255);
243
		c[1].set_noclamp_rgba(r1, g1, b1, 255);
244
				
245
		bool used_punchthrough = false;
246

247
		if (l > h)
248
		{
249
			c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
250
			c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
251
		}
252
		else
253
		{
254
			c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
255
			c[3].set_noclamp_rgba(0, 0, 0, 0);
256
			used_punchthrough = true;
257
		}
258

259
		if (set_alpha)
260
		{
261
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
262
			{
263
				pPixels[0] = c[pBlock->get_selector(0, y)]; 
264
				pPixels[1] = c[pBlock->get_selector(1, y)]; 
265
				pPixels[2] = c[pBlock->get_selector(2, y)]; 
266
				pPixels[3] = c[pBlock->get_selector(3, y)];
267
			}
268
		}
269
		else
270
		{
271
			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
272
			{
273
				pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); 
274
				pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); 
275
				pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); 
276
				pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
277
			}
278
		}
279

280
		return used_punchthrough;
281
	}
282

283
	//------------------------------------------------------------------------------------------------
284
	// BC3-5
285

286
	// Packed 8-byte BC4 block: two 8-bit endpoints followed by sixteen 3-bit selectors
	// stored in 6 bytes (least significant byte first).
	struct bc4_block
	{
		enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
		uint8_t m_endpoints[2];

		uint8_t m_selectors[cTotalSelectorBytes];

		inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
		inline uint32_t get_high_alpha() const { return m_endpoints[1]; }

		// True when the first endpoint is <= the second (the 6 interpolated value mode).
		inline bool is_alpha6_block() const { return !(get_low_alpha() > get_high_alpha()); }

		// Assembles the 48 selector bits into the low bits of a 64-bit integer.
		inline uint64_t get_selector_bits() const
		{
			uint64_t bits = 0;
			for (int i = 0; i < cTotalSelectorBytes; i++)
				bits |= static_cast<uint64_t>(m_selectors[i]) << (i * 8);
			return bits;
		}

		// Returns the 3-bit selector of pixel (x, y) from bits produced by get_selector_bits().
		inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
		{
			assert((x < 4U) && (y < 4U));
			const uint32_t pixel_index = (y * 4) + x;
			return (selector_bits >> (pixel_index * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
		}

		// Fills pDst[0..7] for the 6 value mode: both endpoints, 4 interpolated values,
		// then the fixed values 0 and 255. Returns 6.
		static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
		{
			pDst[0] = static_cast<uint8_t>(l);
			pDst[1] = static_cast<uint8_t>(h);
			for (uint32_t i = 1; i <= 4; i++)
				pDst[i + 1] = static_cast<uint8_t>((l * (5 - i) + h * i) / 5);
			pDst[6] = 0;
			pDst[7] = 255;
			return 6;
		}

		// Fills pDst[0..7] for the 8 value mode: both endpoints and 6 interpolated values. Returns 8.
		static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
		{
			pDst[0] = static_cast<uint8_t>(l);
			pDst[1] = static_cast<uint8_t>(h);
			for (uint32_t i = 1; i <= 6; i++)
				pDst[i + 1] = static_cast<uint8_t>((l * (7 - i) + h * i) / 7);
			return 8;
		}

		// Picks the 8 value table when l > h, otherwise the 6 value table.
		static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
		{
			return (l > h) ? get_block_values8(pDst, l, h) : get_block_values6(pDst, l, h);
		}
	};
344

345
	void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
346
	{
347
		static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
348

349
		const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
350

351
		uint8_t sel_values[8];
352
		bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
353

354
		const uint64_t selector_bits = pBlock->get_selector_bits();
355

356
		for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
357
		{
358
			pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
359
			pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
360
			pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
361
			pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
362
		}
363
	}
364
	
365
	// Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.
366
	bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
367
	{
368
		bool success = true;
369

370
		if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))
371
			success = false;
372

373
		unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));
374
		
375
		return success;
376
	}
377

378
	// writes RG
379
	void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
380
	{
381
		unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
382
		unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
383
	}
384
	
385
	//------------------------------------------------------------------------------------------------
386
	// ATC isn't officially documented, so I'm assuming these references:
387
	// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
388
	// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
389
	// The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
390
	void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
391
	{
392
		const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);
393

394
		const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);
395
		const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);
396
		uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);
397

398
		const bool mode = (color0 & 0x8000) != 0;
399

400
		color_rgba c[4];
401

402
		c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);
403
		c[0].r = (c[0].r << 3) | (c[0].r >> 2);
404
		c[0].g = (c[0].g << 3) | (c[0].g >> 2);
405
		c[0].b = (c[0].b << 3) | (c[0].b >> 2);
406

407
		c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);
408
		c[3].r = (c[3].r << 3) | (c[3].r >> 2);
409
		c[3].g = (c[3].g << 2) | (c[3].g >> 4);
410
		c[3].b = (c[3].b << 3) | (c[3].b >> 2);
411

412
		if (mode)
413
		{
414
			c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255);
415
			c[2] = c[0];
416
			c[0].set(0, 0, 0, 255);
417
		}
418
		else
419
		{
420
			c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;
421
			c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;
422
			c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;
423

424
			c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;
425
			c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;
426
			c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;
427
		}
428

429
		for (uint32_t i = 0; i < 16; i++)
430
		{
431
			const uint32_t s = sels & 3;
432
			
433
			pPixels[i] = c[s];
434
							
435
			sels >>= 2;
436
		}
437
	}
438

439
	//------------------------------------------------------------------------------------------------
440
	// BC7 mode 0-7 decompression.
441
	// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
442

443
	// Expands a quantized BC7 endpoint component plus its p-bit to 8 bits.
	// val: the val_bits-wide component; pbit: 0 or 1, appended as the new least significant bit.
	// The (val_bits + 1)-bit result is left-aligned in 8 bits and its top bits are replicated into the low bits.
	// val_bits must be in [4, 7]: with 8 bits plus a p-bit the value would not fit in a
	// byte and the alignment shift below would underflow.
	static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits)
	{
		assert(val < (1U << val_bits));
		assert(pbit < 2);
		assert(val_bits >= 4 && val_bits <= 7);

		const uint32_t total_bits = val_bits + 1;

		uint32_t v = (val << 1) | pbit;
		v <<= (8 - total_bits);
		v |= (v >> total_bits);

		assert(v <= 255);
		return v;
	}
444
	// Expands a val_bits-wide (4..8) BC7 endpoint component without a p-bit to 8 bits:
	// the value is left-aligned in a byte and its top bits are replicated into the low bits.
	static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits)
	{
		assert(val < (1U << val_bits));
		assert(val_bits >= 4 && val_bits <= 8);

		const uint32_t aligned = val << (8 - val_bits);
		const uint32_t expanded = aligned | (aligned >> val_bits);

		assert(expanded <= 255);
		return expanded;
	}
445

446
	// Interpolates between l and h with the 2-bit weight index w (w < 4), using the
	// basist::g_bc7_weights2 table (weights out of 64, rounded).
	static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w)
	{
		assert(w < 4);
		const uint32_t weight_h = basist::g_bc7_weights2[w];
		const uint32_t weight_l = 64 - weight_h;
		return (l * weight_l + h * weight_h + 32) >> 6;
	}
447
	// Interpolates between l and h with the 3-bit weight index w (w < 8), using the
	// basist::g_bc7_weights3 table (weights out of 64, rounded).
	static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w)
	{
		assert(w < 8);
		const uint32_t weight_h = basist::g_bc7_weights3[w];
		const uint32_t weight_l = 64 - weight_h;
		return (l * weight_l + h * weight_h + 32) >> 6;
	}
448
	// Interpolates between l and h with the 4-bit weight index w (w < 16), using the
	// basist::g_bc7_weights4 table (weights out of 64, rounded).
	static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w)
	{
		assert(w < 16);
		const uint32_t weight_h = basist::g_bc7_weights4[w];
		const uint32_t weight_l = 64 - weight_h;
		return (l * weight_l + h * weight_h + 32) >> 6;
	}
449
	static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
450
	{
451
		assert(l <= 255 && h <= 255);
452
		switch (bits)
453
		{
454
		case 2: return bc7_interp2(l, h, w);
455
		case 3: return bc7_interp3(l, h, w);
456
		case 4: return bc7_interp4(l, h, w);
457
		default: 
458
			break;
459
		}
460
		return 0;
461
	}
462
		
463
	bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
464
	{
465
		//const uint32_t SUBSETS = 3;
466
		const uint32_t ENDPOINTS = 6;
467
		const uint32_t COMPS = 3;
468
		const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;
469
		const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;
470
		const uint32_t PBITS = (mode == 0) ? 6 : 0;
471
		const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
472
		
473
		uint32_t bit_offset = 0;
474
		const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
475

476
		if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
477

478
		const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);
479

480
		color_rgba endpoints[ENDPOINTS];
481
		for (uint32_t c = 0; c < COMPS; c++)
482
			for (uint32_t e = 0; e < ENDPOINTS; e++)
483
				endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
484

485
		uint32_t pbits[6];
486
		for (uint32_t p = 0; p < PBITS; p++)
487
			pbits[p] = read_bits32(pBuf, bit_offset, 1);
488

489
		uint32_t weights[16];
490
		for (uint32_t i = 0; i < 16; i++)
491
			weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
492

493
		assert(bit_offset == 128);
494

495
		for (uint32_t e = 0; e < ENDPOINTS; e++)
496
			for (uint32_t c = 0; c < 4; c++)
497
				endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
498

499
		color_rgba block_colors[3][8];
500
		for (uint32_t s = 0; s < 3; s++)
501
			for (uint32_t i = 0; i < WEIGHT_VALS; i++)
502
			{
503
				for (uint32_t c = 0; c < 3; c++)
504
					block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
505
				block_colors[s][i][3] = 255;
506
			}
507

508
		for (uint32_t i = 0; i < 16; i++)
509
			pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]];
510

511
		return true;
512
	}
513

514
	bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
515
	{
516
		//const uint32_t SUBSETS = 2;
517
		const uint32_t ENDPOINTS = 4;
518
		const uint32_t COMPS = (mode == 7) ? 4 : 3;
519
		const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;
520
		const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);
521
		const uint32_t PBITS = (mode == 1) ? 2 : 4;
522
		const uint32_t SHARED_PBITS = (mode == 1) ? true : false;
523
		const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
524
		
525
		uint32_t bit_offset = 0;
526
		const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
527

528
		if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
529

530
		const uint32_t part = read_bits32(pBuf, bit_offset, 6);
531

532
		color_rgba endpoints[ENDPOINTS];
533
		for (uint32_t c = 0; c < COMPS; c++)
534
			for (uint32_t e = 0; e < ENDPOINTS; e++)
535
				endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
536
		
537
		uint32_t pbits[4];
538
		for (uint32_t p = 0; p < PBITS; p++)
539
			pbits[p] = read_bits32(pBuf, bit_offset, 1);
540
						
541
		uint32_t weights[16];
542
		for (uint32_t i = 0; i < 16; i++)
543
			weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
544
		
545
		assert(bit_offset == 128);
546

547
		for (uint32_t e = 0; e < ENDPOINTS; e++)
548
			for (uint32_t c = 0; c < 4; c++)
549
				endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));
550
		
551
		color_rgba block_colors[2][8];
552
		for (uint32_t s = 0; s < 2; s++)
553
			for (uint32_t i = 0; i < WEIGHT_VALS; i++)
554
			{
555
				for (uint32_t c = 0; c < COMPS; c++)
556
					block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
557
				block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];
558
			}
559

560
		for (uint32_t i = 0; i < 16; i++)
561
			pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]];
562

563
		return true;
564
	}
565

566
	bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
567
	{
568
		const uint32_t ENDPOINTS = 2;
569
		const uint32_t COMPS = 4;
570
		const uint32_t WEIGHT_BITS = 2;
571
		const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;
572
		const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;
573
		const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;
574
		//const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
575
		//const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
576

577
		uint32_t bit_offset = 0;
578
		const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
579

580
		if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
581

582
		const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);
583
		const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;
584

585
		color_rgba endpoints[ENDPOINTS];
586
		for (uint32_t c = 0; c < COMPS; c++)
587
			for (uint32_t e = 0; e < ENDPOINTS; e++)
588
				endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
589
		
590
		const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS,  index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
591
		
592
		uint32_t weights[16], a_weights[16];
593
		
594
		for (uint32_t i = 0; i < 16; i++)
595
			(index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));
596

597
		for (uint32_t i = 0; i < 16; i++)
598
			(index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));
599

600
		assert(bit_offset == 128);
601

602
		for (uint32_t e = 0; e < ENDPOINTS; e++)
603
			for (uint32_t c = 0; c < 4; c++)
604
				endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
605

606
		color_rgba block_colors[8];
607
		for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)
608
			for (uint32_t c = 0; c < 3; c++)
609
				block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);
610

611
		for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)
612
			block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);
613

614
		for (uint32_t i = 0; i < 16; i++)
615
		{
616
			pPixels[i] = block_colors[weights[i]];
617
			pPixels[i].a = block_colors[a_weights[i]].a;
618
			if (comp_rot >= 1)
619
				std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);
620
		}
621

622
		return true;
623
	}
624

625
	// Bitfield overlay of a 16-byte BC7 mode 6 block, split into two 64-bit halves.
	// NOTE(review): the mapping of these fields onto the packed block depends on the
	// compiler's bitfield allocation order and on byte order - confirm for each target.
	struct bc7_mode_6
	{
		// Low 64 bits: 7 mode bits, eight 7-bit endpoint components (r0 r1 g0 g1 b0 b1 a0 a1)
		// and the first endpoint's p-bit.
		struct
		{
			uint64_t m_mode : 7;
			uint64_t m_r0 : 7;
			uint64_t m_r1 : 7;
			uint64_t m_g0 : 7;
			uint64_t m_g1 : 7;
			uint64_t m_b0 : 7;
			uint64_t m_b1 : 7;
			uint64_t m_a0 : 7;
			uint64_t m_a1 : 7;
			uint64_t m_p0 : 1;
		} m_lo;

		// High 64 bits: the second endpoint's p-bit followed by 16 selectors named
		// m_s<x><y>. The first selector has 3 bits, the other fifteen have 4.
		union
		{
			struct
			{
				uint64_t m_p1 : 1;

				// Row y = 0
				uint64_t m_s00 : 3;
				uint64_t m_s10 : 4;
				uint64_t m_s20 : 4;
				uint64_t m_s30 : 4;

				// Row y = 1
				uint64_t m_s01 : 4;
				uint64_t m_s11 : 4;
				uint64_t m_s21 : 4;
				uint64_t m_s31 : 4;

				// Row y = 2
				uint64_t m_s02 : 4;
				uint64_t m_s12 : 4;
				uint64_t m_s22 : 4;
				uint64_t m_s32 : 4;

				// Row y = 3
				uint64_t m_s03 : 4;
				uint64_t m_s13 : 4;
				uint64_t m_s23 : 4;
				uint64_t m_s33 : 4;

			} m_hi;

			// Raw view of the same 64 bits.
			uint64_t m_hi_bits;
		};
	};
671

672
	bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
673
	{
674
		static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");
675

676
		const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
677

678
		if (block.m_lo.m_mode != (1 << 6))
679
			return false;
680

681
		const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
682
		const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
683
		const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
684
		const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
685
		const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
686
		const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
687
		const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
688
		const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);
689

690
		color_rgba vals[16];
691
		for (uint32_t i = 0; i < 16; i++)
692
		{
693
			const uint32_t w = basist::g_bc7_weights4[i];
694
			const uint32_t iw = 64 - w;
695
			vals[i].set_noclamp_rgba( 
696
				(r0 * iw + r1 * w + 32) >> 6, 
697
				(g0 * iw + g1 * w + 32) >> 6, 
698
				(b0 * iw + b1 * w + 32) >> 6, 
699
				(a0 * iw + a1 * w + 32) >> 6);
700
		}
701

702
		pPixels[0] = vals[block.m_hi.m_s00];
703
		pPixels[1] = vals[block.m_hi.m_s10];
704
		pPixels[2] = vals[block.m_hi.m_s20];
705
		pPixels[3] = vals[block.m_hi.m_s30];
706

707
		pPixels[4] = vals[block.m_hi.m_s01];
708
		pPixels[5] = vals[block.m_hi.m_s11];
709
		pPixels[6] = vals[block.m_hi.m_s21];
710
		pPixels[7] = vals[block.m_hi.m_s31];
711
		
712
		pPixels[8] = vals[block.m_hi.m_s02];
713
		pPixels[9] = vals[block.m_hi.m_s12];
714
		pPixels[10] = vals[block.m_hi.m_s22];
715
		pPixels[11] = vals[block.m_hi.m_s32];
716

717
		pPixels[12] = vals[block.m_hi.m_s03];
718
		pPixels[13] = vals[block.m_hi.m_s13];
719
		pPixels[14] = vals[block.m_hi.m_s23];
720
		pPixels[15] = vals[block.m_hi.m_s33];
721

722
		return true;
723
	}
724

725
	bool unpack_bc7(const void *pBlock, color_rgba *pPixels)
726
	{
727
		const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];
728

729
		for (uint32_t mode = 0; mode <= 7; mode++)
730
		{
731
			if (first_byte & (1U << mode))
732
			{
733
				switch (mode)
734
				{
735
				case 0:
736
				case 2:
737
					return unpack_bc7_mode0_2(mode, pBlock, pPixels);
738
				case 1:
739
				case 3:
740
				case 7:
741
					return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
742
				case 4:
743
				case 5:
744
					return unpack_bc7_mode4_5(mode, pBlock, pPixels);
745
				case 6:
746
					return unpack_bc7_mode6(pBlock, pPixels);
747
				default:
748
					break;
749
				}
750
			}
751
		}
752

753
		return false;
754
	}
755
	
756
	// Sign-extends the low 'bits' bits of val (two's complement) to a full int.
	// val must be in [0, 2^bits) and bits in [1, 31].
	// Uses the xor/subtract form instead of shifting left into the sign bit and back,
	// so the result does not depend on how the compiler implements signed shifts.
	static inline int bc6h_sign_extend(int val, int bits)
	{
		assert((bits >= 1) && (bits < 32));
		assert((val >= 0) && (val < (1 << bits)));

		const int sign_bit = 1 << (bits - 1);
		return (val ^ sign_bit) - sign_bit;
	}
762

763
	static inline int bc6h_apply_delta(int base, int delta, int num_bits, int is_signed)
764
	{
765
		int bitmask = ((1 << num_bits) - 1);
766
		int v = (base + delta) & bitmask;
767
		return is_signed ? bc6h_sign_extend(v, num_bits) : v;
768
	}
769

770
	// Expands a quantized BC6H endpoint value of the given bit width.
	// Unsigned: values with >= 15 bits pass through, 0 stays 0, the all-ones value
	// becomes 0xFFFF, everything else is scaled to 16 bits with rounding.
	// Signed: values with >= 16 bits pass through; otherwise the magnitude is scaled to
	// 15 bits (saturating to 0x7FFF at the top of the range) and the sign is re-applied.
	static int bc6h_dequantize(int val, int bits, int is_signed)
	{
		if (!is_signed)
		{
			if (bits >= 15)
				return val;

			if (val == 0)
				return 0;

			if (val == ((1 << bits) - 1))
				return 0xFFFF;

			return ((val << 16) + 0x8000) >> bits;
		}

		if (bits >= 16)
			return val;

		const bool negative = (val < 0);
		const int magnitude = negative ? -val : val;

		int result;
		if (magnitude == 0)
			result = 0;
		else if (magnitude >= ((1 << (bits - 1)) - 1))
			result = 0x7FFF;
		else
			result = ((magnitude << 15) + 0x4000) >> (bits - 1);

		return negative ? -result : result;
	}
810

811
	static inline int bc6h_interpolate(int a, int b, const uint8_t* pWeights, int index)
812
	{
813
		return (a * (64 - (int)pWeights[index]) + b * (int)pWeights[index] + 32) >> 6;
814
	}
815

816
	// Converts an interpolated BC6H value to the half_float bit pattern that is returned.
	// Unsigned: the value is scaled by 31/64.
	// Signed: the magnitude is scaled by 31/32 and, if the scaled result is still
	// negative (non-zero), bit 15 is set as the sign bit.
	static inline basist::half_float bc6h_convert_to_half(int val, int is_signed)
	{
		if (!is_signed)
			return (basist::half_float)((val * 31) >> 6);

		// Scale the magnitude so that negative inputs round toward zero.
		const int scaled = (val < 0) ? -(((-val) * 31) >> 5) : ((val * 31) >> 5);

		if (scaled < 0)
			return (basist::half_float)(0x8000 | (-scaled));

		return (basist::half_float)(0 | scaled);
	}
836

837
	// Removes the lowest num_bits bits from the 128-bit value held in (h:l) and returns them.
	// l, h: low and high 64-bit halves; both are shifted right by num_bits, with bits
	// moving from h into the top of l.
	// total_bits: running count of bits consumed so far; incremented by num_bits.
	// num_bits must be in [1, 63]. The return type is 32 bits wide, so only the low
	// 32 of the extracted bits are returned when num_bits > 32.
	static inline uint32_t bc6h_get_bits(uint32_t num_bits, uint64_t& l, uint64_t& h, uint32_t& total_bits)
	{
		assert((num_bits) && (num_bits <= 63));

		// Build the mask in 64 bits: a 32-bit "1U << num_bits" is undefined for num_bits >= 32.
		const uint64_t mask = (1ULL << num_bits) - 1ULL;
		const uint32_t v = (uint32_t)(l & mask);

		l >>= num_bits;
		l |= (h << (64U - num_bits));
		h >>= num_bits;

		total_bits += num_bits;
		assert(total_bits <= 128);

		return v;
	}
852

853
	// Returns the low num_bits bits of v in reversed order (bit 0 becomes bit num_bits - 1).
	// Bits of v at or above num_bits are ignored; num_bits == 0 yields 0.
	static inline uint32_t bc6h_reverse_bits(uint32_t v, uint32_t num_bits)
	{
		uint32_t reversed = 0;

		// Walk the source from its lowest bit up, pushing each bit into the bottom of the result.
		for (uint32_t i = 0; i < num_bits; i++)
			reversed = (reversed << 1) | ((v >> i) & 1u);

		return reversed;
	}
863

864
	static inline uint64_t bc6h_read_le_qword(const void* p)
865
	{
866
		const uint8_t* pSrc = static_cast<const uint8_t*>(p);
867
		return ((uint64_t)read_le_dword(pSrc)) | (((uint64_t)read_le_dword(pSrc + sizeof(uint32_t))) << 32U);
868
	}
869

870
	bool unpack_bc6h(const void* pSrc_block, void* pDst_block, bool is_signed, uint32_t dest_pitch_in_halfs)
871
	{
872
		assert(dest_pitch_in_halfs >= 4 * 3);
873

874
		const uint32_t MAX_SUBSETS = 2, MAX_COMPS = 3;
875

876
		const uint8_t* pSrc = static_cast<const uint8_t*>(pSrc_block);
877
		basist::half_float* pDst = static_cast<basist::half_float*>(pDst_block);
878

879
		uint64_t blo = bc6h_read_le_qword(pSrc), bhi = bc6h_read_le_qword(pSrc + sizeof(uint64_t));
880

881
		// Unpack mode
882
		const int mode = basist::g_bc6h_mode_lookup[blo & 31];
883
		if (mode < 0)
884
		{
885
			for (int y = 0; y < 4; y++)
886
			{
887
				memset(pDst, 0, sizeof(basist::half_float) * 4);
888
				pDst += dest_pitch_in_halfs;
889
			}
890
			return false;
891
		}
892

893
		// Skip mode bits
894
		uint32_t total_bits_read = 0;
895
		bc6h_get_bits((mode < 2) ? 2 : 5, blo, bhi, total_bits_read);
896

897
		assert(mode < (int)basist::NUM_BC6H_MODES);
898

899
		const uint32_t num_subsets = (mode >= 10) ? 1 : 2;
900
		const bool is_mode_9_or_10 = (mode == 9) || (mode == 10);
901

902
		// Unpack endpoint components
903
		int comps[MAX_SUBSETS][MAX_COMPS][2] = { { { 0 } } };		// [subset][comp][l/h]
904
		int part_index = 0;
905

906
		uint32_t layout_index = 0;
907
		while (layout_index < basist::MAX_BC6H_LAYOUT_INDEX)
908
		{
909
			const basist::bc6h_bit_layout& layout = basist::g_bc6h_bit_layouts[mode][layout_index];
910

911
			if (layout.m_comp < 0)
912
				break;
913

914
			const int subset = layout.m_index >> 1, lh_index = layout.m_index & 1;
915
			assert((layout.m_comp == 3) || ((subset >= 0) && (subset < (int)MAX_SUBSETS)));
916

917
			const int last_bit = layout.m_last_bit, first_bit = layout.m_first_bit;
918
			assert(last_bit >= 0);
919

920
			int& res = (layout.m_comp == 3) ? part_index : comps[subset][layout.m_comp][lh_index];
921

922
			if (first_bit < 0)
923
			{
924
				res |= (bc6h_get_bits(1, blo, bhi, total_bits_read) << last_bit);
925
			}
926
			else
927
			{
928
				const int total_bits = iabs(last_bit - first_bit) + 1;
929
				const int bit_shift = basisu::minimum(first_bit, last_bit);
930

931
				int b = bc6h_get_bits(total_bits, blo, bhi, total_bits_read);
932

933
				if (last_bit < first_bit)
934
					b = bc6h_reverse_bits(b, total_bits);
935

936
				res |= (b << bit_shift);
937
			}
938

939
			layout_index++;
940
		}
941
		assert(layout_index != basist::MAX_BC6H_LAYOUT_INDEX);
942

943
		// Sign extend/dequantize endpoints
944
		const int num_sig_bits = basist::g_bc6h_mode_sig_bits[mode][0];
945
		if (is_signed)
946
		{
947
			for (uint32_t comp = 0; comp < 3; comp++)
948
				comps[0][comp][0] = bc6h_sign_extend(comps[0][comp][0], num_sig_bits);
949
		}
950

951
		if (is_signed || !is_mode_9_or_10)
952
		{
953
			for (uint32_t subset = 0; subset < num_subsets; subset++)
954
				for (uint32_t comp = 0; comp < 3; comp++)
955
					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
956
						comps[subset][comp][lh] = bc6h_sign_extend(comps[subset][comp][lh], basist::g_bc6h_mode_sig_bits[mode][1 + comp]);
957
		}
958

959
		if (!is_mode_9_or_10)
960
		{
961
			for (uint32_t subset = 0; subset < num_subsets; subset++)
962
				for (uint32_t comp = 0; comp < 3; comp++)
963
					for (uint32_t lh = (subset ? 0 : 1); lh < 2; lh++)
964
						comps[subset][comp][lh] = bc6h_apply_delta(comps[0][comp][0], comps[subset][comp][lh], num_sig_bits, is_signed);
965
		}
966

967
		for (uint32_t subset = 0; subset < num_subsets; subset++)
968
			for (uint32_t comp = 0; comp < 3; comp++)
969
				for (uint32_t lh = 0; lh < 2; lh++)
970
					comps[subset][comp][lh] = bc6h_dequantize(comps[subset][comp][lh], num_sig_bits, is_signed);
971

972
		// Now unpack weights and output texels
973
		const int weight_bits = (mode >= 10) ? 4 : 3;
974
		const uint8_t* pWeights = (mode >= 10) ? basist::g_bc6h_weight4 : basist::g_bc6h_weight3;
975

976
		dest_pitch_in_halfs -= 4 * 3;
977

978
		for (uint32_t y = 0; y < 4; y++)
979
		{
980
			for (uint32_t x = 0; x < 4; x++)
981
			{
982
				int subset = (num_subsets == 1) ? ((x | y) ? 0 : 0x80) : basist::g_bc6h_2subset_patterns[part_index][y][x];
983
				const int num_bits = weight_bits + ((subset & 0x80) ? -1 : 0);
984

985
				subset &= 1;
986

987
				const int weight_index = bc6h_get_bits(num_bits, blo, bhi, total_bits_read);
988

989
				pDst[0] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][0][0], comps[subset][0][1], pWeights, weight_index), is_signed);
990
				pDst[1] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][1][0], comps[subset][1][1], pWeights, weight_index), is_signed);
991
				pDst[2] = bc6h_convert_to_half(bc6h_interpolate(comps[subset][2][0], comps[subset][2][1], pWeights, weight_index), is_signed);
992

993
				pDst += 3;
994
			}
995

996
			pDst += dest_pitch_in_halfs;
997
		}
998

999
		assert(total_bits_read == 128);
1000
		return true;
1001
	}
1002
	//------------------------------------------------------------------------------------------------
1003
	// FXT1 (for fun, and because some modern Intel parts support it, and because a subset is like BC1)
1004

1005
	// Bit-level view of one FXT1 block: a 64-bit selector qword followed by a 64-bit color/flags qword.
	struct fxt1_block
	{
		// Low qword: 32 two-bit texel selectors (m_t00 first), also viewable as a raw qword or as 8 bytes.
		union
		{
			struct
			{
				uint64_t m_t00 : 2, m_t01 : 2, m_t02 : 2, m_t03 : 2;
				uint64_t m_t04 : 2, m_t05 : 2, m_t06 : 2, m_t07 : 2;
				uint64_t m_t08 : 2, m_t09 : 2, m_t10 : 2, m_t11 : 2;
				uint64_t m_t12 : 2, m_t13 : 2, m_t14 : 2, m_t15 : 2;
				uint64_t m_t16 : 2, m_t17 : 2, m_t18 : 2, m_t19 : 2;
				uint64_t m_t20 : 2, m_t21 : 2, m_t22 : 2, m_t23 : 2;
				uint64_t m_t24 : 2, m_t25 : 2, m_t26 : 2, m_t27 : 2;
				uint64_t m_t28 : 2, m_t29 : 2, m_t30 : 2, m_t31 : 2;
			} m_lo;

			uint64_t m_lo_bits;
			uint8_t m_sels[8];
		};

		// High qword: four 5:5:5 colors (B, G, R order within each color), then the alpha flag, two green LSBs and the mode bit.
		union
		{
			struct
			{
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
				// This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted.
				// Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!
				uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
				uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
				uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
				uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
#else
				// Intel's encoding, and the encoding in the OpenGL FXT1 spec.
				uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
				uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
				uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
				uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
#endif
				uint64_t m_alpha : 1;
				uint64_t m_glsb : 2;
				uint64_t m_mode : 1;
			} m_hi;

			uint64_t m_hi_bits;
		};
	};
1090

1091
	// Widens a 5:6:5 color (held unexpanded in c.r/c.g/c.b) to 8 bits per channel by replicating
	// the top bits into the low bits. The returned alpha is always 255.
	static color_rgba expand_565(const color_rgba& c)
	{
		const int red8 = (c.r << 3) | (c.r >> 2);
		const int green8 = (c.g << 2) | (c.g >> 4);
		const int blue8 = (c.b << 3) | (c.b >> 2);

		return color_rgba(red8, green8, blue8, 255);
	}
1095

1096
	// We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
1097
	bool unpack_fxt1(const void *p, color_rgba *pPixels)
1098
	{
1099
		const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);
1100

1101
		if (pBlock->m_hi.m_mode == 0)
1102
			return false;
1103
		if (pBlock->m_hi.m_alpha == 1)
1104
			return false;
1105
				
1106
		color_rgba colors[4];
1107

1108
		colors[0].r = pBlock->m_hi.m_r0;
1109
		colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1)));
1110
		colors[0].b = pBlock->m_hi.m_b0;
1111
		colors[0].a = 255;
1112

1113
		colors[1].r = pBlock->m_hi.m_r1;
1114
		colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1));
1115
		colors[1].b = pBlock->m_hi.m_b1;
1116
		colors[1].a = 255;
1117

1118
		colors[2].r = pBlock->m_hi.m_r2;
1119
		colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1)));
1120
		colors[2].b = pBlock->m_hi.m_b2;
1121
		colors[2].a = 255;
1122

1123
		colors[3].r = pBlock->m_hi.m_r3;
1124
		colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1));
1125
		colors[3].b = pBlock->m_hi.m_b3;
1126
		colors[3].a = 255;
1127

1128
		for (uint32_t i = 0; i < 4; i++)
1129
			colors[i] = expand_565(colors[i]);
1130

1131
		color_rgba block0_colors[4];
1132
		block0_colors[0] = colors[0];
1133
		block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255);
1134
		block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255);
1135
		block0_colors[3] = colors[1];
1136

1137
		for (uint32_t i = 0; i < 16; i++)
1138
		{
1139
			const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3;
1140

1141
			const uint32_t x = i & 3;
1142
			const uint32_t y = i >> 2;
1143
			pPixels[x + y * 8] = block0_colors[sel];
1144
		}
1145

1146
		color_rgba block1_colors[4];
1147
		block1_colors[0] = colors[2];
1148
		block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255);
1149
		block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255);
1150
		block1_colors[3] = colors[3];
1151

1152
		for (uint32_t i = 0; i < 16; i++)
1153
		{
1154
			const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3;
1155
			
1156
			const uint32_t x = i & 3;
1157
			const uint32_t y = i >> 2;
1158
			pPixels[4 + x + y * 8] = block1_colors[sel];
1159
		}
1160

1161
		return true;
1162
	}
1163

1164
	//------------------------------------------------------------------------------------------------
1165
	// PVRTC2 (non-interpolated, hard_flag=1 modulation=0 subset only!)
1166

1167
	// Bit-level view of one PVRTC2 block: 4 modulation bytes followed by a 32-bit color word.
	// The color word can be read through either the opaque or the transparent layout, or as raw bits.
	struct pvrtc2_block
	{
		uint8_t m_modulation[4];

		union
		{
			union
			{
				// Opaque mode: RGB colora=554 and colorb=555
				struct
				{
					uint32_t m_mod_flag : 1;
					uint32_t m_blue_a : 4, m_green_a : 5, m_red_a : 5;
					uint32_t m_hard_flag : 1;
					uint32_t m_blue_b : 5, m_green_b : 5, m_red_b : 5;
					uint32_t m_opaque_flag : 1;
				} m_opaque_color_data;

				// Transparent mode: RGBA colora=4433 and colorb=4443
				struct
				{
					uint32_t m_mod_flag : 1;
					uint32_t m_blue_a : 3, m_green_a : 4, m_red_a : 4, m_alpha_a : 3;
					uint32_t m_hard_flag : 1;
					uint32_t m_blue_b : 4, m_green_b : 4, m_red_b : 4, m_alpha_b : 3;
					uint32_t m_opaque_flag : 1;
				} m_trans_color_data;
			};

			uint32_t m_color_data_bits;
		};
	};
1211

1212
	// Expands three 5-bit channels (col[0..2]) to 8 bits each by bit replication; the result's alpha is 255.
	static color_rgba convert_rgb_555_to_888(const color_rgba& col)
	{
		const int c0 = (col[0] << 3) | (col[0] >> 2);
		const int c1 = (col[1] << 3) | (col[1] >> 2);
		const int c2 = (col[2] << 3) | (col[2] >> 2);

		return color_rgba(c0, c1, c2, 255);
	}
1216
	
1217
	// Expands three 5-bit color channels (col[0..2]) and a 4-bit alpha (col[3]) to 8 bits each by bit replication.
	static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)
	{
		const int c0 = (col[0] << 3) | (col[0] >> 2);
		const int c1 = (col[1] << 3) | (col[1] >> 2);
		const int c2 = (col[2] << 3) | (col[2] >> 2);
		const int alpha = (col[3] << 4) | col[3];

		return color_rgba(c0, c1, c2, alpha);
	}
1221

1222
	// PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
1223
	bool unpack_pvrtc2(const void *p, color_rgba *pPixels)
1224
	{
1225
		const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);
1226

1227
		if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag))
1228
		{
1229
			// This mode isn't supported by the transcoder, so we aren't bothering with it here.
1230
			return false;
1231
		}
1232

1233
		color_rgba colors[4];
1234

1235
		if (pBlock->m_opaque_color_data.m_opaque_flag)
1236
		{
1237
			// colora=554
1238
			color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255);
1239
			
1240
			// colora=555
1241
			color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255);
1242
						
1243
			colors[0] = convert_rgb_555_to_888(color_a);
1244
			colors[3] = convert_rgb_555_to_888(color_b);
1245

1246
			colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255);
1247
			colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255);
1248
		}
1249
		else
1250
		{
1251
			// colora=4433 
1252
			color_rgba color_a(
1253
				(pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), 
1254
				(pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3),
1255
				(pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), 
1256
				pBlock->m_trans_color_data.m_alpha_a << 1);
1257

1258
			//colorb=4443
1259
			color_rgba color_b(
1260
				(pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3),
1261
				(pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3),
1262
				(pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3),
1263
				(pBlock->m_trans_color_data.m_alpha_b << 1) | 1);
1264

1265
			colors[0] = convert_rgba_5554_to_8888(color_a);
1266
			colors[3] = convert_rgba_5554_to_8888(color_b);
1267
		}
1268

1269
		colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8);
1270
		colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8);
1271

1272
		for (uint32_t i = 0; i < 16; i++)
1273
		{
1274
			const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3;
1275
			pPixels[i] = colors[sel];
1276
		}
1277

1278
		return true;
1279
	}
1280

1281
	//------------------------------------------------------------------------------------------------
1282
	// ETC2 EAC R11 or RG11
1283

1284
	// Bit-level view of one 64-bit EAC R11 block: base value, table index, multiplier, then 48 bits of
	// selectors stored as six bytes (m_sels_0 holds the most significant 8 selector bits).
	struct etc2_eac_r11
	{
		uint64_t m_base	: 8;
		uint64_t m_table	: 4;
		uint64_t m_mul		: 4;
		uint64_t m_sels_0 : 8, m_sels_1 : 8, m_sels_2 : 8;
		uint64_t m_sels_3 : 8, m_sels_4 : 8, m_sels_5 : 8;

		// Returns the six selector bytes packed into the low 48 bits, m_sels_0 highest.
		uint64_t get_sels() const
		{
			uint64_t packed = m_sels_0;
			packed = (packed << 8U) | m_sels_1;
			packed = (packed << 8U) | m_sels_2;
			packed = (packed << 8U) | m_sels_3;
			packed = (packed << 8U) | m_sels_4;
			packed = (packed << 8U) | m_sels_5;
			return packed;
		}

		// Stores the low 48 bits of v into the six selector bytes; higher bits of v are ignored.
		void set_sels(uint64_t v)
		{
			m_sels_5 = v & 0xFF;
			v >>= 8U;
			m_sels_4 = v & 0xFF;
			v >>= 8U;
			m_sels_3 = v & 0xFF;
			v >>= 8U;
			m_sels_2 = v & 0xFF;
			v >>= 8U;
			m_sels_1 = v & 0xFF;
			v >>= 8U;
			m_sels_0 = v & 0xFF;
		}
	};

	// Two consecutive EAC R11 blocks; unpack_etc2_eac_rg() decodes m_c[0] into component 0 and m_c[1] into component 1.
	struct etc2_eac_rg11
	{
		etc2_eac_r11 m_c[2];
	};
1316

1317
	void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c)
1318
	{
1319
		const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);
1320
		const uint64_t sels = pBlock->get_sels();
1321

1322
		const int base = (int)pBlock->m_base * 8 + 4;
1323
		const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1;
1324
		const int table = (int)pBlock->m_table;
1325

1326
		for (uint32_t y = 0; y < 4; y++)
1327
		{
1328
			for (uint32_t x = 0; x < 4; x++)
1329
			{
1330
				const uint32_t shift = 45 - ((y + x * 4) * 3);
1331
				
1332
				const uint32_t sel = (uint32_t)((sels >> shift) & 7);
1333
				
1334
				int val = base + g_etc2_eac_tables[table][sel] * mul;
1335
				val = clamp<int>(val, 0, 2047);
1336

1337
				// Convert to 8-bits with rounding
1338
				//pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);
1339
				pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047);
1340

1341
			} // x
1342
		} // y
1343
	}
1344

1345
	void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)
1346
	{
1347
		for (uint32_t c = 0; c < 2; c++)
1348
		{
1349
			const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];
1350

1351
			unpack_etc2_eac_r(pBlock, pPixels, c);
1352
		}
1353
	}
1354

1355
	//------------------------------------------------------------------------------------------------
1356
	// UASTC
1357

1358
	void unpack_uastc(const void* p, color_rgba* pPixels)
1359
	{
1360
		basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);
1361
	}
1362
			
1363
	// Unpacks to RGBA, R, RG, or A. LDR GPU texture formats only.
1364
	bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
1365
	{
1366
		switch (fmt)
1367
		{
1368
		case texture_format::cBC1:
1369
		{
1370
			unpack_bc1(pBlock, pPixels, true);
1371
			break;
1372
		}
1373
		case texture_format::cBC1_NV:
1374
		{
1375
			unpack_bc1_nv(pBlock, pPixels, true);
1376
			break;
1377
		}
1378
		case texture_format::cBC1_AMD:
1379
		{
1380
			unpack_bc1_amd(pBlock, pPixels, true);
1381
			break;
1382
		}
1383
		case texture_format::cBC3:
1384
		{
1385
			return unpack_bc3(pBlock, pPixels);
1386
		}
1387
		case texture_format::cBC4:
1388
		{
1389
			// Unpack to R
1390
			unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
1391
			break;
1392
		}
1393
		case texture_format::cBC5:
1394
		{
1395
			unpack_bc5(pBlock, pPixels);
1396
			break;
1397
		}
1398
		case texture_format::cBC7:
1399
		{
1400
			return unpack_bc7(pBlock, pPixels);
1401
		}
1402
		// Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
1403
		case texture_format::cETC2_RGB:
1404
		case texture_format::cETC1:
1405
		case texture_format::cETC1S:
1406
		{
1407
			return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
1408
		}
1409
		case texture_format::cETC2_RGBA:
1410
		{
1411
			if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
1412
				return false;
1413
			unpack_etc2_eac(pBlock, pPixels);
1414
			break;
1415
		}
1416
		case texture_format::cETC2_ALPHA:
1417
		{
1418
			// Unpack to A
1419
			unpack_etc2_eac(pBlock, pPixels);
1420
			break;
1421
		}
1422
		case texture_format::cBC6HSigned:
1423
		case texture_format::cBC6HUnsigned:
1424
		case texture_format::cASTC_HDR_4x4:
1425
		case texture_format::cUASTC_HDR_4x4:
1426
		case texture_format::cASTC_HDR_6x6:
1427
		{
1428
			// Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.
1429
			assert(0);
1430
			return false;
1431
		}
1432
		case texture_format::cASTC_LDR_4x4:
1433
		{
1434
			const bool astc_srgb = false;
1435
			bool status = basisu_astc::astc::decompress_ldr(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
1436
			assert(status);
1437

1438
			if (!status)
1439
				return false;
1440
			
1441
			break;
1442
		}
1443
		case texture_format::cATC_RGB:
1444
		{
1445
			unpack_atc(pBlock, pPixels);
1446
			break;
1447
		}
1448
		case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1449
		{
1450
			unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);
1451
			unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));
1452
			break;
1453
		}
1454
		case texture_format::cFXT1_RGB:
1455
		{
1456
			unpack_fxt1(pBlock, pPixels);
1457
			break;
1458
		}
1459
		case texture_format::cPVRTC2_4_RGBA:
1460
		{
1461
			unpack_pvrtc2(pBlock, pPixels);
1462
			break;
1463
		}
1464
		case texture_format::cETC2_R11_EAC:
1465
		{
1466
			unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0);
1467
			break;
1468
		}
1469
		case texture_format::cETC2_RG11_EAC:
1470
		{
1471
			unpack_etc2_eac_rg(pBlock, pPixels);
1472
			break;
1473
		}
1474
		case texture_format::cUASTC4x4:
1475
		{
1476
			unpack_uastc(pBlock, pPixels);
1477
			break;
1478
		}
1479
		default:
1480
		{
1481
			assert(0);
1482
			// TODO
1483
			return false;
1484
		}
1485
		}
1486
		return true;
1487
	}
1488

1489
	// Decodes the single HDR block at pBlock, in format fmt, into float RGBA pixels at pPixels
	// (36 pixels for the 6x6 ASTC format, 16 for the 4x4 formats and BC6H).
	// BC6H has no alpha, so its alpha is written as 1.0.
	// Returns false if the decoder reports failure, or (after asserting) if fmt is not one of the handled HDR formats.
	bool unpack_block_hdr(texture_format fmt, const void* pBlock, vec4F* pPixels)
	{
		switch (fmt)
		{
			case texture_format::cASTC_HDR_6x6:
			{
#if BASISU_USE_GOOGLE_ASTC_DECODER
				// Use Google's decoder
				bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 6, 6);
				assert(status);
				if (!status)
					return false;
#else
				// Use our decoder
				basist::half_float half_block[6 * 6][4];

				astc_helpers::log_astc_block log_blk;
				if (!astc_helpers::unpack_block(pBlock, log_blk, 6, 6))
					return false;
				if (!astc_helpers::decode_block(log_blk, half_block, 6, 6, astc_helpers::cDecodeModeHDR16))
					return false;

				for (uint32_t p = 0; p < (6 * 6); p++)
				{
					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
					pPixels[p][3] = basist::half_to_float(half_block[p][3]);
				}
#endif
				return true;
			}
			case texture_format::cASTC_HDR_4x4:
			case texture_format::cUASTC_HDR_4x4:
			{
#if BASISU_USE_GOOGLE_ASTC_DECODER
				// Use Google's decoder
				bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);
				assert(status);
				if (!status)
					return false;
#else
				// Use our decoder
				basist::half_float half_block[16][4];

				astc_helpers::log_astc_block log_blk;
				if (!astc_helpers::unpack_block(pBlock, log_blk, 4, 4))
					return false;
				if (!astc_helpers::decode_block(log_blk, half_block, 4, 4, astc_helpers::cDecodeModeHDR16))
					return false;

				for (uint32_t p = 0; p < 16; p++)
				{
					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
					pPixels[p][3] = basist::half_to_float(half_block[p][3]);
				}

				//memset(pPixels, 0, sizeof(vec4F) * 16);
#endif
				return true;
			}
			case texture_format::cBC6HSigned:
			case texture_format::cBC6HUnsigned:
			{
				// Zero-initialized so the conversion loop below never reads indeterminate values, even if the decoder bails out early.
				basist::half_float half_block[16][3] = {};

				// Keep the decoder's verdict: an invalid block is reported as a failure rather than silently returning true.
				const bool status = unpack_bc6h(pBlock, half_block, fmt == texture_format::cBC6HSigned);

				// The pixels are written even on failure so the caller's buffer never keeps stale data from a previous block.
				for (uint32_t p = 0; p < 16; p++)
				{
					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
					pPixels[p][3] = 1.0f;
				}

				return status;
			}
			default:
			{
				break;
			}
		}

		assert(0);
		return false;
	}
1577
		
1578
	// Decodes every block of this texture into img, which is first resized to the texture's pixel dimensions and cleared to black.
	// PVRTC1 is decoded as a whole image through pvrtc4_image; every other format goes block by block through unpack_block().
	// Returns false if the PVRTC1 block counts disagree or if any block failed to unpack (remaining blocks are still processed).
	bool gpu_image::unpack(image& img) const
	{
		img.resize(get_pixel_width(), get_pixel_height());
		img.set_all(g_black_color);

		// Nothing to decode for an empty image.
		if (!img.get_width() || !img.get_height())
			return true;

		const bool is_pvrtc1 = (m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA);
		if (is_pvrtc1)
		{
			pvrtc4_image pvrtc_img(m_width, m_height);

			if (get_total_blocks() != pvrtc_img.get_total_blocks())
				return false;

			// Copy the raw blocks in, then undo the swizzled block order before decoding all pixels.
			memcpy(&pvrtc_img.get_blocks()[0], get_ptr(), get_size_in_bytes());

			pvrtc_img.deswizzle();

			pvrtc_img.unpack_all_pixels(img);

			return true;
		}

		assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));

		// Scratch buffer for one decoded block, pre-filled with black.
		color_rgba block_pixels[cMaxBlockSize * cMaxBlockSize];
		for (color_rgba& px : block_pixels)
			px = g_black_color;

		bool all_ok = true;

		for (uint32_t by = 0; by < m_blocks_y; by++)
		{
			for (uint32_t bx = 0; bx < m_blocks_x; bx++)
			{
				const bool block_ok = unpack_block(m_fmt, get_block_ptr(bx, by), block_pixels);
				if (!block_ok)
					all_ok = false;

				// The scratch buffer is copied out even when the block failed to unpack.
				img.set_block_clipped(block_pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
			} // bx
		} // by

		return all_ok;
	}
1624

1625
	// Decodes every block of this HDR texture into img, which is first resized to the texture's pixel dimensions and cleared to zero.
	// Only the ASTC HDR 4x4/6x6, UASTC HDR 4x4 and BC6H formats are accepted; anything else asserts and returns false.
	// Returns false if any block failed to unpack (remaining blocks are still processed).
	bool gpu_image::unpack_hdr(imagef& img) const
	{
		const bool is_hdr_format =
			(m_fmt == texture_format::cASTC_HDR_4x4) || (m_fmt == texture_format::cUASTC_HDR_4x4) || (m_fmt == texture_format::cASTC_HDR_6x6) ||
			(m_fmt == texture_format::cBC6HUnsigned) || (m_fmt == texture_format::cBC6HSigned);

		if (!is_hdr_format)
		{
			// Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)
			assert(0);
			return false;
		}

		img.resize(get_pixel_width(), get_pixel_height());
		img.set_all(vec4F(0.0f));

		// Nothing to decode for an empty image.
		if (!img.get_width() || !img.get_height())
			return true;

		assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));

		// Scratch buffer for one decoded block.
		vec4F block_pixels[cMaxBlockSize * cMaxBlockSize];
		clear_obj(block_pixels);

		bool all_ok = true;

		for (uint32_t by = 0; by < m_blocks_y; by++)
		{
			for (uint32_t bx = 0; bx < m_blocks_x; bx++)
			{
				const bool block_ok = unpack_block_hdr(m_fmt, get_block_ptr(bx, by), block_pixels);
				if (!block_ok)
					all_ok = false;

				// The scratch buffer is copied out even when the block failed to unpack.
				img.set_block_clipped(block_pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
			} // bx
		} // by

		return all_ok;
	}
1662
		
1663
	// KTX1 texture file writing
1664
	// 12-byte identifier stored in ktx_header::m_identifier (the bytes at indices 1-6 spell "KTX 11").
	static const uint8_t g_ktx_file_id[12] =
	{
		0xAB, 0x4B, 0x54, 0x58,
		0x20, 0x31, 0x31, 0xBB,
		0x0D, 0x0A, 0x1A, 0x0A
	};
1665

1666
	// KTX/GL enums
1667
	// Numeric constants written into KTX headers: endianness markers plus GL base/internal format codes.
	enum
	{
		// Endianness markers
		KTX_ENDIAN = 0x04030201,
		KTX_OPPOSITE_ENDIAN = 0x01020304,

		// ETC1
		KTX_ETC1_RGB8_OES = 0x8D64,

		// Base internal formats
		KTX_RED = 0x1903,
		KTX_RG = 0x8227,
		KTX_RGB = 0x1907,
		KTX_RGBA = 0x1908,

		// S3TC, RGTC, ETC2, BPTC and PVRTC1
		KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
		KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
		KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
		KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,
		KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
		KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
		KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
		KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
		KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E,
		KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,
		KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
		KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,

		// ASTC, one entry per block size
		KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
		KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1,
		KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2,
		KTX_COMPRESSED_RGBA_ASTC_6x5_KHR = 0x93B3,
		KTX_COMPRESSED_RGBA_ASTC_6x6_KHR = 0x93B4,
		KTX_COMPRESSED_RGBA_ASTC_8x5_KHR = 0x93B5,
		KTX_COMPRESSED_RGBA_ASTC_8x6_KHR = 0x93B6,
		KTX_COMPRESSED_RGBA_ASTC_8x8_KHR = 0x93B7,
		KTX_COMPRESSED_RGBA_ASTC_10x5_KHR = 0x93B8,
		KTX_COMPRESSED_RGBA_ASTC_10x6_KHR = 0x93B9,
		KTX_COMPRESSED_RGBA_ASTC_10x8_KHR = 0x93BA,
		KTX_COMPRESSED_RGBA_ASTC_10x10_KHR = 0x93BB,
		KTX_COMPRESSED_RGBA_ASTC_12x10_KHR = 0x93BC,
		KTX_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD,

		// ASTC sRGB variants, one entry per block size
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR = 0x93D1,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR = 0x93D2,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR = 0x93D3,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR = 0x93D4,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR = 0x93D5,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR = 0x93D6,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR = 0x93D7,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR = 0x93D8,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR = 0x93D9,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR = 0x93DA,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR = 0x93DB,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR = 0x93DC,
		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD,

		// UASTC
		KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!

		// ATC
		KTX_ATC_RGB_AMD = 0x8C92,
		KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,

		// FXT1, PVRTC2 and EAC R11/RG11
		KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
		KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
		KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
		KTX_COMPRESSED_R11_EAC = 0x9270,
		KTX_COMPRESSED_RG11_EAC = 0x9272
	};
1731
		
1732
	struct ktx_header
1733
	{
1734
		uint8_t m_identifier[12];
1735
		packed_uint<4> m_endianness;
1736
		packed_uint<4> m_glType;
1737
		packed_uint<4> m_glTypeSize;
1738
		packed_uint<4> m_glFormat;
1739
		packed_uint<4> m_glInternalFormat;
1740
		packed_uint<4> m_glBaseInternalFormat;
1741
		packed_uint<4> m_pixelWidth;
1742
		packed_uint<4> m_pixelHeight;
1743
		packed_uint<4> m_pixelDepth;
1744
		packed_uint<4> m_numberOfArrayElements;
1745
		packed_uint<4> m_numberOfFaces;
1746
		packed_uint<4> m_numberOfMipmapLevels;
1747
		packed_uint<4> m_bytesOfKeyValueData;
1748

1749
		void clear() { clear_obj(*this);	}
1750
	};
1751

1752
	// Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]
1753
	bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
1754
	{
1755
		if (!gpu_images.size())
1756
		{
1757
			assert(0);
1758
			return false;
1759
		}
1760

1761
		uint32_t width = 0, height = 0, total_levels = 0;
1762
		basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
1763

1764
		// Sanity check the input
1765
		if (cubemap_flag)
1766
		{
1767
			if ((gpu_images.size() % 6) != 0)
1768
			{
1769
				assert(0);
1770
				return false;
1771
			}
1772
		}
1773
				
1774
		for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
1775
		{
1776
			const gpu_image_vec &levels = gpu_images[array_index];
1777

1778
			if (!levels.size())
1779
			{
1780
				// Empty mip chain
1781
				assert(0);
1782
				return false;
1783
			}
1784

1785
			if (!array_index)
1786
			{
1787
				width = levels[0].get_pixel_width();
1788
				height = levels[0].get_pixel_height();
1789
				total_levels = (uint32_t)levels.size();
1790
				fmt = levels[0].get_format();
1791
			}
1792
			else
1793
			{
1794
				if ((width != levels[0].get_pixel_width()) ||
1795
				    (height != levels[0].get_pixel_height()) ||
1796
				    (total_levels != levels.size()))
1797
				{
1798
					// All cubemap/texture array faces must be the same dimension
1799
					assert(0);
1800
					return false;
1801
				}
1802
			}
1803

1804
			for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
1805
			{
1806
				if (level_index)
1807
				{
1808
					if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
1809
							(levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
1810
					{
1811
						// Malformed mipmap chain
1812
						assert(0);
1813
						return false;
1814
					}
1815
				}
1816

1817
				if (fmt != levels[level_index].get_format())
1818
				{
1819
					// All input textures must use the same GPU format
1820
					assert(0);
1821
					return false;
1822
				}
1823
			}
1824
		}
1825

1826
		uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;
1827

1828
		switch (fmt)
1829
		{
1830
		case texture_format::cBC1:
1831
		case texture_format::cBC1_NV:
1832
		case texture_format::cBC1_AMD:
1833
		{
1834
			internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
1835
			break;
1836
		}
1837
		case texture_format::cBC3:
1838
		{
1839
			internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
1840
			base_internal_fmt = KTX_RGBA;
1841
			break;
1842
		}
1843
		case texture_format::cBC4:
1844
		{
1845
			internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
1846
			base_internal_fmt = KTX_RED;
1847
			break;
1848
		}
1849
		case texture_format::cBC5:
1850
		{
1851
			internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
1852
			base_internal_fmt = KTX_RG;
1853
			break;
1854
		}
1855
		case texture_format::cETC1:
1856
		case texture_format::cETC1S:
1857
		{
1858
			internal_fmt = KTX_ETC1_RGB8_OES;
1859
			break;
1860
		}
1861
		case texture_format::cETC2_RGB:
1862
		{
1863
			internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
1864
			break;
1865
		}
1866
		case texture_format::cETC2_RGBA:
1867
		{
1868
			internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
1869
			base_internal_fmt = KTX_RGBA;
1870
			break;
1871
		}
1872
		case texture_format::cBC6HSigned:
1873
		{
1874
			internal_fmt = KTX_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
1875
			base_internal_fmt = KTX_RGBA;
1876
			break;
1877
		}
1878
		case texture_format::cBC6HUnsigned:
1879
		{
1880
			internal_fmt = KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;
1881
			base_internal_fmt = KTX_RGBA;
1882
			break;
1883
		}
1884
		case texture_format::cBC7:
1885
		{
1886
			internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
1887
			base_internal_fmt = KTX_RGBA;
1888
			break;
1889
		}
1890
		case texture_format::cPVRTC1_4_RGB:
1891
		{
1892
			internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
1893
			break;
1894
		}
1895
		case texture_format::cPVRTC1_4_RGBA:
1896
		{
1897
			internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
1898
			base_internal_fmt = KTX_RGBA;
1899
			break;
1900
		}
1901
		case texture_format::cASTC_HDR_6x6:
1902
		{
1903
			internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR;
1904
			// TODO: should we write RGB? We don't support generating HDR 6x6 with alpha.
1905
			base_internal_fmt = KTX_RGBA; 
1906
			break;
1907
		}
1908
		// We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.
1909
		case texture_format::cASTC_LDR_4x4:
1910
		case texture_format::cASTC_HDR_4x4:
1911
		case texture_format::cUASTC_HDR_4x4: // UASTC_HDR is just HDR-only ASTC
1912
		{
1913
			internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
1914
			base_internal_fmt = KTX_RGBA;
1915
			break;
1916
		}
1917
		case texture_format::cATC_RGB:
1918
		{
1919
			internal_fmt = KTX_ATC_RGB_AMD;
1920
			break;
1921
		}
1922
		case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1923
		{
1924
			internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
1925
			base_internal_fmt = KTX_RGBA;
1926
			break;
1927
		}
1928
		case texture_format::cETC2_R11_EAC:
1929
		{
1930
			internal_fmt = KTX_COMPRESSED_R11_EAC;
1931
			base_internal_fmt = KTX_RED;
1932
			break;
1933
		}
1934
		case texture_format::cETC2_RG11_EAC:
1935
		{
1936
			internal_fmt = KTX_COMPRESSED_RG11_EAC;
1937
			base_internal_fmt = KTX_RG;
1938
			break;
1939
		}
1940
		case texture_format::cUASTC4x4:
1941
		{
1942
			internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;
1943
			base_internal_fmt = KTX_RGBA;
1944
			break;
1945
		}
1946
		case texture_format::cFXT1_RGB:
1947
		{
1948
			internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;
1949
			break;
1950
		}
1951
		case texture_format::cPVRTC2_4_RGBA:
1952
		{
1953
			internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;
1954
			base_internal_fmt = KTX_RGBA;
1955
			break;
1956
		}
1957
		default:
1958
		{
1959
			// TODO
1960
			assert(0);
1961
			return false;
1962
		}
1963
		}
1964

1965
		ktx_header header;
1966
		header.clear();
1967
		memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
1968
		header.m_endianness = KTX_ENDIAN;
1969

1970
		header.m_pixelWidth = width;
1971
		header.m_pixelHeight = height;
1972

1973
		header.m_glTypeSize = 1;
1974

1975
		header.m_glInternalFormat = internal_fmt;
1976
		header.m_glBaseInternalFormat = base_internal_fmt;
1977

1978
		header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());
1979
		if (header.m_numberOfArrayElements == 1)
1980
			header.m_numberOfArrayElements = 0;
1981

1982
		header.m_numberOfMipmapLevels = total_levels;
1983
		header.m_numberOfFaces = cubemap_flag ? 6 : 1;
1984

1985
		append_vector(ktx_data, (uint8_t*)&header, sizeof(header));
1986

1987
		for (uint32_t level_index = 0; level_index < total_levels; level_index++)
1988
		{
1989
			uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();
1990

1991
			if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
1992
			{
1993
				img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
1994
			}
1995

1996
			assert(img_size && ((img_size & 3) == 0));
1997

1998
			packed_uint<4> packed_img_size(img_size);
1999
			append_vector(ktx_data, (uint8_t*)&packed_img_size, sizeof(packed_img_size));
2000

2001
			uint32_t bytes_written = 0;
2002
			(void)bytes_written;
2003

2004
			for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
2005
			{
2006
				for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)
2007
				{
2008
					const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];
2009

2010
					append_vector(ktx_data, (uint8_t*)img.get_ptr(), img.get_size_in_bytes());
2011

2012
					bytes_written += img.get_size_in_bytes();
2013
				}
2014

2015
			} // array_index
2016

2017
		} // level_index
2018

2019
		return true;
2020
	}
2021

2022
	// Returns true when fmt is one of the BC formats listed below (BC1 variants, BC3, BC4, BC5,
	// BC6H signed/unsigned, BC7), false for every other texture_format.
	bool does_dds_support_format(texture_format fmt)
	{
		bool is_supported = false;

		switch (fmt)
		{
		case texture_format::cBC1:
		case texture_format::cBC1_NV:
		case texture_format::cBC1_AMD:
		case texture_format::cBC3:
		case texture_format::cBC4:
		case texture_format::cBC5:
		case texture_format::cBC6HSigned:
		case texture_format::cBC6HUnsigned:
		case texture_format::cBC7:
		{
			is_supported = true;
			break;
		}
		default:
		{
			break;
		}
		}

		return is_supported;
	}
2041

2042
	// Only supports the basic DirectX BC texture formats.
2043
	// gpu_images array is: [face/layer][mipmap level]
2044
	// For cubemap arrays, # of face/layers must be a multiple of 6.
2045
	// Accepts 2D, 2D mipmapped, 2D array, 2D array mipmapped
2046
	// and cubemap, cubemap mipmapped, and cubemap array mipmapped.
2047
	bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
2048
	{
2049
		return false;
2050
	}
2051

2052
	// Serializes gpu_images to DDS in memory (see the uint8_vec overload of write_dds_file) and then
	// writes the result to pFilename.
	// Returns false if serialization fails, or if the file cannot be written (in which case a
	// message is also printed to stderr).
	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
	{
		uint8_vec file_bytes;

		const bool serialized = write_dds_file(file_bytes, gpu_images, cubemap_flag, use_srgb_format);
		if (!serialized)
			return false;

		if (write_vec_to_file(pFilename, file_bytes))
			return true;

		fprintf(stderr, "write_dds_file: Failed writing DDS file data\n");
		return false;
	}
2067
		
2068
	bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips,	basisu::vector<imagef>& hdr_mips)
2069
	{
2070
		return false;
2071
	}
2072

2073
	// Writes a texture array (g[array_index][level_index]) to pFilename, choosing the container
	// from the lowercased filename extension:
	//   "ktx" - serialized with create_ktx_texture_file()
	//   "dds" - serialized with write_dds_file() (use_srgb_format is only forwarded on this path)
	//   "pvr" - not implemented, always fails
	// Any other extension asserts and fails.
	// Returns true only if serialization succeeded and the file was written.
	bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag, bool use_srgb_format)
	{
		const std::string ext(string_tolower(string_get_extension(pFilename)));

		uint8_vec file_bytes;
		bool serialized = false;

		if (ext == "ktx")
		{
			serialized = create_ktx_texture_file(file_bytes, g, cubemap_flag);
		}
		else if (ext == "dds")
		{
			serialized = write_dds_file(file_bytes, g, cubemap_flag, use_srgb_format);
		}
		else if (ext == "pvr")
		{
			// TODO
			serialized = false;
		}
		else
		{
			// unsupported texture format
			assert(0);
			serialized = false;
		}

		if (!serialized)
			return false;

		return basisu::write_vec_to_file(pFilename, file_bytes);
	}
2102

2103
	bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g, bool use_srgb_format)
2104
	{
2105
		basisu::vector<gpu_image_vec> a;
2106
		a.push_back(g);
2107
		return write_compressed_texture_file(pFilename, a, false, use_srgb_format);
2108
	}
2109

2110
	bool write_compressed_texture_file(const char* pFilename, const gpu_image& g, bool use_srgb_format)
2111
	{
2112
		basisu::vector<gpu_image_vec> v;
2113
		enlarge_vector(v, 1)->push_back(g);
2114
		return write_compressed_texture_file(pFilename, v, false, use_srgb_format);
2115
	}
2116

2117
	//const uint32_t OUT_FILE_MAGIC = 'TEXC';
2118
	struct out_file_header 
2119
	{
2120
		packed_uint<4> m_magic;
2121
		packed_uint<4> m_pad;
2122
		packed_uint<4> m_width;
2123
		packed_uint<4> m_height;
2124
	};
2125

2126
	// As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output.
2127
	bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)
2128
	{
2129
		out_file_header hdr;
2130
		//hdr.m_magic = OUT_FILE_MAGIC;
2131
		hdr.m_magic.m_bytes[0] = 67;
2132
		hdr.m_magic.m_bytes[1] = 88;
2133
		hdr.m_magic.m_bytes[2] = 69;
2134
		hdr.m_magic.m_bytes[3] = 84;
2135
		hdr.m_pad = 0;
2136
		hdr.m_width = gi.get_blocks_x() * 8;
2137
		hdr.m_height = gi.get_blocks_y() * 4;
2138

2139
		FILE* pFile = nullptr;
2140
#ifdef _WIN32
2141
		fopen_s(&pFile, pFilename, "wb");
2142
#else
2143
		pFile = fopen(pFilename, "wb");
2144
#endif
2145
		if (!pFile)
2146
			return false;
2147

2148
		fwrite(&hdr, sizeof(hdr), 1, pFile);
2149
		fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile);
2150
		
2151
		return fclose(pFile) != EOF;
2152
	}
2153

2154
	// The .astc texture format is readable using ARM's astcenc, AMD Compressonator, and other engines/tools. It oddly doesn't support mipmaps, limiting 
2155
	// its usefulness/relevance.
2156
	// https://github.com/ARM-software/astc-encoder/blob/main/Docs/FileFormat.md
2157
	bool write_astc_file(const char* pFilename, const void* pBlocks, uint32_t block_width, uint32_t block_height, uint32_t dim_x, uint32_t dim_y)
2158
	{
2159
		assert(pBlocks && (block_width >= 4) && (block_height >= 4) && (dim_x > 0) && (dim_y > 0));
2160

2161
		uint8_vec file_data;
2162
		file_data.push_back(0x13);
2163
		file_data.push_back(0xAB);
2164
		file_data.push_back(0xA1);
2165
		file_data.push_back(0x5C);
2166

2167
		file_data.push_back((uint8_t)block_width);
2168
		file_data.push_back((uint8_t)block_height);
2169
		file_data.push_back(1);
2170

2171
		file_data.push_back((uint8_t)dim_x);
2172
		file_data.push_back((uint8_t)(dim_x >> 8));
2173
		file_data.push_back((uint8_t)(dim_x >> 16));
2174

2175
		file_data.push_back((uint8_t)dim_y);
2176
		file_data.push_back((uint8_t)(dim_y >> 8));
2177
		file_data.push_back((uint8_t)(dim_y >> 16));
2178

2179
		file_data.push_back((uint8_t)1);
2180
		file_data.push_back((uint8_t)0);
2181
		file_data.push_back((uint8_t)0);
2182

2183
		const uint32_t num_blocks_x = (dim_x + block_width - 1) / block_width;
2184
		const uint32_t num_blocks_y = (dim_y + block_height - 1) / block_height;
2185

2186
		const uint32_t total_bytes = num_blocks_x * num_blocks_y * 16;
2187

2188
		const size_t cur_size = file_data.size();
2189

2190
		file_data.resize(cur_size + total_bytes);
2191

2192
		memcpy(&file_data[cur_size], pBlocks, total_bytes);
2193

2194
		return write_vec_to_file(pFilename, file_data);
2195
	}
2196
		
2197
} // basisu
2198

2199

2200
Product

Resources

Company