CoCalc -- astcenc

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/astcenc/astcenc_image.cpp
9902 views
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2024 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17

18
/**
19
 * @brief Functions for creating in-memory ASTC image structures.
20
 */
21

22
#include <cassert>
23
#include <cstring>
24

25
#include "astcenc_internal.h"
26

27
/**
28
 * @brief Loader pipeline function type for data fetch from memory.
29
 */
30
using pixel_loader = vfloat4(*)(const void*, int);
31

32
/**
33
 * @brief Loader pipeline function type for swizzling data in a vector.
34
 */
35
using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&);
36

37
/**
38
 * @brief Loader pipeline function type for converting data in a vector to LNS.
39
 */
40
using pixel_converter = vfloat4(*)(vfloat4, vmask4);
41

42
/**
43
 * @brief Load a 8-bit UNORM texel from a data array.
44
 *
45
 * @param data          The data pointer.
46
 * @param base_offset   The index offset to the start of the pixel.
47
 */
48
static vfloat4 load_texel_u8(
49
	const void* data,
50
	int base_offset
51
) {
52
	const uint8_t* data8 = static_cast<const uint8_t*>(data);
53
	return int_to_float(vint4(data8 + base_offset)) / 255.0f;
54
}
55

56
/**
57
 * @brief Load a 16-bit fp16 texel from a data array.
58
 *
59
 * @param data          The data pointer.
60
 * @param base_offset   The index offset to the start of the pixel.
61
 */
62
static vfloat4 load_texel_f16(
63
	const void* data,
64
	int base_offset
65
) {
66
	const uint16_t* data16 = static_cast<const uint16_t*>(data);
67
	int r = data16[base_offset    ];
68
	int g = data16[base_offset + 1];
69
	int b = data16[base_offset + 2];
70
	int a = data16[base_offset + 3];
71
	return float16_to_float(vint4(r, g, b, a));
72
}
73

74
/**
75
 * @brief Load a 32-bit float texel from a data array.
76
 *
77
 * @param data          The data pointer.
78
 * @param base_offset   The index offset to the start of the pixel.
79
 */
80
static vfloat4 load_texel_f32(
81
	const void* data,
82
	int base_offset
83
) {
84
	const float* data32 = static_cast<const float*>(data);
85
	return vfloat4(data32 + base_offset);
86
}
87

88
/**
89
 * @brief Dummy no-op swizzle function.
90
 *
91
 * @param data   The source RGBA vector to swizzle.
92
 * @param swz    The swizzle to use.
93
 */
94
static vfloat4 swz_texel_skip(
95
	vfloat4 data,
96
	const astcenc_swizzle& swz
97
) {
98
	(void)swz;
99
	return data;
100
}
101

102
/**
103
 * @brief Swizzle a texel into a new arrangement.
104
 *
105
 * @param data   The source RGBA vector to swizzle.
106
 * @param swz    The swizzle to use.
107
 */
108
static vfloat4 swz_texel(
109
	vfloat4 data,
110
	const astcenc_swizzle& swz
111
) {
112
	ASTCENC_ALIGNAS float datas[6];
113

114
	storea(data, datas);
115
	datas[ASTCENC_SWZ_0] = 0.0f;
116
	datas[ASTCENC_SWZ_1] = 1.0f;
117

118
	return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]);
119
}
120

121
/**
122
 * @brief Encode a texel that is entirely LDR linear.
123
 *
124
 * @param data       The RGBA data to encode.
125
 * @param lns_mask   The mask for the HDR channels than need LNS encoding.
126
 */
127
static vfloat4 encode_texel_unorm(
128
	vfloat4 data,
129
	vmask4 lns_mask
130
) {
131
	(void)lns_mask;
132
	return data * 65535.0f;
133
}
134

135
/**
136
 * @brief Encode a texel that includes at least some HDR LNS texels.
137
 *
138
 * @param data       The RGBA data to encode.
139
 * @param lns_mask   The mask for the HDR channels than need LNS encoding.
140
 */
141
static vfloat4 encode_texel_lns(
142
	vfloat4 data,
143
	vmask4 lns_mask
144
) {
145
	vfloat4 datav_unorm = data * 65535.0f;
146
	vfloat4 datav_lns = float_to_lns(data);
147
	return select(datav_unorm, datav_lns, lns_mask);
148
}
149

150
/* See header for documentation. */
151
void load_image_block(
152
	astcenc_profile decode_mode,
153
	const astcenc_image& img,
154
	image_block& blk,
155
	const block_size_descriptor& bsd,
156
	unsigned int xpos,
157
	unsigned int ypos,
158
	unsigned int zpos,
159
	const astcenc_swizzle& swz
160
) {
161
	unsigned int xsize = img.dim_x;
162
	unsigned int ysize = img.dim_y;
163
	unsigned int zsize = img.dim_z;
164

165
	blk.xpos = xpos;
166
	blk.ypos = ypos;
167
	blk.zpos = zpos;
168

169
	// True if any non-identity swizzle
170
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
171
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
172

173
	int idx = 0;
174

175
	vfloat4 data_min(1e38f);
176
	vfloat4 data_mean(0.0f);
177
	vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
178
	vfloat4 data_max(-1e38f);
179
	vmask4 grayscalev(true);
180

181
	// This works because we impose the same choice everywhere during encode
182
	uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
183
	                  (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
184
	uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
185
	vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
186
	vmask4 lns_mask = use_lns != vint4::zero();
187

188
	// Set up the function pointers for loading pipeline as needed
189
	pixel_loader loader = load_texel_u8;
190
	if (img.data_type == ASTCENC_TYPE_F16)
191
	{
192
		loader = load_texel_f16;
193
	}
194
	else if  (img.data_type == ASTCENC_TYPE_F32)
195
	{
196
		loader = load_texel_f32;
197
	}
198

199
	pixel_swizzler swizzler = swz_texel_skip;
200
	if (needs_swz)
201
	{
202
		swizzler = swz_texel;
203
	}
204

205
	pixel_converter converter = encode_texel_unorm;
206
	if (any(lns_mask))
207
	{
208
		converter = encode_texel_lns;
209
	}
210

211
	for (unsigned int z = 0; z < bsd.zdim; z++)
212
	{
213
		unsigned int zi = astc::min(zpos + z, zsize - 1);
214
		void* plane = img.data[zi];
215

216
		for (unsigned int y = 0; y < bsd.ydim; y++)
217
		{
218
			unsigned int yi = astc::min(ypos + y, ysize - 1);
219

220
			for (unsigned int x = 0; x < bsd.xdim; x++)
221
			{
222
				unsigned int xi = astc::min(xpos + x, xsize - 1);
223

224
				vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
225
				datav = swizzler(datav, swz);
226
				datav = converter(datav, lns_mask);
227

228
				// Compute block metadata
229
				data_min = min(data_min, datav);
230
				data_mean += datav * data_mean_scale;
231
				data_max = max(data_max, datav);
232

233
				grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
234

235
				blk.data_r[idx] = datav.lane<0>();
236
				blk.data_g[idx] = datav.lane<1>();
237
				blk.data_b[idx] = datav.lane<2>();
238
				blk.data_a[idx] = datav.lane<3>();
239

240
				blk.rgb_lns[idx] = rgb_lns;
241
				blk.alpha_lns[idx] = a_lns;
242

243
				idx++;
244
			}
245
		}
246
	}
247

248
	// Reverse the encoding so we store origin block in the original format
249
	vfloat4 data_enc = blk.texel(0);
250
	vfloat4 data_enc_unorm = data_enc / 65535.0f;
251
	vfloat4 data_enc_lns = vfloat4::zero();
252

253
	if (rgb_lns || a_lns)
254
	{
255
		data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
256
	}
257

258
	blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);
259

260
	// Store block metadata
261
	blk.data_min = data_min;
262
	blk.data_mean = data_mean;
263
	blk.data_max = data_max;
264
	blk.grayscale = all(grayscalev);
265
}
266

267
/* See header for documentation. */
268
void load_image_block_fast_ldr(
269
	astcenc_profile decode_mode,
270
	const astcenc_image& img,
271
	image_block& blk,
272
	const block_size_descriptor& bsd,
273
	unsigned int xpos,
274
	unsigned int ypos,
275
	unsigned int zpos,
276
	const astcenc_swizzle& swz
277
) {
278
	(void)swz;
279
	(void)decode_mode;
280

281
	unsigned int xsize = img.dim_x;
282
	unsigned int ysize = img.dim_y;
283

284
	blk.xpos = xpos;
285
	blk.ypos = ypos;
286
	blk.zpos = zpos;
287

288
	vfloat4 data_min(1e38f);
289
	vfloat4 data_mean = vfloat4::zero();
290
	vfloat4 data_max(-1e38f);
291
	vmask4 grayscalev(true);
292
	int idx = 0;
293

294
	const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
295
	for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
296
	{
297
		unsigned int yi = astc::min(y, ysize - 1);
298

299
		for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
300
		{
301
			unsigned int xi = astc::min(x, xsize - 1);
302

303
			vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi));
304
			vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);
305

306
			// Compute block metadata
307
			data_min = min(data_min, datav);
308
			data_mean += datav;
309
			data_max = max(data_max, datav);
310

311
			grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
312

313
			blk.data_r[idx] = datav.lane<0>();
314
			blk.data_g[idx] = datav.lane<1>();
315
			blk.data_b[idx] = datav.lane<2>();
316
			blk.data_a[idx] = datav.lane<3>();
317

318
			idx++;
319
		}
320
	}
321

322
	// Reverse the encoding so we store origin block in the original format
323
	blk.origin_texel = blk.texel(0) / 65535.0f;
324

325
	// Store block metadata
326
	blk.rgb_lns[0] = 0;
327
	blk.alpha_lns[0] = 0;
328
	blk.data_min = data_min;
329
	blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
330
	blk.data_max = data_max;
331
	blk.grayscale = all(grayscalev);
332
}
333

334
/* See header for documentation. */
335
void store_image_block(
336
	astcenc_image& img,
337
	const image_block& blk,
338
	const block_size_descriptor& bsd,
339
	unsigned int xpos,
340
	unsigned int ypos,
341
	unsigned int zpos,
342
	const astcenc_swizzle& swz
343
) {
344
	unsigned int x_size = img.dim_x;
345
	unsigned int x_start = xpos;
346
	unsigned int x_end = astc::min(x_size, xpos + bsd.xdim);
347
	unsigned int x_count = x_end - x_start;
348
	unsigned int x_nudge = bsd.xdim - x_count;
349

350
	unsigned int y_size = img.dim_y;
351
	unsigned int y_start = ypos;
352
	unsigned int y_end = astc::min(y_size, ypos + bsd.ydim);
353
	unsigned int y_count = y_end - y_start;
354
	unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim;
355

356
	unsigned int z_size = img.dim_z;
357
	unsigned int z_start = zpos;
358
	unsigned int z_end = astc::min(z_size, zpos + bsd.zdim);
359

360
	// True if any non-identity swizzle
361
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
362
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
363

364
	// True if any swizzle uses Z reconstruct
365
	bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
366
	               (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);
367

368
	int idx = 0;
369
	if (img.data_type == ASTCENC_TYPE_U8)
370
	{
371
		for (unsigned int z = z_start; z < z_end; z++)
372
		{
373
			// Fetch the image plane
374
			uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);
375

376
			for (unsigned int y = y_start; y < y_end; y++)
377
			{
378
				uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start);
379

380
				for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH)
381
				{
382
					unsigned int max_texels = ASTCENC_SIMD_WIDTH;
383
					unsigned int used_texels = astc::min(x_count - x, max_texels);
384

385
					// Unaligned load as rows are not always SIMD_WIDTH long
386
					vfloat data_r(blk.data_r + idx);
387
					vfloat data_g(blk.data_g + idx);
388
					vfloat data_b(blk.data_b + idx);
389
					vfloat data_a(blk.data_a + idx);
390

391
					vint data_ri = float_to_int_rtn(min(data_r, 1.0f) * 255.0f);
392
					vint data_gi = float_to_int_rtn(min(data_g, 1.0f) * 255.0f);
393
					vint data_bi = float_to_int_rtn(min(data_b, 1.0f) * 255.0f);
394
					vint data_ai = float_to_int_rtn(min(data_a, 1.0f) * 255.0f);
395

396
					if (needs_swz)
397
					{
398
						vint swizzle_table[7];
399
						swizzle_table[ASTCENC_SWZ_0] = vint(0);
400
						swizzle_table[ASTCENC_SWZ_1] = vint(255);
401
						swizzle_table[ASTCENC_SWZ_R] = data_ri;
402
						swizzle_table[ASTCENC_SWZ_G] = data_gi;
403
						swizzle_table[ASTCENC_SWZ_B] = data_bi;
404
						swizzle_table[ASTCENC_SWZ_A] = data_ai;
405

406
						if (needs_z)
407
						{
408
							vfloat data_x = (data_r * vfloat(2.0f)) - vfloat(1.0f);
409
							vfloat data_y = (data_a * vfloat(2.0f)) - vfloat(1.0f);
410
							vfloat data_z = vfloat(1.0f) - (data_x * data_x) - (data_y * data_y);
411
							data_z = max(data_z, 0.0f);
412
							data_z = (sqrt(data_z) * vfloat(0.5f)) + vfloat(0.5f);
413

414
							swizzle_table[ASTCENC_SWZ_Z] = float_to_int_rtn(min(data_z, 1.0f) * 255.0f);
415
						}
416

417
						data_ri = swizzle_table[swz.r];
418
						data_gi = swizzle_table[swz.g];
419
						data_bi = swizzle_table[swz.b];
420
						data_ai = swizzle_table[swz.a];
421
					}
422

423
					// Errors are NaN encoded - convert to magenta error color
424
					// Branch is OK here - it is almost never true so predicts well
425
					vmask nan_mask = data_r != data_r;
426
					if (any(nan_mask))
427
					{
428
						data_ri = select(data_ri, vint(0xFF), nan_mask);
429
						data_gi = select(data_gi, vint(0x00), nan_mask);
430
						data_bi = select(data_bi, vint(0xFF), nan_mask);
431
						data_ai = select(data_ai, vint(0xFF), nan_mask);
432
					}
433

434
					vint data_rgbai = interleave_rgba8(data_ri, data_gi, data_bi, data_ai);
435
					vmask store_mask = vint::lane_id() < vint(used_texels);
436
					store_lanes_masked(data8_row, data_rgbai, store_mask);
437

438
					data8_row += ASTCENC_SIMD_WIDTH * 4;
439
					idx += used_texels;
440
				}
441
				idx += x_nudge;
442
			}
443
			idx += y_nudge;
444
		}
445
	}
446
	else if (img.data_type == ASTCENC_TYPE_F16)
447
	{
448
		for (unsigned int z = z_start; z < z_end; z++)
449
		{
450
			// Fetch the image plane
451
			uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);
452

453
			for (unsigned int y = y_start; y < y_end; y++)
454
			{
455
				uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start);
456

457
				for (unsigned int x = 0; x < x_count; x++)
458
				{
459
					vint4 color;
460

461
					// NaNs are handled inline - no need to special case
462
					if (needs_swz)
463
					{
464
						float data[7];
465
						data[ASTCENC_SWZ_0] = 0.0f;
466
						data[ASTCENC_SWZ_1] = 1.0f;
467
						data[ASTCENC_SWZ_R] = blk.data_r[idx];
468
						data[ASTCENC_SWZ_G] = blk.data_g[idx];
469
						data[ASTCENC_SWZ_B] = blk.data_b[idx];
470
						data[ASTCENC_SWZ_A] = blk.data_a[idx];
471

472
						if (needs_z)
473
						{
474
							float xN = (data[0] * 2.0f) - 1.0f;
475
							float yN = (data[3] * 2.0f) - 1.0f;
476
							float zN = 1.0f - xN * xN - yN * yN;
477
							if (zN < 0.0f)
478
							{
479
								zN = 0.0f;
480
							}
481
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
482
						}
483

484
						vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
485
						color = float_to_float16(colorf);
486
					}
487
					else
488
					{
489
						vfloat4 colorf = blk.texel(idx);
490
						color = float_to_float16(colorf);
491
					}
492

493
					// TODO: Vectorize with store N shorts?
494
					data16_row[0] = static_cast<uint16_t>(color.lane<0>());
495
					data16_row[1] = static_cast<uint16_t>(color.lane<1>());
496
					data16_row[2] = static_cast<uint16_t>(color.lane<2>());
497
					data16_row[3] = static_cast<uint16_t>(color.lane<3>());
498
					data16_row += 4;
499
					idx++;
500
				}
501
				idx += x_nudge;
502
			}
503
			idx += y_nudge;
504
		}
505
	}
506
	else // if (img.data_type == ASTCENC_TYPE_F32)
507
	{
508
		assert(img.data_type == ASTCENC_TYPE_F32);
509

510
		for (unsigned int z = z_start; z < z_end; z++)
511
		{
512
			// Fetch the image plane
513
			float* data32 = static_cast<float*>(img.data[z]);
514

515
			for (unsigned int y = y_start; y < y_end; y++)
516
			{
517
				float* data32_row = data32 + (4 * x_size * y) + (4 * x_start);
518

519
				for (unsigned int x = 0; x < x_count; x++)
520
				{
521
					vfloat4 color = blk.texel(idx);
522

523
					// NaNs are handled inline - no need to special case
524
					if (needs_swz)
525
					{
526
						float data[7];
527
						data[ASTCENC_SWZ_0] = 0.0f;
528
						data[ASTCENC_SWZ_1] = 1.0f;
529
						data[ASTCENC_SWZ_R] = color.lane<0>();
530
						data[ASTCENC_SWZ_G] = color.lane<1>();
531
						data[ASTCENC_SWZ_B] = color.lane<2>();
532
						data[ASTCENC_SWZ_A] = color.lane<3>();
533

534
						if (needs_z)
535
						{
536
							float xN = (data[0] * 2.0f) - 1.0f;
537
							float yN = (data[3] * 2.0f) - 1.0f;
538
							float zN = 1.0f - xN * xN - yN * yN;
539
							if (zN < 0.0f)
540
							{
541
								zN = 0.0f;
542
							}
543
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
544
						}
545

546
						color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
547
					}
548

549
					store(color, data32_row);
550
					data32_row += 4;
551
					idx++;
552
				}
553
				idx += x_nudge;
554
			}
555
			idx += y_nudge;
556
		}
557
	}
558
}
559

560
Product

Resources

Company