Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/astcenc/astcenc_internal.h
9905 views
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2024 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
// http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
* @brief Functions and data declarations.
20
*/
21
22
#ifndef ASTCENC_INTERNAL_INCLUDED
23
#define ASTCENC_INTERNAL_INCLUDED
24
25
#include <algorithm>
26
#include <cstddef>
27
#include <cstdint>
28
#if defined(ASTCENC_DIAGNOSTICS)
29
#include <cstdio>
30
#endif
31
#include <cstdlib>
32
#include <limits>
33
34
#include "astcenc.h"
35
#include "astcenc_mathlib.h"
36
#include "astcenc_vecmathlib.h"
37
38
/**
 * @brief Make a promise to the compiler's optimizer.
 *
 * A promise is an expression that the optimizer can assume is true, to help it generate
 * faster code. Common use cases for this are to promise that a for loop will iterate more than
 * once, or that the loop iteration count is a multiple of a vector length, which avoids pre-loop
 * checks and can avoid loop tails if loops are unrolled by the auto-vectorizer.
 *
 * When NDEBUG is not defined the promise is checked with assert() instead.
 *
 * The statement-style variants are wrapped in do { } while (0) so that promise(x); always behaves
 * as a single statement, including when used as the unbraced body of an if / else.
 */
#if defined(NDEBUG)
	#if !defined(__clang__) && defined(_MSC_VER)
		#define promise(cond) __assume(cond)
	#elif defined(__clang__)
		#if __has_builtin(__builtin_assume)
			#define promise(cond) __builtin_assume(cond)
		#elif __has_builtin(__builtin_unreachable)
			#define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
		#else
			#define promise(cond)
		#endif
	#else // Assume GCC
		#define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
	#endif
#else
	#define promise(cond) assert(cond)
#endif
63
64
/* ============================================================================
  Constants
============================================================================ */
#if !defined(ASTCENC_BLOCK_MAX_TEXELS)
	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
#endif

/** @brief The maximum number of texels a block can support (6x6x6 block). */
static constexpr unsigned int BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS };

/** @brief The maximum number of components a block can support. */
static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 };

/** @brief The maximum number of partitions a block can support. */
static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 };

/** @brief The number of partitionings, per partition count, supported by the ASTC format. */
static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 };

/** @brief The maximum number of texels used during partition selection for texel clustering. */
static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 };

/** @brief The maximum number of weights a block can support. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 };

/** @brief The maximum number of weights a block can support per plane in 2 plane mode. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 };

/** @brief The minimum number of weight bits a candidate encoding must encode. */
static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 };

/** @brief The maximum number of weight bits a candidate encoding can encode. */
static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 };

/** @brief The index indicating a bad (unused) block mode in the remap array. */
static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu };

/** @brief The index indicating a bad (unused) partitioning in the remap array. */
static constexpr uint16_t BLOCK_BAD_PARTITIONING { 0xFFFFu };

/** @brief The number of partition index bits supported by the ASTC format. */
static constexpr unsigned int PARTITION_INDEX_BITS { 10 };

/** @brief The offset of the plane 2 weights in shared weight arrays. */
static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE };

/** @brief The sum of quantized weights for one texel. */
static constexpr float WEIGHTS_TEXEL_SUM { 16.0f };

/** @brief The number of block modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 };

/** @brief The number of weight grid decimation modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 };

/** @brief The high default error used to initialize error trackers. */
static constexpr float ERROR_CALC_DEFAULT { 1e30f };

/**
 * @brief The minimum tuning setting threshold for the one partition fast path.
 */
static constexpr float TUNE_MIN_SEARCH_MODE0 { 0.85f };

/**
 * @brief The maximum number of candidate encodings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 };

/**
 * @brief The maximum number of candidate partitionings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 8 };

/**
 * @brief The maximum quant level using full angular endpoint search method.
 *
 * The angular endpoint search is used to find the min/max weight that should
 * be used for a given quantization level. It is effective but expensive, so
 * we only use it where it has the most value - low quant levels with wide
 * spacing. It is used below TUNE_MAX_ANGULAR_QUANT (inclusive). Above this we
 * assume the min weight is 0.0f, and the max weight is 1.0f.
 *
 * Note the angular algorithm is vectorized, and using QUANT_12 exactly fills
 * one 8-wide vector. Decreasing by one doesn't buy much performance, and
 * increasing by one is disproportionately expensive.
 */
static constexpr unsigned int TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */
155
156
static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == 0,
157
"BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH");
158
159
static_assert(BLOCK_MAX_TEXELS <= 216,
160
"BLOCK_MAX_TEXELS must not be greater than 216");
161
162
static_assert((BLOCK_MAX_WEIGHTS % ASTCENC_SIMD_WIDTH) == 0,
163
"BLOCK_MAX_WEIGHTS must be multiple of ASTCENC_SIMD_WIDTH");
164
165
static_assert((WEIGHTS_MAX_BLOCK_MODES % ASTCENC_SIMD_WIDTH) == 0,
166
"WEIGHTS_MAX_BLOCK_MODES must be multiple of ASTCENC_SIMD_WIDTH");
167
168
169
/* ============================================================================
170
Commonly used data structures
171
============================================================================ */
172
173
/**
 * @brief The ASTC endpoint formats.
 *
 * Note, the values here are used directly in the encoding in the format so do not rearrange.
 */
enum endpoint_formats
{
	FMT_LUMINANCE = 0,
	FMT_LUMINANCE_DELTA = 1,
	FMT_HDR_LUMINANCE_LARGE_RANGE = 2,
	FMT_HDR_LUMINANCE_SMALL_RANGE = 3,
	FMT_LUMINANCE_ALPHA = 4,
	FMT_LUMINANCE_ALPHA_DELTA = 5,
	FMT_RGB_SCALE = 6,
	FMT_HDR_RGB_SCALE = 7,
	FMT_RGB = 8,
	FMT_RGB_DELTA = 9,
	FMT_RGB_SCALE_ALPHA = 10,
	FMT_HDR_RGB = 11,
	FMT_RGBA = 12,
	FMT_RGBA_DELTA = 13,
	FMT_HDR_RGB_LDR_ALPHA = 14,
	FMT_HDR_RGBA = 15
};
197
198
/**
 * @brief The ASTC quantization methods.
 *
 * Note, the values here are used directly in the encoding in the format so do not rearrange.
 */
enum quant_method
{
	QUANT_2 = 0,
	QUANT_3 = 1,
	QUANT_4 = 2,
	QUANT_5 = 3,
	QUANT_6 = 4,
	QUANT_8 = 5,
	QUANT_10 = 6,
	QUANT_12 = 7,
	QUANT_16 = 8,
	QUANT_20 = 9,
	QUANT_24 = 10,
	QUANT_32 = 11,
	QUANT_40 = 12,
	QUANT_48 = 13,
	QUANT_64 = 14,
	QUANT_80 = 15,
	QUANT_96 = 16,
	QUANT_128 = 17,
	QUANT_160 = 18,
	QUANT_192 = 19,
	QUANT_256 = 20
};
227
228
/**
229
* @brief The number of levels use by an ASTC quantization method.
230
*
231
* @param method The quantization method
232
*
233
* @return The number of levels used by @c method.
234
*/
235
static inline unsigned int get_quant_level(quant_method method)
236
{
237
switch (method)
238
{
239
case QUANT_2: return 2;
240
case QUANT_3: return 3;
241
case QUANT_4: return 4;
242
case QUANT_5: return 5;
243
case QUANT_6: return 6;
244
case QUANT_8: return 8;
245
case QUANT_10: return 10;
246
case QUANT_12: return 12;
247
case QUANT_16: return 16;
248
case QUANT_20: return 20;
249
case QUANT_24: return 24;
250
case QUANT_32: return 32;
251
case QUANT_40: return 40;
252
case QUANT_48: return 48;
253
case QUANT_64: return 64;
254
case QUANT_80: return 80;
255
case QUANT_96: return 96;
256
case QUANT_128: return 128;
257
case QUANT_160: return 160;
258
case QUANT_192: return 192;
259
case QUANT_256: return 256;
260
}
261
262
// Unreachable - the enum is fully described
263
return 0;
264
}
265
266
/**
267
* @brief Computed metrics about a partition in a block.
268
*/
269
struct partition_metrics
270
{
271
/** @brief The error-weighted average color in the partition. */
272
vfloat4 avg;
273
274
/** @brief The dominant error-weighted direction in the partition. */
275
vfloat4 dir;
276
};
277
278
/**
279
* @brief Computed lines for a a three component analysis.
280
*/
281
struct partition_lines3
282
{
283
/** @brief Line for uncorrelated chroma. */
284
line3 uncor_line;
285
286
/** @brief Line for correlated chroma, passing though the origin. */
287
line3 samec_line;
288
289
/** @brief Post-processed line for uncorrelated chroma. */
290
processed_line3 uncor_pline;
291
292
/** @brief Post-processed line for correlated chroma, passing though the origin. */
293
processed_line3 samec_pline;
294
295
/**
296
* @brief The length of the line for uncorrelated chroma.
297
*
298
* This is used for both the uncorrelated and same chroma lines - they are normally very similar
299
* and only used for the relative ranking of partitionings against one another.
300
*/
301
float line_length;
302
};
303
304
/**
305
* @brief The partition information for a single partition.
306
*
307
* ASTC has a total of 1024 candidate partitions for each of 2/3/4 partition counts, although this
308
* 1024 includes seeds that generate duplicates of other seeds and seeds that generate completely
309
* empty partitions. These are both valid encodings, but astcenc will skip both during compression
310
* as they are not useful.
311
*/
312
struct partition_info
313
{
314
/** @brief The number of partitions in this partitioning. */
315
uint16_t partition_count;
316
317
/** @brief The index (seed) of this partitioning. */
318
uint16_t partition_index;
319
320
/**
321
* @brief The number of texels in each partition.
322
*
323
* Note that some seeds result in zero texels assigned to a partition. These are valid, but are
324
* skipped by this compressor as there is no point spending bits encoding an unused endpoints.
325
*/
326
uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
327
328
/** @brief The partition of each texel in the block. */
329
ASTCENC_ALIGNAS uint8_t partition_of_texel[BLOCK_MAX_TEXELS];
330
331
/** @brief The list of texels in each partition. */
332
ASTCENC_ALIGNAS uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS];
333
};
334
335
/**
336
* @brief The weight grid information for a single decimation pattern.
337
*
338
* ASTC can store one weight per texel, but is also capable of storing lower resolution weight grids
339
* that are interpolated during decompression to assign a with to a texel. Storing fewer weights
340
* can free up a substantial amount of bits that we can then spend on more useful things, such as
341
* more accurate endpoints and weights, or additional partitions.
342
*
343
* This data structure is used to store information about a single weight grid decimation pattern,
344
* for a single block size.
345
*/
346
struct decimation_info
347
{
348
/** @brief The total number of texels in the block. */
349
uint8_t texel_count;
350
351
/** @brief The maximum number of stored weights that contribute to each texel, between 1 and 4. */
352
uint8_t max_texel_weight_count;
353
354
/** @brief The total number of weights stored. */
355
uint8_t weight_count;
356
357
/** @brief The number of stored weights in the X dimension. */
358
uint8_t weight_x;
359
360
/** @brief The number of stored weights in the Y dimension. */
361
uint8_t weight_y;
362
363
/** @brief The number of stored weights in the Z dimension. */
364
uint8_t weight_z;
365
366
/**
367
* @brief The number of weights that contribute to each texel.
368
* Value is between 1 and 4.
369
*/
370
ASTCENC_ALIGNAS uint8_t texel_weight_count[BLOCK_MAX_TEXELS];
371
372
/**
373
* @brief The weight index of the N weights that are interpolated for each texel.
374
* Stored transposed to improve vectorization.
375
*/
376
ASTCENC_ALIGNAS uint8_t texel_weights_tr[4][BLOCK_MAX_TEXELS];
377
378
/**
379
* @brief The bilinear contribution of the N weights that are interpolated for each texel.
380
* Value is between 0 and 16, stored transposed to improve vectorization.
381
*/
382
ASTCENC_ALIGNAS uint8_t texel_weight_contribs_int_tr[4][BLOCK_MAX_TEXELS];
383
384
/**
385
* @brief The bilinear contribution of the N weights that are interpolated for each texel.
386
* Value is between 0 and 1, stored transposed to improve vectorization.
387
*/
388
ASTCENC_ALIGNAS float texel_weight_contribs_float_tr[4][BLOCK_MAX_TEXELS];
389
390
/** @brief The number of texels that each stored weight contributes to. */
391
ASTCENC_ALIGNAS uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
392
393
/**
394
* @brief The list of texels that use a specific weight index.
395
* Stored transposed to improve vectorization.
396
*/
397
ASTCENC_ALIGNAS uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
398
399
/**
400
* @brief The bilinear contribution to the N texels that use each weight.
401
* Value is between 0 and 1, stored transposed to improve vectorization.
402
*/
403
ASTCENC_ALIGNAS float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
404
405
/**
406
* @brief The bilinear contribution to the Nth texel that uses each weight.
407
* Value is between 0 and 1, stored transposed to improve vectorization.
408
*/
409
float texel_contrib_for_weight[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
410
};
411
412
/**
413
* @brief Metadata for single block mode for a specific block size.
414
*/
415
struct block_mode
416
{
417
/** @brief The block mode index in the ASTC encoded form. */
418
uint16_t mode_index;
419
420
/** @brief The decimation mode index in the compressor reindexed list. */
421
uint8_t decimation_mode;
422
423
/** @brief The weight quantization used by this block mode. */
424
uint8_t quant_mode;
425
426
/** @brief The weight quantization used by this block mode. */
427
uint8_t weight_bits;
428
429
/** @brief Is a dual weight plane used by this block mode? */
430
uint8_t is_dual_plane : 1;
431
432
/**
433
* @brief Get the weight quantization used by this block mode.
434
*
435
* @return The quantization level.
436
*/
437
inline quant_method get_weight_quant_mode() const
438
{
439
return static_cast<quant_method>(this->quant_mode);
440
}
441
};
442
443
/**
444
* @brief Metadata for single decimation mode for a specific block size.
445
*/
446
struct decimation_mode
447
{
448
/** @brief The max weight precision for 1 plane, or -1 if not supported. */
449
int8_t maxprec_1plane;
450
451
/** @brief The max weight precision for 2 planes, or -1 if not supported. */
452
int8_t maxprec_2planes;
453
454
/**
455
* @brief Bitvector indicating weight quant modes used by active 1 plane block modes.
456
*
457
* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
458
*/
459
uint16_t refprec_1plane;
460
461
/**
462
* @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
463
*
464
* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
465
*/
466
uint16_t refprec_2planes;
467
468
/**
469
* @brief Set a 1 plane weight quant as active.
470
*
471
* @param weight_quant The quant method to set.
472
*/
473
void set_ref_1plane(quant_method weight_quant)
474
{
475
refprec_1plane |= (1 << weight_quant);
476
}
477
478
/**
479
* @brief Test if this mode is active below a given 1 plane weight quant (inclusive).
480
*
481
* @param max_weight_quant The max quant method to test.
482
*/
483
bool is_ref_1plane(quant_method max_weight_quant) const
484
{
485
uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
486
return (refprec_1plane & mask) != 0;
487
}
488
489
/**
490
* @brief Set a 2 plane weight quant as active.
491
*
492
* @param weight_quant The quant method to set.
493
*/
494
void set_ref_2plane(quant_method weight_quant)
495
{
496
refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
497
}
498
499
/**
500
* @brief Test if this mode is active below a given 2 plane weight quant (inclusive).
501
*
502
* @param max_weight_quant The max quant method to test.
503
*/
504
bool is_ref_2plane(quant_method max_weight_quant) const
505
{
506
uint16_t mask = static_cast<uint16_t>((1 << (max_weight_quant + 1)) - 1);
507
return (refprec_2planes & mask) != 0;
508
}
509
};
510
511
/**
512
* @brief Data tables for a single block size.
513
*
514
* The decimation tables store the information to apply weight grid dimension reductions. We only
515
* store the decimation modes that are actually needed by the current context; many of the possible
516
* modes will be unused (too many weights for the current block size or disabled by heuristics). The
517
* actual number of weights stored is @c decimation_mode_count, and the @c decimation_modes and
518
* @c decimation_tables arrays store the active modes contiguously at the start of the array. These
519
* entries are not stored in any particular order.
520
*
521
* The block mode tables store the unpacked block mode settings. Block modes are stored in the
522
* compressed block as an 11 bit field, but for any given block size and set of compressor
523
* heuristics, only a subset of the block modes will be used. The actual number of block modes
524
* stored is indicated in @c block_mode_count, and the @c block_modes array store the active modes
525
* contiguously at the start of the array. These entries are stored in incrementing "packed" value
526
* order, which doesn't mean much once unpacked. To allow decompressors to reference the packed data
527
* efficiently the @c block_mode_packed_index array stores the mapping between physical ID and the
528
* actual remapped array index.
529
*/
530
struct block_size_descriptor
531
{
532
/** @brief The block X dimension, in texels. */
533
uint8_t xdim;
534
535
/** @brief The block Y dimension, in texels. */
536
uint8_t ydim;
537
538
/** @brief The block Z dimension, in texels. */
539
uint8_t zdim;
540
541
/** @brief The block total texel count. */
542
uint8_t texel_count;
543
544
/**
545
* @brief The number of stored decimation modes which are "always" modes.
546
*
547
* Always modes are stored at the start of the decimation_modes list.
548
*/
549
unsigned int decimation_mode_count_always;
550
551
/** @brief The number of stored decimation modes for selected encodings. */
552
unsigned int decimation_mode_count_selected;
553
554
/** @brief The number of stored decimation modes for any encoding. */
555
unsigned int decimation_mode_count_all;
556
557
/**
558
* @brief The number of stored block modes which are "always" modes.
559
*
560
* Always modes are stored at the start of the block_modes list.
561
*/
562
unsigned int block_mode_count_1plane_always;
563
564
/** @brief The number of stored block modes for active 1 plane encodings. */
565
unsigned int block_mode_count_1plane_selected;
566
567
/** @brief The number of stored block modes for active 1 and 2 plane encodings. */
568
unsigned int block_mode_count_1plane_2plane_selected;
569
570
/** @brief The number of stored block modes for any encoding. */
571
unsigned int block_mode_count_all;
572
573
/** @brief The number of selected partitionings for 1/2/3/4 partitionings. */
574
unsigned int partitioning_count_selected[BLOCK_MAX_PARTITIONS];
575
576
/** @brief The number of partitionings for 1/2/3/4 partitionings. */
577
unsigned int partitioning_count_all[BLOCK_MAX_PARTITIONS];
578
579
/** @brief The active decimation modes, stored in low indices. */
580
decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];
581
582
/** @brief The active decimation tables, stored in low indices. */
583
ASTCENC_ALIGNAS decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
584
585
/** @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. */
586
uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
587
588
/** @brief The active block modes, stored in low indices. */
589
block_mode block_modes[WEIGHTS_MAX_BLOCK_MODES];
590
591
/** @brief The active partition tables, stored in low indices per-count. */
592
partition_info partitionings[(3 * BLOCK_MAX_PARTITIONINGS) + 1];
593
594
/**
595
* @brief The packed partition table array index, or @c BLOCK_BAD_PARTITIONING if not active.
596
*
597
* Indexed by partition_count - 2, containing 2, 3 and 4 partitions.
598
*/
599
uint16_t partitioning_packed_index[3][BLOCK_MAX_PARTITIONINGS];
600
601
/** @brief The active texels for k-means partition selection. */
602
uint8_t kmeans_texels[BLOCK_MAX_KMEANS_TEXELS];
603
604
/**
605
* @brief The canonical 2-partition coverage pattern used during block partition search.
606
*
607
* Indexed by remapped index, not physical index.
608
*/
609
uint64_t coverage_bitmaps_2[BLOCK_MAX_PARTITIONINGS][2];
610
611
/**
612
* @brief The canonical 3-partition coverage pattern used during block partition search.
613
*
614
* Indexed by remapped index, not physical index.
615
*/
616
uint64_t coverage_bitmaps_3[BLOCK_MAX_PARTITIONINGS][3];
617
618
/**
619
* @brief The canonical 4-partition coverage pattern used during block partition search.
620
*
621
* Indexed by remapped index, not physical index.
622
*/
623
uint64_t coverage_bitmaps_4[BLOCK_MAX_PARTITIONINGS][4];
624
625
/**
626
* @brief Get the block mode structure for index @c block_mode.
627
*
628
* This function can only return block modes that are enabled by the current compressor config.
629
* Decompression from an arbitrary source should not use this without first checking that the
630
* packed block mode index is not @c BLOCK_BAD_BLOCK_MODE.
631
*
632
* @param block_mode The packed block mode index.
633
*
634
* @return The block mode structure.
635
*/
636
const block_mode& get_block_mode(unsigned int block_mode) const
637
{
638
unsigned int packed_index = this->block_mode_packed_index[block_mode];
639
assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all);
640
return this->block_modes[packed_index];
641
}
642
643
/**
644
* @brief Get the decimation mode structure for index @c decimation_mode.
645
*
646
* This function can only return decimation modes that are enabled by the current compressor
647
* config. The mode array is stored packed, but this is only ever indexed by the packed index
648
* stored in the @c block_mode and never exists in an unpacked form.
649
*
650
* @param decimation_mode The packed decimation mode index.
651
*
652
* @return The decimation mode structure.
653
*/
654
const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const
655
{
656
return this->decimation_modes[decimation_mode];
657
}
658
659
/**
660
* @brief Get the decimation info structure for index @c decimation_mode.
661
*
662
* This function can only return decimation modes that are enabled by the current compressor
663
* config. The mode array is stored packed, but this is only ever indexed by the packed index
664
* stored in the @c block_mode and never exists in an unpacked form.
665
*
666
* @param decimation_mode The packed decimation mode index.
667
*
668
* @return The decimation info structure.
669
*/
670
const decimation_info& get_decimation_info(unsigned int decimation_mode) const
671
{
672
return this->decimation_tables[decimation_mode];
673
}
674
675
/**
676
* @brief Get the partition info table for a given partition count.
677
*
678
* @param partition_count The number of partitions we want the table for.
679
*
680
* @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part).
681
*/
682
const partition_info* get_partition_table(unsigned int partition_count) const
683
{
684
if (partition_count == 1)
685
{
686
partition_count = 5;
687
}
688
unsigned int index = (partition_count - 2) * BLOCK_MAX_PARTITIONINGS;
689
return this->partitionings + index;
690
}
691
692
/**
693
* @brief Get the partition info structure for a given partition count and seed.
694
*
695
* @param partition_count The number of partitions we want the info for.
696
* @param index The partition seed (between 0 and 1023).
697
*
698
* @return The partition info structure.
699
*/
700
const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const
701
{
702
unsigned int packed_index = 0;
703
if (partition_count >= 2)
704
{
705
packed_index = this->partitioning_packed_index[partition_count - 2][index];
706
}
707
708
assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]);
709
auto& result = get_partition_table(partition_count)[packed_index];
710
assert(index == result.partition_index);
711
return result;
712
}
713
714
/**
715
* @brief Get the partition info structure for a given partition count and seed.
716
*
717
* @param partition_count The number of partitions we want the info for.
718
* @param packed_index The raw array offset.
719
*
720
* @return The partition info structure.
721
*/
722
const partition_info& get_raw_partition_info(unsigned int partition_count, unsigned int packed_index) const
723
{
724
assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - 1]);
725
auto& result = get_partition_table(partition_count)[packed_index];
726
return result;
727
}
728
};
729
730
/**
731
* @brief The image data for a single block.
732
*
733
* The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy
734
* vectorization. Input data is converted to float and stored as values between 0 and 65535. LDR
735
* data is stored as direct UNORM data, HDR data is stored as LNS data. They are allocated SIMD
736
* elements over-size to allow vectorized stores of unaligned and partial SIMD lanes (e.g. in a
737
* 6x6x6 block the final row write will read elements 210-217 (vec8) or 214-217 (vec4), which is
738
* two elements above the last real data element). The overspill values are never written to memory,
739
* and would be benign, but the padding avoids hitting undefined behavior.
740
*
741
* The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during
742
* decompression. The current compressor will always use HDR endpoint formats when in HDR mode.
743
*/
744
struct image_block
745
{
746
/** @brief The input (compress) or output (decompress) data for the red color component. */
747
ASTCENC_ALIGNAS float data_r[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
748
749
/** @brief The input (compress) or output (decompress) data for the green color component. */
750
ASTCENC_ALIGNAS float data_g[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
751
752
/** @brief The input (compress) or output (decompress) data for the blue color component. */
753
ASTCENC_ALIGNAS float data_b[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
754
755
/** @brief The input (compress) or output (decompress) data for the alpha color component. */
756
ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS + ASTCENC_SIMD_WIDTH - 1];
757
758
/** @brief The number of texels in the block. */
759
uint8_t texel_count;
760
761
/** @brief The original data for texel 0 for constant color block encoding. */
762
vfloat4 origin_texel;
763
764
/** @brief The min component value of all texels in the block. */
765
vfloat4 data_min;
766
767
/** @brief The mean component value of all texels in the block. */
768
vfloat4 data_mean;
769
770
/** @brief The max component value of all texels in the block. */
771
vfloat4 data_max;
772
773
/** @brief The relative error significance of the color channels. */
774
vfloat4 channel_weight;
775
776
/** @brief Is this grayscale block where R == G == B for all texels? */
777
bool grayscale;
778
779
/** @brief Is the eventual decode using decode_unorm8 rounding? */
780
bool decode_unorm8;
781
782
/** @brief Set to 1 if a texel is using HDR RGB endpoints (decompression only). */
783
uint8_t rgb_lns[BLOCK_MAX_TEXELS];
784
785
/** @brief Set to 1 if a texel is using HDR alpha endpoints (decompression only). */
786
uint8_t alpha_lns[BLOCK_MAX_TEXELS];
787
788
/** @brief The X position of this block in the input or output image. */
789
unsigned int xpos;
790
791
/** @brief The Y position of this block in the input or output image. */
792
unsigned int ypos;
793
794
/** @brief The Z position of this block in the input or output image. */
795
unsigned int zpos;
796
797
/**
798
* @brief Get an RGBA texel value from the data.
799
*
800
* @param index The texel index.
801
*
802
* @return The texel in RGBA component ordering.
803
*/
804
inline vfloat4 texel(unsigned int index) const
805
{
806
return vfloat4(data_r[index],
807
data_g[index],
808
data_b[index],
809
data_a[index]);
810
}
811
812
/**
813
* @brief Get an RGB texel value from the data.
814
*
815
* @param index The texel index.
816
*
817
* @return The texel in RGB0 component ordering.
818
*/
819
inline vfloat4 texel3(unsigned int index) const
820
{
821
return vfloat3(data_r[index],
822
data_g[index],
823
data_b[index]);
824
}
825
826
/**
827
* @brief Get the default alpha value for endpoints that don't store it.
828
*
829
* The default depends on whether the alpha endpoint is LDR or HDR.
830
*
831
* @return The alpha value in the scaled range used by the compressor.
832
*/
833
inline float get_default_alpha() const
834
{
835
return this->alpha_lns[0] ? static_cast<float>(0x7800) : static_cast<float>(0xFFFF);
836
}
837
838
/**
839
* @brief Test if a single color channel is constant across the block.
840
*
841
* Constant color channels are easier to compress as interpolating between two identical colors
842
* always returns the same value, irrespective of the weight used. They therefore can be ignored
843
* for the purposes of weight selection and use of a second weight plane.
844
*
845
* @return @c true if the channel is constant across the block, @c false otherwise.
846
*/
847
inline bool is_constant_channel(int channel) const
848
{
849
vmask4 lane_mask = vint4::lane_id() == vint4(channel);
850
vmask4 color_mask = this->data_min == this->data_max;
851
return any(lane_mask & color_mask);
852
}
853
854
/**
855
* @brief Test if this block is a luminance block with constant 1.0 alpha.
856
*
857
* @return @c true if the block is a luminance block , @c false otherwise.
858
*/
859
inline bool is_luminance() const
860
{
861
float default_alpha = this->get_default_alpha();
862
bool alpha1 = (this->data_min.lane<3>() == default_alpha) &&
863
(this->data_max.lane<3>() == default_alpha);
864
return this->grayscale && alpha1;
865
}
866
867
/**
868
* @brief Test if this block is a luminance block with variable alpha.
869
*
870
* @return @c true if the block is a luminance + alpha block , @c false otherwise.
871
*/
872
inline bool is_luminancealpha() const
873
{
874
float default_alpha = this->get_default_alpha();
875
bool alpha1 = (this->data_min.lane<3>() == default_alpha) &&
876
(this->data_max.lane<3>() == default_alpha);
877
return this->grayscale && !alpha1;
878
}
879
};
880
881
/**
882
* @brief Data structure storing the color endpoints for a block.
883
*/
884
struct endpoints
885
{
886
/** @brief The number of partition endpoints stored. */
887
unsigned int partition_count;
888
889
/** @brief The colors for endpoint 0. */
890
vfloat4 endpt0[BLOCK_MAX_PARTITIONS];
891
892
/** @brief The colors for endpoint 1. */
893
vfloat4 endpt1[BLOCK_MAX_PARTITIONS];
894
};
895
896
/**
897
* @brief Data structure storing the color endpoints and weights.
898
*/
899
struct endpoints_and_weights
900
{
901
/** @brief True if all active values in weight_error_scale are the same. */
902
bool is_constant_weight_error_scale;
903
904
/** @brief The color endpoints. */
905
endpoints ep;
906
907
/** @brief The ideal weight for each texel; may be undecimated or decimated. */
908
ASTCENC_ALIGNAS float weights[BLOCK_MAX_TEXELS];
909
910
/** @brief The ideal weight error scaling for each texel; may be undecimated or decimated. */
911
ASTCENC_ALIGNAS float weight_error_scale[BLOCK_MAX_TEXELS];
912
};
913
914
/**
 * @brief Utility storing estimated errors from choosing particular endpoint encodings.
 *
 * Each float field is the estimated error of one simplified endpoint representation; the two
 * flags record whether specific encoding variants can be used.
 */
struct encoding_choice_errors
{
    /** @brief Error of using LDR RGB-scale instead of complete endpoints. */
    float rgb_scale_error;

    /** @brief Error of using HDR RGB-scale instead of complete endpoints. */
    float rgb_luma_error;

    /** @brief Error of using luminance instead of RGB. */
    float luminance_error;

    /** @brief Error of discarding alpha and using a constant 1.0 alpha. */
    float alpha_drop_error;

    /** @brief Can we use delta offset encoding? */
    bool can_offset_encode;

    /** @brief Can we use blue contraction encoding? */
    bool can_blue_contract;
};
937
938
/**
939
* @brief Preallocated working buffers, allocated per thread during context creation.
940
*/
941
struct ASTCENC_ALIGNAS compression_working_buffers
942
{
943
/** @brief Ideal endpoints and weights for plane 1. */
944
endpoints_and_weights ei1;
945
946
/** @brief Ideal endpoints and weights for plane 2. */
947
endpoints_and_weights ei2;
948
949
/**
950
* @brief Decimated ideal weight values in the ~0-1 range.
951
*
952
* Note that values can be slightly below zero or higher than one due to
953
* endpoint extents being inside the ideal color representation.
954
*
955
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
956
*/
957
ASTCENC_ALIGNAS float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
958
959
/**
960
* @brief Decimated quantized weight values in the unquantized 0-64 range.
961
*
962
* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
963
*/
964
ASTCENC_ALIGNAS uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
965
966
/** @brief Error of the best encoding combination for each block mode. */
967
ASTCENC_ALIGNAS float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
968
969
/** @brief The best color quant for each block mode. */
970
uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
971
972
/** @brief The best color quant for each block mode if modes are the same and we have spare bits. */
973
uint8_t best_quant_levels_mod[WEIGHTS_MAX_BLOCK_MODES];
974
975
/** @brief The best endpoint format for each partition. */
976
uint8_t best_ep_formats[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS];
977
978
/** @brief The total bit storage needed for quantized weights for each block mode. */
979
int8_t qwt_bitcounts[WEIGHTS_MAX_BLOCK_MODES];
980
981
/** @brief The cumulative error for quantized weights for each block mode. */
982
float qwt_errors[WEIGHTS_MAX_BLOCK_MODES];
983
984
/** @brief The low weight value in plane 1 for each block mode. */
985
float weight_low_value1[WEIGHTS_MAX_BLOCK_MODES];
986
987
/** @brief The high weight value in plane 1 for each block mode. */
988
float weight_high_value1[WEIGHTS_MAX_BLOCK_MODES];
989
990
/** @brief The low weight value in plane 1 for each quant level and decimation mode. */
991
float weight_low_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
992
993
/** @brief The high weight value in plane 1 for each quant level and decimation mode. */
994
float weight_high_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
995
996
/** @brief The low weight value in plane 2 for each block mode. */
997
float weight_low_value2[WEIGHTS_MAX_BLOCK_MODES];
998
999
/** @brief The high weight value in plane 2 for each block mode. */
1000
float weight_high_value2[WEIGHTS_MAX_BLOCK_MODES];
1001
1002
/** @brief The low weight value in plane 2 for each quant level and decimation mode. */
1003
float weight_low_values2[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
1004
1005
/** @brief The high weight value in plane 2 for each quant level and decimation mode. */
1006
float weight_high_values2[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1];
1007
};
1008
1009
struct dt_init_working_buffers
1010
{
1011
uint8_t weight_count_of_texel[BLOCK_MAX_TEXELS];
1012
uint8_t grid_weights_of_texel[BLOCK_MAX_TEXELS][4];
1013
uint8_t weights_of_texel[BLOCK_MAX_TEXELS][4];
1014
1015
uint8_t texel_count_of_weight[BLOCK_MAX_WEIGHTS];
1016
uint8_t texels_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS];
1017
uint8_t texel_weights_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS];
1018
};
1019
1020
/**
 * @brief Weight quantization transfer table.
 *
 * ASTC can store texel weights at many quantization levels, so for performance we store essential
 * information about each level as a precomputed data structure. Unquantized weights are integers
 * or floats in the range [0, 64].
 *
 * This structure provides a table used to estimate the closest quantized weight for a given
 * floating-point weight. For each quantized weight it stores the corresponding unquantized value,
 * and for each unquantized value it stores a previous-value and a next-value.
 */
struct quant_and_transfer_table
{
    /** @brief The unscrambled unquantized value. */
    uint8_t quant_to_unquant[32];

    /** @brief The scrambling order: scrambled_quant = map[unscrambled_quant]. */
    uint8_t scramble_map[32];

    /** @brief The unscrambling order: unscrambled_unquant = map[scrambled_quant]. */
    uint8_t unscramble_and_unquant_map[32];

    /**
     * @brief A table of previous-and-next weights, indexed by the current unquantized value.
     *  * bits 7:0 = previous-index, unquantized
     *  * bits 15:8 = next-index, unquantized
     */
    uint16_t prev_next_values[65];
};
1049
1050
/** @brief The precomputed quant and transfer table. */
1051
extern const quant_and_transfer_table quant_and_xfer_tables[12];
1052
1053
/** @brief The block is an error block, and will return error color or NaN. */
static constexpr uint8_t SYM_BTYPE_ERROR { 0 };

/** @brief The block is a constant color block using FP16 colors. */
static constexpr uint8_t SYM_BTYPE_CONST_F16 { 1 };

/** @brief The block is a constant color block using UNORM16 colors. */
static constexpr uint8_t SYM_BTYPE_CONST_U16 { 2 };

/** @brief The block is a normal non-constant color block. */
static constexpr uint8_t SYM_BTYPE_NONCONST { 3 };
1064
1065
/**
1066
* @brief A symbolic representation of a compressed block.
1067
*
1068
* The symbolic representation stores the unpacked content of a single
1069
* physical compressed block, in a form which is much easier to access for
1070
* the rest of the compressor code.
1071
*/
1072
struct symbolic_compressed_block
1073
{
1074
/** @brief The block type, one of the @c SYM_BTYPE_* constants. */
1075
uint8_t block_type;
1076
1077
/** @brief The number of partitions; valid for @c NONCONST blocks. */
1078
uint8_t partition_count;
1079
1080
/** @brief Non-zero if the color formats matched; valid for @c NONCONST blocks. */
1081
uint8_t color_formats_matched;
1082
1083
/** @brief The plane 2 color component, or -1 if single plane; valid for @c NONCONST blocks. */
1084
int8_t plane2_component;
1085
1086
/** @brief The block mode; valid for @c NONCONST blocks. */
1087
uint16_t block_mode;
1088
1089
/** @brief The partition index; valid for @c NONCONST blocks if 2 or more partitions. */
1090
uint16_t partition_index;
1091
1092
/** @brief The endpoint color formats for each partition; valid for @c NONCONST blocks. */
1093
uint8_t color_formats[BLOCK_MAX_PARTITIONS];
1094
1095
/** @brief The endpoint color quant mode; valid for @c NONCONST blocks. */
1096
quant_method quant_mode;
1097
1098
/** @brief The error of the current encoding; valid for @c NONCONST blocks. */
1099
float errorval;
1100
1101
// We can't have both of these at the same time
1102
union {
1103
/** @brief The constant color; valid for @c CONST blocks. */
1104
int constant_color[BLOCK_MAX_COMPONENTS];
1105
1106
/** @brief The quantized endpoint color pairs; valid for @c NONCONST blocks. */
1107
uint8_t color_values[BLOCK_MAX_PARTITIONS][8];
1108
};
1109
1110
/** @brief The quantized and decimated weights.
1111
*
1112
* Weights are stored in the 0-64 unpacked range allowing them to be used
1113
* directly in encoding passes without per-use unpacking. Packing happens
1114
* when converting to/from the physical bitstream encoding.
1115
*
1116
* If dual plane, the second plane starts at @c weights[WEIGHTS_PLANE2_OFFSET].
1117
*/
1118
ASTCENC_ALIGNAS uint8_t weights[BLOCK_MAX_WEIGHTS];
1119
1120
/**
1121
* @brief Get the weight quantization used by this block mode.
1122
*
1123
* @return The quantization level.
1124
*/
1125
inline quant_method get_color_quant_mode() const
1126
{
1127
return this->quant_mode;
1128
}
1129
};
1130
1131
/**
1132
* @brief Parameter structure for @c compute_pixel_region_variance().
1133
*
1134
* This function takes a structure to avoid spilling arguments to the stack on every function
1135
* invocation, as there are a lot of parameters.
1136
*/
1137
struct pixel_region_args
1138
{
1139
/** @brief The image to analyze. */
1140
const astcenc_image* img;
1141
1142
/** @brief The component swizzle pattern. */
1143
astcenc_swizzle swz;
1144
1145
/** @brief Should the algorithm bother with Z axis processing? */
1146
bool have_z;
1147
1148
/** @brief The kernel radius for alpha processing. */
1149
unsigned int alpha_kernel_radius;
1150
1151
/** @brief The X dimension of the working data to process. */
1152
unsigned int size_x;
1153
1154
/** @brief The Y dimension of the working data to process. */
1155
unsigned int size_y;
1156
1157
/** @brief The Z dimension of the working data to process. */
1158
unsigned int size_z;
1159
1160
/** @brief The X position of first src and dst data in the data set. */
1161
unsigned int offset_x;
1162
1163
/** @brief The Y position of first src and dst data in the data set. */
1164
unsigned int offset_y;
1165
1166
/** @brief The Z position of first src and dst data in the data set. */
1167
unsigned int offset_z;
1168
1169
/** @brief The working memory buffer. */
1170
vfloat4 *work_memory;
1171
};
1172
1173
/**
1174
* @brief Parameter structure for @c compute_averages_proc().
1175
*/
1176
struct avg_args
1177
{
1178
/** @brief The arguments for the nested variance computation. */
1179
pixel_region_args arg;
1180
1181
/** @brief The image X dimensions. */
1182
unsigned int img_size_x;
1183
1184
/** @brief The image Y dimensions. */
1185
unsigned int img_size_y;
1186
1187
/** @brief The image Z dimensions. */
1188
unsigned int img_size_z;
1189
1190
/** @brief The maximum working block dimensions in X and Y dimensions. */
1191
unsigned int blk_size_xy;
1192
1193
/** @brief The maximum working block dimensions in Z dimensions. */
1194
unsigned int blk_size_z;
1195
1196
/** @brief The working block memory size. */
1197
unsigned int work_memory_size;
1198
};
1199
1200
#if defined(ASTCENC_DIAGNOSTICS)
1201
/* See astcenc_diagnostic_trace header for details. */
1202
class TraceLog;
1203
#endif
1204
1205
/**
1206
* @brief The astcenc compression context.
1207
*/
1208
struct astcenc_contexti
1209
{
1210
/** @brief The configuration this context was created with. */
1211
astcenc_config config;
1212
1213
/** @brief The thread count supported by this context. */
1214
unsigned int thread_count;
1215
1216
/** @brief The block size descriptor this context was created with. */
1217
block_size_descriptor* bsd;
1218
1219
/*
1220
* Fields below here are not needed in a decompress-only build, but some remain as they are
1221
* small and it avoids littering the code with #ifdefs. The most significant contributors to
1222
* large structure size are omitted.
1223
*/
1224
1225
/** @brief The input image alpha channel averages table, may be @c nullptr if not needed. */
1226
float* input_alpha_averages;
1227
1228
/** @brief The scratch working buffers, one per thread (see @c thread_count). */
1229
compression_working_buffers* working_buffers;
1230
1231
#if !defined(ASTCENC_DECOMPRESS_ONLY)
1232
/** @brief The pixel region and variance worker arguments. */
1233
avg_args avg_preprocess_args;
1234
#endif
1235
1236
#if defined(ASTCENC_DIAGNOSTICS)
1237
/**
1238
* @brief The diagnostic trace logger.
1239
*
1240
* Note that this is a singleton, so can only be used in single threaded mode. It only exists
1241
* here so we have a reference to close the file at the end of the capture.
1242
*/
1243
TraceLog* trace_log;
1244
#endif
1245
};
1246
1247
/* ============================================================================
1248
Functionality for managing block sizes and partition tables.
1249
============================================================================ */
1250
1251
/**
1252
* @brief Populate the block size descriptor for the target block size.
1253
*
1254
* This will also initialize the partition table metadata, which is stored as part of the BSD
1255
* structure.
1256
*
1257
* @param x_texels The number of texels in the block X dimension.
1258
* @param y_texels The number of texels in the block Y dimension.
1259
* @param z_texels The number of texels in the block Z dimension.
1260
* @param can_omit_modes Can we discard modes and partitionings that astcenc won't use?
1261
* @param partition_count_cutoff The partition count cutoff to use, if we can omit partitionings.
1262
* @param mode_cutoff The block mode percentile cutoff [0-1].
1263
* @param[out] bsd The descriptor to initialize.
1264
*/
1265
void init_block_size_descriptor(
1266
unsigned int x_texels,
1267
unsigned int y_texels,
1268
unsigned int z_texels,
1269
bool can_omit_modes,
1270
unsigned int partition_count_cutoff,
1271
float mode_cutoff,
1272
block_size_descriptor& bsd);
1273
1274
/**
1275
* @brief Populate the partition tables for the target block size.
1276
*
1277
* Note the @c bsd descriptor must be initialized by calling @c init_block_size_descriptor() before
1278
* calling this function.
1279
*
1280
* @param[out] bsd The block size information structure to populate.
1281
* @param can_omit_partitionings True if we can drop partitionings that astcenc won't use.
1282
* @param partition_count_cutoff The partition count cutoff to use, if we can omit partitionings.
1283
*/
1284
void init_partition_tables(
1285
block_size_descriptor& bsd,
1286
bool can_omit_partitionings,
1287
unsigned int partition_count_cutoff);
1288
1289
/**
1290
* @brief Get the percentile table for 2D block modes.
1291
*
1292
* This is an empirically determined prioritization of which block modes to use in the search in
1293
* terms of their centile (lower centiles = more useful).
1294
*
1295
* Returns a dynamically allocated array; caller must free with delete[].
1296
*
1297
* @param xdim The block x size.
1298
* @param ydim The block y size.
1299
*
1300
* @return The unpacked table.
1301
*/
1302
const float* get_2d_percentile_table(
1303
unsigned int xdim,
1304
unsigned int ydim);
1305
1306
/**
1307
* @brief Query if a 2D block size is legal.
1308
*
1309
* @return True if legal, false otherwise.
1310
*/
1311
bool is_legal_2d_block_size(
1312
unsigned int xdim,
1313
unsigned int ydim);
1314
1315
/**
1316
* @brief Query if a 3D block size is legal.
1317
*
1318
* @return True if legal, false otherwise.
1319
*/
1320
bool is_legal_3d_block_size(
1321
unsigned int xdim,
1322
unsigned int ydim,
1323
unsigned int zdim);
1324
1325
/* ============================================================================
1326
Functionality for managing BISE quantization and unquantization.
1327
============================================================================ */
1328
1329
/**
1330
* @brief The precomputed table for quantizing color values.
1331
*
1332
* Converts unquant value in 0-255 range into quant value in 0-255 range.
1333
* No BISE scrambling is applied at this stage.
1334
*
1335
* The BISE encoding results in ties where available quant<256> values are
1336
* equidistant from the available quant<BISE> values. This table stores two values
1337
* for each input - one for use with a negative residual, and one for use with
1338
* a positive residual.
1339
*
1340
* Indexed by [quant_mode - 4][data_value * 2 + residual].
1341
*/
1342
extern const uint8_t color_unquant_to_uquant_tables[17][512];
1343
1344
/**
1345
* @brief The precomputed table for packing quantized color values.
1346
*
1347
* Converts quant value in 0-255 range into packed quant value in 0-N range,
1348
* with BISE scrambling applied.
1349
*
1350
* Indexed by [quant_mode - 4][data_value].
1351
*/
1352
extern const uint8_t color_uquant_to_scrambled_pquant_tables[17][256];
1353
1354
/**
1355
* @brief The precomputed table for unpacking color values.
1356
*
1357
* Converts quant value in 0-N range into unpacked value in 0-255 range,
1358
* with BISE unscrambling applied.
1359
*
1360
* Indexed by [quant_mode - 4][data_value].
1361
*/
1362
extern const uint8_t* color_scrambled_pquant_to_uquant_tables[17];
1363
1364
/**
1365
* @brief The precomputed quant mode storage table.
1366
*
1367
* Indexing by [integer_count/2][bits] gives us the quantization level for a given integer count and
1368
* number of compressed storage bits. Returns -1 for cases where the requested integer count cannot
1369
* ever fit in the supplied storage size.
1370
*/
1371
extern const int8_t quant_mode_table[10][128];
1372
1373
/**
1374
* @brief Encode a packed string using BISE.
1375
*
1376
* Note that BISE can return strings that are not a whole number of bytes in length, and ASTC can
1377
* start storing strings in a block at arbitrary bit offsets in the encoded data.
1378
*
1379
* @param quant_level The BISE alphabet size.
1380
* @param character_count The number of characters in the string.
1381
* @param input_data The unpacked string, one byte per character.
1382
* @param[in,out] output_data The output packed string.
1383
* @param bit_offset The starting offset in the output storage.
1384
*/
1385
void encode_ise(
1386
quant_method quant_level,
1387
unsigned int character_count,
1388
const uint8_t* input_data,
1389
uint8_t* output_data,
1390
unsigned int bit_offset);
1391
1392
/**
1393
* @brief Decode a packed string using BISE.
1394
*
1395
* Note that BISE input strings are not a whole number of bytes in length, and ASTC can start
1396
* strings at arbitrary bit offsets in the encoded data.
1397
*
1398
* @param quant_level The BISE alphabet size.
1399
* @param character_count The number of characters in the string.
1400
* @param input_data The packed string.
1401
* @param[in,out] output_data The output storage, one byte per character.
1402
* @param bit_offset The starting bit offset in the packed input storage.
1403
*/
1404
void decode_ise(
1405
quant_method quant_level,
1406
unsigned int character_count,
1407
const uint8_t* input_data,
1408
uint8_t* output_data,
1409
unsigned int bit_offset);
1410
1411
/**
1412
* @brief Return the number of bits needed to encode an ISE sequence.
1413
*
1414
* This implementation assumes that the @c quant level is untrusted, given it may come from random
1415
* data being decompressed, so we return an arbitrary unencodable size if that is the case.
1416
*
1417
* @param character_count The number of items in the sequence.
1418
* @param quant_level The desired quantization level.
1419
*
1420
* @return The number of bits needed to encode the BISE string.
1421
*/
1422
unsigned int get_ise_sequence_bitcount(
1423
unsigned int character_count,
1424
quant_method quant_level);
1425
1426
/* ============================================================================
1427
Functionality for managing color partitioning.
1428
============================================================================ */
1429
1430
/**
1431
* @brief Compute averages and dominant directions for each partition in a 2 component texture.
1432
*
1433
* @param pi The partition info for the current trial.
1434
* @param blk The image block color data to be compressed.
1435
* @param component1 The first component included in the analysis.
1436
* @param component2 The second component included in the analysis.
1437
* @param[out] pm The output partition metrics.
1438
* - Only pi.partition_count array entries actually get initialized.
1439
* - Direction vectors @c pm.dir are not normalized.
1440
*/
1441
void compute_avgs_and_dirs_2_comp(
1442
const partition_info& pi,
1443
const image_block& blk,
1444
unsigned int component1,
1445
unsigned int component2,
1446
partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1447
1448
/**
1449
* @brief Compute averages and dominant directions for each partition in a 3 component texture.
1450
*
1451
* @param pi The partition info for the current trial.
1452
* @param blk The image block color data to be compressed.
1453
* @param omitted_component The component excluded from the analysis.
1454
* @param[out] pm The output partition metrics.
1455
* - Only pi.partition_count array entries actually get initialized.
1456
* - Direction vectors @c pm.dir are not normalized.
1457
*/
1458
void compute_avgs_and_dirs_3_comp(
1459
const partition_info& pi,
1460
const image_block& blk,
1461
unsigned int omitted_component,
1462
partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1463
1464
/**
1465
* @brief Compute averages and dominant directions for each partition in a 3 component texture.
1466
*
1467
* This is a specialization of @c compute_avgs_and_dirs_3_comp where the omitted component is
1468
* always alpha, a common case during partition search.
1469
*
1470
* @param pi The partition info for the current trial.
1471
* @param blk The image block color data to be compressed.
1472
* @param[out] pm The output partition metrics.
1473
* - Only pi.partition_count array entries actually get initialized.
1474
* - Direction vectors @c pm.dir are not normalized.
1475
*/
1476
void compute_avgs_and_dirs_3_comp_rgb(
1477
const partition_info& pi,
1478
const image_block& blk,
1479
partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1480
1481
/**
1482
* @brief Compute averages and dominant directions for each partition in a 4 component texture.
1483
*
1484
* @param pi The partition info for the current trial.
1485
* @param blk The image block color data to be compressed.
1486
* @param[out] pm The output partition metrics.
1487
* - Only pi.partition_count array entries actually get initialized.
1488
* - Direction vectors @c pm.dir are not normalized.
1489
*/
1490
void compute_avgs_and_dirs_4_comp(
1491
const partition_info& pi,
1492
const image_block& blk,
1493
partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1494
1495
/**
1496
* @brief Compute the RGB error for uncorrelated and same chroma projections.
1497
*
1498
* The output of compute averages and dirs is post processed to define two lines, both of which go
1499
* through the mean-color-value. One line has a direction defined by the dominant direction; this
1500
* is used to assess the error from using an uncorrelated color representation. The other line goes
1501
* through (0,0,0) and is used to assess the error from using an RGBS color representation.
1502
*
1503
* This function computes the squared error when using these two representations.
1504
*
1505
* @param pi The partition info for the current trial.
1506
* @param blk The image block color data to be compressed.
1507
* @param[in,out] plines Processed line inputs, and line length outputs.
1508
* @param[out] uncor_error The cumulative error for using the uncorrelated line.
1509
* @param[out] samec_error The cumulative error for using the same chroma line.
1510
*/
1511
void compute_error_squared_rgb(
1512
const partition_info& pi,
1513
const image_block& blk,
1514
partition_lines3 plines[BLOCK_MAX_PARTITIONS],
1515
float& uncor_error,
1516
float& samec_error);
1517
1518
/**
1519
* @brief Compute the RGBA error for uncorrelated and same chroma projections.
1520
*
1521
* The output of compute averages and dirs is post processed to define two lines, both of which go
1522
* through the mean-color-value. One line has a direction defined by the dominant direction; this
1523
* is used to assess the error from using an uncorrelated color representation. The other line goes
1524
* through (0,0,0,1) and is used to assess the error from using an RGBS color representation.
1525
*
1526
* This function computes the squared error when using these two representations.
1527
*
1528
* @param pi The partition info for the current trial.
1529
* @param blk The image block color data to be compressed.
1530
* @param uncor_plines Processed uncorrelated partition lines for each partition.
1531
* @param samec_plines Processed same chroma partition lines for each partition.
1532
* @param[out] line_lengths The length of each component's deviation from the line.
1533
* @param[out] uncor_error The cumulative error for using the uncorrelated line.
1534
* @param[out] samec_error The cumulative error for using the same chroma line.
1535
*/
1536
void compute_error_squared_rgba(
1537
const partition_info& pi,
1538
const image_block& blk,
1539
const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
1540
const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
1541
float line_lengths[BLOCK_MAX_PARTITIONS],
1542
float& uncor_error,
1543
float& samec_error);
1544
1545
/**
1546
* @brief Find the best set of partitions to trial for a given block.
1547
*
1548
* On return the @c best_partitions list will contain the two best partition
1549
* candidates; one assuming data has uncorrelated chroma and one assuming the
1550
* data has correlated chroma. The best candidate is returned first in the list.
1551
*
1552
* @param bsd The block size information.
1553
* @param blk The image block color data to compress.
1554
* @param partition_count The number of partitions in the block.
1555
* @param partition_search_limit The number of candidate partition encodings to trial.
1556
* @param[out] best_partitions The best partition candidates.
1557
* @param requested_candidates The number of requested partitionings. May return fewer if
1558
* candidates are not available.
1559
*
1560
* @return The actual number of candidates returned.
1561
*/
1562
unsigned int find_best_partition_candidates(
1563
const block_size_descriptor& bsd,
1564
const image_block& blk,
1565
unsigned int partition_count,
1566
unsigned int partition_search_limit,
1567
unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES],
1568
unsigned int requested_candidates);
1569
1570
/* ============================================================================
1571
Functionality for managing images and image related data.
1572
============================================================================ */
1573
1574
/**
1575
* @brief Get a vector mask indicating lanes decompressing into a UNORM8 value.
1576
*
1577
* @param decode_mode The color profile for LDR_SRGB settings.
1578
* @param blk The image block for output image bitness settings.
1579
*
1580
* @return The component mask vector.
1581
*/
1582
static inline vmask4 get_u8_component_mask(
1583
astcenc_profile decode_mode,
1584
const image_block& blk
1585
) {
1586
// Decode mode or sRGB forces writing to unorm8 output value
1587
if (blk.decode_unorm8 || decode_mode == ASTCENC_PRF_LDR_SRGB)
1588
{
1589
return vmask4(true);
1590
}
1591
1592
return vmask4(false);
1593
}
1594
1595
/**
1596
* @brief Setup computation of regional averages in an image.
1597
*
1598
* This must be done by only a single thread per image, before any thread calls
1599
* @c compute_averages().
1600
*
1601
* Results are written back into the @c input_alpha_averages table of the compressor context.
1602
*
1603
* @param img The input image data, also holds output data.
1604
* @param alpha_kernel_radius The kernel radius (in pixels) for alpha mods.
1605
* @param swz Input data component swizzle.
1606
* @param[out] ag The average variance arguments to init.
1607
*
1608
* @return The number of tasks in the processing stage.
1609
*/
1610
unsigned int init_compute_averages(
1611
const astcenc_image& img,
1612
unsigned int alpha_kernel_radius,
1613
const astcenc_swizzle& swz,
1614
avg_args& ag);
1615
1616
/**
1617
* @brief Compute averages for a pixel region.
1618
*
1619
* The routine computes both in a single pass, using a summed-area table to decouple the running
1620
* time from the averaging/variance kernel size.
1621
*
1622
* @param[out] ctx The compressor context storing the output data.
1623
* @param arg The input parameter structure.
1624
*/
1625
void compute_pixel_region_variance(
1626
astcenc_contexti& ctx,
1627
const pixel_region_args& arg);
1628
/**
1629
* @brief Load a single image block from the input image.
1630
*
1631
* @param decode_mode The compression color profile.
1632
* @param img The input image data.
1633
* @param[out] blk The image block to populate.
1634
* @param bsd The block size information.
1635
* @param xpos The block X coordinate in the input image.
1636
* @param ypos The block Y coordinate in the input image.
1637
* @param zpos The block Z coordinate in the input image.
1638
* @param swz The swizzle to apply on load.
1639
*/
1640
void load_image_block(
1641
astcenc_profile decode_mode,
1642
const astcenc_image& img,
1643
image_block& blk,
1644
const block_size_descriptor& bsd,
1645
unsigned int xpos,
1646
unsigned int ypos,
1647
unsigned int zpos,
1648
const astcenc_swizzle& swz);
1649
1650
/**
1651
* @brief Load a single image block from the input image.
1652
*
1653
* This specialized variant can be used only if the block is 2D LDR U8 data,
1654
* with no swizzle.
1655
*
1656
* @param decode_mode The compression color profile.
1657
* @param img The input image data.
1658
* @param[out] blk The image block to populate.
1659
* @param bsd The block size information.
1660
* @param xpos The block X coordinate in the input image.
1661
* @param ypos The block Y coordinate in the input image.
1662
* @param zpos The block Z coordinate in the input image.
1663
* @param swz The swizzle to apply on load.
1664
*/
1665
void load_image_block_fast_ldr(
1666
astcenc_profile decode_mode,
1667
const astcenc_image& img,
1668
image_block& blk,
1669
const block_size_descriptor& bsd,
1670
unsigned int xpos,
1671
unsigned int ypos,
1672
unsigned int zpos,
1673
const astcenc_swizzle& swz);
1674
1675
/**
1676
* @brief Store a single image block to the output image.
1677
*
1678
* @param[out] img The output image data.
1679
* @param blk The image block to export.
1680
* @param bsd The block size information.
1681
* @param xpos The block X coordinate in the output image.
* @param ypos The block Y coordinate in the output image.
* @param zpos The block Z coordinate in the output image.
1684
* @param swz The swizzle to apply on store.
1685
*/
1686
void store_image_block(
1687
astcenc_image& img,
1688
const image_block& blk,
1689
const block_size_descriptor& bsd,
1690
unsigned int xpos,
1691
unsigned int ypos,
1692
unsigned int zpos,
1693
const astcenc_swizzle& swz);
1694
1695
/* ============================================================================
1696
Functionality for computing endpoint colors and weights for a block.
1697
============================================================================ */
1698
1699
/**
1700
* @brief Compute ideal endpoint colors and weights for 1 plane of weights.
1701
*
1702
* The ideal endpoints define a color line for the partition. For each texel the ideal weight
1703
* defines an exact position on the partition color line. We can then use these to assess the error
1704
* introduced by removing and quantizing the weight grid.
1705
*
1706
* @param blk The image block color data to compress.
1707
* @param pi The partition info for the current trial.
1708
* @param[out] ei The endpoint and weight values.
1709
*/
1710
void compute_ideal_colors_and_weights_1plane(
1711
const image_block& blk,
1712
const partition_info& pi,
1713
endpoints_and_weights& ei);
1714
1715
/**
1716
* @brief Compute ideal endpoint colors and weights for 2 planes of weights.
1717
*
1718
* The ideal endpoints define a color line for the partition. For each texel the ideal weight
1719
* defines an exact position on the partition color line. We can then use these to assess the error
1720
* introduced by removing and quantizing the weight grid.
1721
*
1722
* @param bsd The block size information.
1723
* @param blk The image block color data to compress.
1724
* @param plane2_component The component assigned to plane 2.
1725
* @param[out] ei1 The endpoint and weight values for plane 1.
1726
* @param[out] ei2 The endpoint and weight values for plane 2.
1727
*/
1728
void compute_ideal_colors_and_weights_2planes(
1729
const block_size_descriptor& bsd,
1730
const image_block& blk,
1731
unsigned int plane2_component,
1732
endpoints_and_weights& ei1,
1733
endpoints_and_weights& ei2);
1734
1735
/**
1736
* @brief Compute the optimal unquantized weights for a decimation table.
1737
*
1738
* After computing ideal weights for the case of a complete weight grid, we want to compute the
1739
* ideal weights for the case where weights exist only for some texels. We do this with a
1740
* steepest-descent grid solver which works as follows:
1741
*
1742
* First, for each actual weight, perform a weighted averaging of the texels affected by the weight.
1743
* Then, set step size to <some initial value> and attempt one step towards the original ideal
1744
* weight if it helps to reduce error.
1745
*
1746
* @param ei The non-decimated endpoints and weights.
1747
* @param di The selected weight decimation.
1748
* @param[out] dec_weight_ideal_value The ideal values for the decimated weight set.
1749
*/
1750
void compute_ideal_weights_for_decimation(
1751
const endpoints_and_weights& ei,
1752
const decimation_info& di,
1753
float* dec_weight_ideal_value);
1754
1755
/**
1756
* @brief Compute the optimal quantized weights for a decimation table.
1757
*
1758
* We test the two closest weight indices in the allowed quantization range and keep the weight that
1759
* is the closest match.
1760
*
1761
* @param di The selected weight decimation.
1762
* @param low_bound The lowest weight allowed.
1763
* @param high_bound The highest weight allowed.
1764
* @param dec_weight_ideal_value The ideal weight set.
1765
* @param[out] dec_weight_quant_uvalue The output quantized weight as a float.
1766
* @param[out] dec_weight_uquant The output quantized weight as encoded int.
1767
* @param quant_level The desired weight quant level.
1768
*/
1769
void compute_quantized_weights_for_decimation(
1770
const decimation_info& di,
1771
float low_bound,
1772
float high_bound,
1773
const float* dec_weight_ideal_value,
1774
float* dec_weight_quant_uvalue,
1775
uint8_t* dec_weight_uquant,
1776
quant_method quant_level);
1777
1778
/**
1779
* @brief Compute the error of a decimated weight set for 1 plane.
1780
*
1781
* After computing ideal weights for the case with one weight per texel, we want to compute the
1782
* error for decimated weight grids where weights are stored at a lower resolution. This function
1783
* computes the error of the reduced grid, compared to the full grid.
1784
*
1785
* @param eai The ideal weights for the full grid.
1786
* @param di The selected weight decimation.
1787
* @param dec_weight_quant_uvalue The quantized weights for the decimated grid.
1788
*
1789
* @return The accumulated error.
1790
*/
1791
float compute_error_of_weight_set_1plane(
1792
const endpoints_and_weights& eai,
1793
const decimation_info& di,
1794
const float* dec_weight_quant_uvalue);
1795
1796
/**
1797
* @brief Compute the error of a decimated weight set for 2 planes.
1798
*
1799
* After computing ideal weights for the case with one weight per texel, we want to compute the
1800
* error for decimated weight grids where weights are stored at a lower resolution. This function
1801
* computes the error of the reduced grid, compared to the full grid.
1802
*
1803
* @param eai1 The ideal weights for the full grid and plane 1.
1804
* @param eai2 The ideal weights for the full grid and plane 2.
1805
* @param di The selected weight decimation.
1806
* @param dec_weight_quant_uvalue_plane1 The quantized weights for the decimated grid plane 1.
1807
* @param dec_weight_quant_uvalue_plane2 The quantized weights for the decimated grid plane 2.
1808
*
1809
* @return The accumulated error.
1810
*/
1811
float compute_error_of_weight_set_2planes(
1812
const endpoints_and_weights& eai1,
1813
const endpoints_and_weights& eai2,
1814
const decimation_info& di,
1815
const float* dec_weight_quant_uvalue_plane1,
1816
const float* dec_weight_quant_uvalue_plane2);
1817
1818
/**
1819
* @brief Pack a single pair of color endpoints as effectively as possible.
1820
*
1821
* The user requests a base color endpoint mode in @c format, but the quantizer may choose a
1822
* delta-based representation. It will report back the format variant it actually used.
1823
*
1824
* @param color0 The input unquantized color0 endpoint for absolute endpoint pairs.
1825
* @param color1 The input unquantized color1 endpoint for absolute endpoint pairs.
1826
* @param rgbs_color The input unquantized RGBS variant endpoint for same chroma endpoints.
1827
* @param rgbo_color The input unquantized RGBO variant endpoint for HDR endpoints.
1828
* @param format The desired base format.
1829
* @param[out] output The output storage for the quantized colors.
1830
* @param quant_level The quantization level requested.
1831
*
1832
* @return The actual endpoint mode used.
1833
*/
1834
uint8_t pack_color_endpoints(
1835
vfloat4 color0,
1836
vfloat4 color1,
1837
vfloat4 rgbs_color,
1838
vfloat4 rgbo_color,
1839
int format,
1840
uint8_t* output,
1841
quant_method quant_level);
1842
1843
/**
1844
* @brief Unpack a single pair of encoded endpoints.
1845
*
1846
* Endpoints must be unscrambled and converted into the 0-255 range before calling this function.
1847
*
1848
* @param decode_mode The decode mode (LDR, HDR, etc).
1849
* @param format The color endpoint mode used.
1850
* @param input The raw array of encoded input integers. The length of this array
1851
* depends on @c format; it can be safely assumed to be large enough.
1852
* @param[out] rgb_hdr Is the endpoint using HDR for the RGB channels?
1853
* @param[out] alpha_hdr Is the endpoint using HDR for the A channel?
1854
* @param[out] output0 The output color for endpoint 0.
1855
* @param[out] output1 The output color for endpoint 1.
1856
*/
1857
void unpack_color_endpoints(
1858
astcenc_profile decode_mode,
1859
int format,
1860
const uint8_t* input,
1861
bool& rgb_hdr,
1862
bool& alpha_hdr,
1863
vint4& output0,
1864
vint4& output1);
1865
1866
/**
1867
* @brief Unpack an LDR RGBA color that uses delta encoding.
1868
*
1869
* @param input0 The packed endpoint 0 color.
1870
* @param input1 The packed endpoint 1 color deltas.
1871
* @param[out] output0 The unpacked endpoint 0 color.
1872
* @param[out] output1 The unpacked endpoint 1 color.
1873
*/
1874
void rgba_delta_unpack(
1875
vint4 input0,
1876
vint4 input1,
1877
vint4& output0,
1878
vint4& output1);
1879
1880
/**
1881
* @brief Unpack an LDR RGBA color that uses direct encoding.
1882
*
1883
* @param input0 The packed endpoint 0 color.
1884
* @param input1 The packed endpoint 1 color.
1885
* @param[out] output0 The unpacked endpoint 0 color.
1886
* @param[out] output1 The unpacked endpoint 1 color.
1887
*/
1888
void rgba_unpack(
1889
vint4 input0,
1890
vint4 input1,
1891
vint4& output0,
1892
vint4& output1);
1893
1894
/**
1895
* @brief Unpack a set of quantized and decimated weights.
1896
*
1897
* TODO: Can we skip this for non-decimated weights now that the @c scb is
1898
* already storing unquantized weights?
1899
*
1900
* @param bsd The block size information.
1901
* @param scb The symbolic compressed encoding.
1902
* @param di The weight grid decimation table.
1903
* @param is_dual_plane @c true if this is a dual plane block, @c false otherwise.
1904
* @param[out] weights_plane1 The output array for storing the plane 1 weights.
1905
* @param[out] weights_plane2 The output array for storing the plane 2 weights.
1906
*/
1907
void unpack_weights(
1908
const block_size_descriptor& bsd,
1909
const symbolic_compressed_block& scb,
1910
const decimation_info& di,
1911
bool is_dual_plane,
1912
int weights_plane1[BLOCK_MAX_TEXELS],
1913
int weights_plane2[BLOCK_MAX_TEXELS]);
1914
1915
/**
1916
* @brief Identify, for each mode, which set of color endpoint produces the best result.
1917
*
1918
* Returns the @c tune_candidate_limit best looking modes, along with the ideal color encoding
1919
* combination for each. The modified quantization level can be used when all formats are the same,
1920
* as this frees up two additional bits of storage.
1921
*
1922
* @param pi The partition info for the current trial.
1923
* @param blk The image block color data to compress.
1924
* @param ep The ideal endpoints.
1925
* @param qwt_bitcounts Bit counts for different quantization methods.
1926
* @param qwt_errors Errors for different quantization methods.
1927
* @param tune_candidate_limit The max number of candidates to return, may be less.
1928
* @param start_block_mode The first block mode to inspect.
1929
* @param end_block_mode The last block mode to inspect.
1930
* @param[out] partition_format_specifiers The best formats per partition.
1931
* @param[out] block_mode The best packed block mode indexes.
1932
* @param[out] quant_level The best color quant level.
1933
* @param[out] quant_level_mod The best color quant level if endpoints are the same.
1934
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
1935
*
1936
* @return The actual number of candidate matches returned.
1937
*/
1938
unsigned int compute_ideal_endpoint_formats(
1939
const partition_info& pi,
1940
const image_block& blk,
1941
const endpoints& ep,
1942
const int8_t* qwt_bitcounts,
1943
const float* qwt_errors,
1944
unsigned int tune_candidate_limit,
1945
unsigned int start_block_mode,
1946
unsigned int end_block_mode,
1947
uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS],
1948
int block_mode[TUNE_MAX_TRIAL_CANDIDATES],
1949
quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES],
1950
quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES],
1951
compression_working_buffers& tmpbuf);
1952
1953
/**
1954
* @brief For a given 1 plane weight set recompute the endpoint colors.
1955
*
1956
* As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
1957
* recompute the ideal colors for a specific weight set.
1958
*
1959
* @param blk The image block color data to compress.
1960
* @param pi The partition info for the current trial.
1961
* @param di The weight grid decimation table.
1962
* @param dec_weights_uquant The quantized weight set.
1963
* @param[in,out] ep The color endpoints (modified in place).
1964
* @param[out] rgbs_vectors The RGB+scale vectors for LDR blocks.
1965
* @param[out] rgbo_vectors The RGB+offset vectors for HDR blocks.
1966
*/
1967
void recompute_ideal_colors_1plane(
1968
const image_block& blk,
1969
const partition_info& pi,
1970
const decimation_info& di,
1971
const uint8_t* dec_weights_uquant,
1972
endpoints& ep,
1973
vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS],
1974
vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS]);
1975
1976
/**
1977
* @brief For a given 2 plane weight set recompute the endpoint colors.
1978
*
1979
* As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
1980
* recompute the ideal colors for a specific weight set.
1981
*
1982
* @param blk The image block color data to compress.
1983
* @param bsd The block_size descriptor.
1984
* @param di The weight grid decimation table.
1985
* @param dec_weights_uquant_plane1 The quantized weight set for plane 1.
1986
* @param dec_weights_uquant_plane2 The quantized weight set for plane 2.
1987
* @param[in,out] ep The color endpoints (modified in place).
1988
* @param[out] rgbs_vector The RGB+scale color for LDR blocks.
1989
* @param[out] rgbo_vector The RGB+offset color for HDR blocks.
1990
* @param plane2_component The component assigned to plane 2.
1991
*/
1992
void recompute_ideal_colors_2planes(
1993
const image_block& blk,
1994
const block_size_descriptor& bsd,
1995
const decimation_info& di,
1996
const uint8_t* dec_weights_uquant_plane1,
1997
const uint8_t* dec_weights_uquant_plane2,
1998
endpoints& ep,
1999
vfloat4& rgbs_vector,
2000
vfloat4& rgbo_vector,
2001
int plane2_component);
2002
2003
/**
2004
* @brief Expand the angular tables needed for the alternative to PCA that we use.
2005
*/
2006
void prepare_angular_tables();
2007
2008
/**
2009
* @brief Compute the angular endpoints for one plane for each block mode.
2010
*
2011
* @param only_always Only consider block modes that are always enabled.
2012
* @param bsd The block size descriptor for the current trial.
2013
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
2014
* @param max_weight_quant The maximum block mode weight quantization allowed.
2015
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
2016
*/
2017
void compute_angular_endpoints_1plane(
2018
bool only_always,
2019
const block_size_descriptor& bsd,
2020
const float* dec_weight_ideal_value,
2021
unsigned int max_weight_quant,
2022
compression_working_buffers& tmpbuf);
2023
2024
/**
2025
* @brief Compute the angular endpoints for two planes for each block mode.
2026
*
2027
* @param bsd The block size descriptor for the current trial.
2028
* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
2029
* @param max_weight_quant The maximum block mode weight quantization allowed.
2030
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
2031
*/
2032
void compute_angular_endpoints_2planes(
2033
const block_size_descriptor& bsd,
2034
const float* dec_weight_ideal_value,
2035
unsigned int max_weight_quant,
2036
compression_working_buffers& tmpbuf);
2037
2038
/* ============================================================================
2039
Functionality for high level compression and decompression access.
2040
============================================================================ */
2041
2042
/**
2043
* @brief Compress an image block into a physical block.
2044
*
2045
* @param ctx The compressor context and configuration.
2046
* @param blk The image block color data to compress.
2047
* @param[out] pcb The physical compressed block output.
2048
* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
2049
*/
2050
void compress_block(
2051
const astcenc_contexti& ctx,
2052
const image_block& blk,
2053
uint8_t pcb[16],
2054
compression_working_buffers& tmpbuf);
2055
2056
/**
2057
* @brief Decompress a symbolic block into an image block.
2058
*
2059
* @param decode_mode The decode mode (LDR, HDR, etc).
2060
* @param bsd The block size information.
2061
* @param xpos The X coordinate of the block in the overall image.
2062
* @param ypos The Y coordinate of the block in the overall image.
2063
* @param zpos The Z coordinate of the block in the overall image.
* @param scb The symbolic compressed encoding.
2064
* @param[out] blk The decompressed image block color data.
2065
*/
2066
void decompress_symbolic_block(
2067
astcenc_profile decode_mode,
2068
const block_size_descriptor& bsd,
2069
int xpos,
2070
int ypos,
2071
int zpos,
2072
const symbolic_compressed_block& scb,
2073
image_block& blk);
2074
2075
/**
2076
* @brief Compute the error between a symbolic block and the original input data.
2077
*
2078
* This function is specialized for 2 plane and 1 partition search.
2079
*
2080
* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2081
*
2082
* @param config The compressor config.
2083
* @param bsd The block size information.
2084
* @param scb The symbolic compressed encoding.
2085
* @param blk The original image block color data.
2086
*
2087
* @return Returns the computed error, or a negative value if the encoding
2088
* should be rejected for any reason.
2089
*/
2090
float compute_symbolic_block_difference_2plane(
2091
const astcenc_config& config,
2092
const block_size_descriptor& bsd,
2093
const symbolic_compressed_block& scb,
2094
const image_block& blk);
2095
2096
/**
2097
* @brief Compute the error between a symbolic block and the original input data.
2098
*
2099
* This function is specialized for 1 plane and N partition search.
2100
*
2101
* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2102
*
2103
* @param config The compressor config.
2104
* @param bsd The block size information.
2105
* @param scb The symbolic compressed encoding.
2106
* @param blk The original image block color data.
2107
*
2108
* @return Returns the computed error, or a negative value if the encoding
2109
* should be rejected for any reason.
2110
*/
2111
float compute_symbolic_block_difference_1plane(
2112
const astcenc_config& config,
2113
const block_size_descriptor& bsd,
2114
const symbolic_compressed_block& scb,
2115
const image_block& blk);
2116
2117
/**
2118
* @brief Compute the error between a symbolic block and the original input data.
2119
*
2120
* This function is specialized for 1 plane and 1 partition search.
2121
*
2122
* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2123
*
2124
* @param config The compressor config.
2125
* @param bsd The block size information.
2126
* @param scb The symbolic compressed encoding.
2127
* @param blk The original image block color data.
2128
*
2129
* @return Returns the computed error, or a negative value if the encoding
2130
* should be rejected for any reason.
2131
*/
2132
float compute_symbolic_block_difference_1plane_1partition(
2133
const astcenc_config& config,
2134
const block_size_descriptor& bsd,
2135
const symbolic_compressed_block& scb,
2136
const image_block& blk);
2137
2138
/**
2139
* @brief Convert a symbolic representation into a binary physical encoding.
2140
*
2141
* It is assumed that the symbolic encoding is valid and encodable, or
2142
* previously flagged as an error block if an error color is to be encoded.
2143
*
2144
* @param bsd The block size information.
2145
* @param scb The symbolic representation.
2146
* @param[out] pcb The physical compressed block output.
2147
*/
2148
void symbolic_to_physical(
2149
const block_size_descriptor& bsd,
2150
const symbolic_compressed_block& scb,
2151
uint8_t pcb[16]);
2152
2153
/**
2154
* @brief Convert a binary physical encoding into a symbolic representation.
2155
*
2156
* This function can cope with arbitrary input data; output blocks will be
2157
* flagged as an error block if the encoding is invalid.
2158
*
2159
* @param bsd The block size information.
2160
* @param pcb The physical compressed block input.
2161
* @param[out] scb The output symbolic representation.
2162
*/
2163
void physical_to_symbolic(
2164
const block_size_descriptor& bsd,
2165
const uint8_t pcb[16],
2166
symbolic_compressed_block& scb);
2167
2168
/* ============================================================================
2169
Platform-specific functions.
2170
============================================================================ */
2171
/**
2172
* @brief Allocate an aligned memory buffer.
2173
*
2174
* Allocated memory must be freed by aligned_free.
2175
*
2176
* @param size The desired buffer size.
2177
* @param align The desired buffer alignment; must be 2^N, may be increased
2178
* by the implementation to a minimum allowable alignment.
2179
*
2180
* @return The memory buffer pointer or nullptr on allocation failure.
2181
*/
2182
template<typename T>
2183
T* aligned_malloc(size_t size, size_t align)
2184
{
2185
void* ptr;
2186
int error = 0;
2187
2188
// Don't allow this to under-align a type
2189
size_t min_align = astc::max(alignof(T), sizeof(void*));
2190
size_t real_align = astc::max(min_align, align);
2191
2192
#if defined(_WIN32)
2193
ptr = _aligned_malloc(size, real_align);
2194
#else
2195
error = posix_memalign(&ptr, real_align, size);
2196
#endif
2197
2198
if (error || (!ptr))
2199
{
2200
return nullptr;
2201
}
2202
2203
return static_cast<T*>(ptr);
2204
}
2205
2206
/**
 * @brief Release a buffer using the platform's matching deallocation call.
 *
 * Calls @c _aligned_free on Windows builds and @c free elsewhere.
 *
 * @param ptr   The buffer to release.
 */
template<typename T>
void aligned_free(T* ptr)
{
#if !defined(_WIN32)
	free(ptr);
#else
	_aligned_free(ptr);
#endif
}
2220
2221
#endif
2222
2223