Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
9903 views
1
// basisu_uastc_enc.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
#include "basisu_uastc_enc.h"
16
#include "3rdparty/android_astc_decomp.h"
17
#include "basisu_gpu_texture.h"
18
#include "basisu_bc7enc.h"
19
20
#ifdef _DEBUG
21
// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower.
22
#define BASISU_VALIDATE_UASTC_ENC 1
23
#endif
24
25
#define BASISU_SUPPORT_FORCE_MODE 0
26
27
using namespace basist;
28
29
namespace basisu
30
{
31
// Capacity limit for the candidate-result arrays: the astc_mode* helpers below only append to
// pResults while total_results is still less than this value.
const uint32_t MAX_ENCODE_RESULTS = 512;
32
33
#if BASISU_VALIDATE_UASTC_ENC
34
// Debug validation hook behind the VALIDATE() macro: reports a failed internal consistency
// check on stderr. It only logs; execution continues afterwards.
//   condition - result of the check; nothing is printed when it is true.
//   line      - source line of the check (VALIDATE passes __LINE__).
static void validate_func(bool condition, int line)
{
	if (condition)
		return;

	// `line` is a signed int, so %d is the conversion that matches the argument type.
	fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %d!\n", line);
}
41
42
#define VALIDATE(c) validate_func(c, __LINE__);
43
#else
44
#define VALIDATE(c)
45
#endif
46
47
// Selector field geometry for DXT1 (2-bit) and DXT5 (3-bit) selectors:
// bits per selector, number of distinct selector values, and the matching bit mask.
enum dxt_constants
{
	cDXT1SelectorBits = 2U,
	cDXT1SelectorValues = 1U << cDXT1SelectorBits,
	cDXT1SelectorMask = cDXT1SelectorValues - 1U,

	cDXT5SelectorBits = 3U,
	cDXT5SelectorValues = 1U << cDXT5SelectorBits,
	cDXT5SelectorMask = cDXT5SelectorValues - 1U,
};
52
53
struct dxt1_block
54
{
55
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
56
57
uint8_t m_low_color[cTotalEndpointBytes];
58
uint8_t m_high_color[cTotalEndpointBytes];
59
uint8_t m_selectors[cTotalSelectorBytes];
60
61
inline void clear() { basisu::clear_obj(*this); }
62
63
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
64
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
65
inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
66
inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
67
inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; }
68
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
69
70
static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U)
71
{
72
uint32_t r = color.r, g = color.g, b = color.b;
73
if (scaled)
74
{
75
r = (r * 31U + bias) / 255U;
76
g = (g * 63U + bias) / 255U;
77
b = (b * 31U + bias) / 255U;
78
}
79
return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
80
}
81
82
static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
83
};
84
85
#define UASTC_WRITE_MODE_DESCS 0
86
87
// Appends `codesize` bits of `code` to the bitstream in pBuf, least significant bit first,
// starting at bit position `bit_offset` (which is advanced by `codesize`).
// Bits are OR'd into the destination bytes, so the target region must start out zeroed.
// `code` must fit in `codesize` bits (asserted). pDesc is only used for the optional trace
// output enabled by UASTC_WRITE_MODE_DESCS and may be null.
static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc)
{
#if UASTC_WRITE_MODE_DESCS
	if (pDesc)
		printf("%s: %u %u\n", pDesc, bit_offset, codesize);
#else
	(void)pDesc;
#endif

	assert((codesize == 64) || (code < (1ULL << codesize)));

	for (uint32_t remaining = codesize; remaining != 0; )
	{
		const uint32_t shift = bit_offset & 7;

		// Write at most up to the end of the current byte.
		const int wanted = static_cast<int>(remaining);
		const int room = static_cast<int>(8 - shift);
		const uint32_t chunk = static_cast<uint32_t>((wanted < room) ? wanted : room);

		// Anything shifted past bit 7 is dropped by the narrowing store; it is written on the next pass.
		pBuf[bit_offset >> 3] |= (code << shift);

		code >>= chunk;
		remaining -= chunk;
		bit_offset += chunk;
	}
}
110
111
// Serializes one encode result into `blk`.
//
// The block is assembled in a zeroed 32-byte scratch buffer with uastc_write_bits and then
// sizeof(blk) bytes are copied out. Field order, as written below:
//   mode code, then either the solid-color payload (RGBA + ETC1 hints) or:
//   optional BC1 hint bits, ETC1 flip/diff/intensity-table hints, optional ETC1 bias,
//   optional ETC2 alpha hints, optional partition pattern, optional color component selector,
//   endpoints (trit/quint groups first, then the plain bits), and finally the weights.
//
// Before the endpoints and weights are written, every subset whose anchor weight has its MSB set
// gets its weights inverted and its endpoint pairs swapped, so that the anchor weight's MSB is
// zero and can be left out of the stream (anchor weights are written with one bit less).
//
//   result         - mode, common pattern, ASTC description and solid color to pack.
//   etc1_blk       - source of the ETC1 hint fields.
//   etc1_bias      - written only for modes flagged in g_uastc_mode_has_etc1_bias; must be 0 otherwise.
//   etc_eac_a8_blk - source of the ETC2 alpha table/multiplier hints for alpha modes.
//   bc1_hint0/1    - written only for modes flagged in g_uastc_mode_has_bc1_hint0/1; must be false otherwise.
void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1)
{
	const uint32_t mode = result.m_uastc_mode;
	const bool is_solid_color = (mode == UASTC_MODE_INDEX_SOLID_COLOR);
	const bool has_alpha = (g_uastc_mode_has_alpha[mode] != 0);

	// Alpha-capable modes other than solid color need a non-zero EAC multiplier.
	assert(!has_alpha || is_solid_color || (etc_eac_a8_blk.m_multiplier >= 1));

	uint8_t packed[32] = {};
	uint32_t bit_pos = 0;

	// Shorthand for appending a field at the current bit position.
	const auto emit = [&packed, &bit_pos](uint64_t value, uint32_t num_bits, const char* pLabel)
	{
		uastc_write_bits(packed, bit_pos, value, num_bits, pLabel);
	};

#if UASTC_WRITE_MODE_DESCS
	printf("**** Mode: %u\n", result.m_uastc_mode);
#endif

	emit(g_uastc_mode_huff_codes[mode][0], g_uastc_mode_huff_codes[mode][1], "mode");

	if (is_solid_color)
	{
		emit(result.m_solid_color.r, 8, "R");
		emit(result.m_solid_color.g, 8, "G");
		emit(result.m_solid_color.b, 8, "B");
		emit(result.m_solid_color.a, 8, "A");

		emit(etc1_blk.get_diff_bit(), 1, "ETC1D");
		emit(etc1_blk.get_inten_table(0), 3, "ETC1I");
		emit(etc1_blk.get_selector(0, 0), 2, "ETC1S");

		// The ETC1 base color is always stored in 5 bits per component, whichever ETC1 color mode is used.
		uint32_t etc_r, etc_g, etc_b;
		if (etc1_blk.get_diff_bit())
			etc_block::unpack_color5(etc_r, etc_g, etc_b, etc1_blk.get_base5_color(), false);
		else
			etc_block::unpack_color4(etc_r, etc_g, etc_b, etc1_blk.get_base4_color(0), false);

		emit(etc_r, 5, "ETC1R");
		emit(etc_g, 5, "ETC1G");
		emit(etc_b, 5, "ETC1B");

		memcpy(&blk, packed, sizeof(blk));
		return;
	}

	// --- Transcoding hints ---
	const bool stores_bc1_hint0 = (g_uastc_mode_has_bc1_hint0[mode] != 0);
	assert(stores_bc1_hint0 || (bc1_hint0 == false));
	if (stores_bc1_hint0)
		emit(bc1_hint0, 1, "BC1H0");

	const bool stores_bc1_hint1 = (g_uastc_mode_has_bc1_hint1[mode] != 0);
	assert(stores_bc1_hint1 || (bc1_hint1 == false));
	if (stores_bc1_hint1)
		emit(bc1_hint1, 1, "BC1H1");

	emit(etc1_blk.get_flip_bit(), 1, "ETC1F");
	emit(etc1_blk.get_diff_bit(), 1, "ETC1D");
	emit(etc1_blk.get_inten_table(0), 3, "ETC1I0");
	emit(etc1_blk.get_inten_table(1), 3, "ETC1I1");

	const bool stores_etc1_bias = (g_uastc_mode_has_etc1_bias[mode] != 0);
	assert(stores_etc1_bias || (etc1_bias == 0));
	if (stores_etc1_bias)
		emit(etc1_bias, 5, "ETC1BIAS");

	if (has_alpha)
	{
		const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4);

		assert(etc2_hints > 0 && etc2_hints <= 0xFF);
		emit(etc2_hints, 8, "ETC2TM");
	}

	// --- Partition pattern (multi-subset modes only) ---
	uint32_t subsets = 1;
	if (mode == 3)
	{
		emit(result.m_common_pattern, 4, "PAT");
		subsets = 3;
	}
	else if ((mode == 2) || (mode == 4) || (mode == 7) || (mode == 9) || (mode == 16))
	{
		emit(result.m_common_pattern, 5, "PAT");
		subsets = 2;
	}

#ifdef _DEBUG
	// ASTC partition seed, used only to cross-check the anchor/partition tables below.
	uint32_t part_seed = 0;
	switch (mode)
	{
	case 3:
		part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc;
		break;
	case 7:
		part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2;
		break;
	case 2:
	case 4:
	case 9:
	case 16:
		part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc;
		break;
	default:
		break;
	}
#endif

	// --- Dual-plane modes ---
	uint32_t total_planes = 1;
	if ((mode == 6) || (mode == 11) || (mode == 13))
	{
		emit(result.m_astc.m_ccs, 2, "COMPSEL");
		total_planes = 2;
	}
	else if (mode == 17)
	{
		// CCS field is always 3 for dual plane LA, so it is not stored.
		assert(result.m_astc.m_ccs == 3);
		total_planes = 2;
	}

	// Work on local copies: weights/endpoints may be flipped below.
	uint8_t weights[32];
	memcpy(weights, result.m_astc.m_weights, 16 * total_planes);

	uint8_t endpoints[18];
	memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints));

	const uint32_t total_comps = g_uastc_mode_comps[mode];

	// Endpoint layouts handled here:
	// LLAA
	// LLAA LLAA
	// LLAA LLAA LLAA
	// RRGGBB
	// RRGGBB RRGGBB
	// RRGGBB RRGGBB RRGGBB
	// RRGGBBAA
	// RRGGBBAA RRGGBBAA

	const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
	const uint32_t weight_msb = 1U << (weight_bits - 1);
	const uint32_t weight_max = (1U << weight_bits) - 1;

	const uint8_t* pPartition_pattern;
	const uint8_t* pAnchors = basist::get_anchor_indices(subsets, mode, result.m_common_pattern, pPartition_pattern);

	for (uint32_t plane = 0; plane < total_planes; ++plane)
	{
		for (uint32_t subset = 0; subset < subsets; ++subset)
		{
			const uint32_t anchor = pAnchors[subset];

#ifdef _DEBUG
			if (subsets >= 2)
			{
				// The anchor must be the first texel (in raster order) belonging to this subset.
				for (uint32_t i = 0; i < 16; i++)
				{
					const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true);
					if (part_index == subset)
					{
						assert(anchor == i);
						break;
					}
				}
			}
			else
			{
				assert(!anchor);
			}
#endif

			// Nothing to do unless the anchor weight's MSB is set.
			if ((weights[anchor * total_planes + plane] & weight_msb) == 0)
				continue;

			// Invert this subset's weights in this plane...
			for (uint32_t i = 0; i < 16; i++)
			{
				const uint32_t part_index = pPartition_pattern[i];

#ifdef _DEBUG
				if (subsets >= 2)
				{
					assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
				}
				else
				{
					assert(!part_index);
				}
#endif

				if (part_index != subset)
					continue;

				uint8_t& w = weights[i * total_planes + plane];
				w = static_cast<uint8_t>(weight_max - w);
			}

			// ...and swap the matching endpoint pairs so the decoded colors stay the same.
			if (total_planes == 2)
			{
				// Dual plane: only swap the components that are driven by this plane.
				for (int c = 0; c < (int)total_comps; c++)
				{
					const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0);

					if (comp_plane == plane)
						std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]);
				}
			}
			else
			{
				const uint32_t first = subset * total_comps * 2;
				for (uint32_t c = 0; c < total_comps; c++)
					std::swap(endpoints[first + c * 2 + 0], endpoints[first + c * 2 + 1]);
			}
		} // subset
	} // plane

	// --- Endpoints ---
	const uint32_t total_values = total_comps * 2 * subsets;
	const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];

	const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
	const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
	const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];

	// Trits are grouped five at a time (3^5 = 243), quints three at a time (5^3 = 125).
	const uint32_t radix = ep_trits ? 3 : (ep_quints ? 5 : 0);
	const uint32_t full_group = ep_trits ? 243 : 125;
	const uint32_t low_mask = (1 << ep_bits) - 1;

	uint32_t bit_values[18];
	uint32_t tq_values[8];
	uint32_t total_tq_values = 0;
	uint32_t tq_accum = 0;
	uint32_t tq_mul = 1;

	for (uint32_t i = 0; i < total_values; i++)
	{
		const uint32_t val = endpoints[i];
		bit_values[i] = val & low_mask;

		if (!radix)
			continue;

		const uint32_t tq = val >> ep_bits;
		assert(tq < radix);

		tq_accum += tq * tq_mul;
		tq_mul *= radix;
		if (tq_mul == full_group)
		{
			tq_values[total_tq_values++] = tq_accum;
			tq_accum = 0;
			tq_mul = 1;
		}
	}

	uint32_t total_endpoint_bits = 0;
	(void)total_endpoint_bits;

	// Complete trit/quint groups.
	const uint32_t full_group_bits = ep_trits ? 8 : 7;
	for (uint32_t i = 0; i < total_tq_values; i++)
	{
		emit(tq_values[i], full_group_bits, "ETQ");
		total_endpoint_bits += full_group_bits;
	}

	// Trailing partial group, stored in just enough bits for the values it holds.
	if (tq_mul > 1)
	{
		uint32_t num_bits;
		if (ep_trits)
		{
			switch (tq_mul)
			{
			case 3: num_bits = 2; break;
			case 9: num_bits = 4; break;
			case 27: num_bits = 5; break;
			default: num_bits = 7; break; // tq_mul == 81
			}
		}
		else
		{
			num_bits = (tq_mul == 5) ? 3 : 5; // otherwise tq_mul == 25
		}

		emit(tq_accum, num_bits, "ETQ");
		total_endpoint_bits += num_bits;
	}

	// Plain low-order bits of every endpoint value.
	for (uint32_t i = 0; i < total_values; i++)
	{
		emit(bit_values[i], ep_bits, "EBITS");
		total_endpoint_bits += ep_bits;
	}

#if UASTC_WRITE_MODE_DESCS
	uint32_t weight_start = bit_pos;
#endif

	// --- Weights ---
	uint32_t total_weight_bits = 0;
	(void)total_weight_bits;

	// With two planes the weights are interleaved, so texel index = i / 2.
	const uint32_t plane_shift = (total_planes == 2) ? 1 : 0;
	const uint32_t weight_count = 16 * total_planes;
	for (uint32_t i = 0; i < weight_count; i++)
	{
		const uint32_t texel = i >> plane_shift;

		bool is_anchor = false;
		for (uint32_t s = 0; (s < subsets) && !is_anchor; s++)
			is_anchor = (pAnchors[s] == texel);

		// Anchor weights have a known-zero MSB, so it is not stored.
		const uint32_t numbits = is_anchor ? (weight_bits - 1) : weight_bits;

		emit(weights[i], numbits, nullptr);
		total_weight_bits += numbits;
	}

#if UASTC_WRITE_MODE_DESCS
	printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits);
#endif

	assert(bit_pos <= 128);
	memcpy(&blk, packed, sizeof(blk));

#if UASTC_WRITE_MODE_DESCS
	printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", bit_pos, total_endpoint_bits, total_weight_bits);
#endif
}
459
460
// MODE 0
461
// 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB
462
// 18. DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB
463
static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr)
464
{
465
const uint32_t endpoint_range = (mode == 18) ? 11 : 19;
466
const uint32_t weight_range = (mode == 18) ? 11 : 8;
467
468
color_cell_compressor_params ccell_params;
469
memset(&ccell_params, 0, sizeof(ccell_params));
470
471
ccell_params.m_num_pixels = 16;
472
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
473
ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16;
474
ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4;
475
ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x;
476
ccell_params.m_astc_endpoint_range = endpoint_range;
477
ccell_params.m_weights[0] = 1;
478
ccell_params.m_weights[1] = 1;
479
ccell_params.m_weights[2] = 1;
480
ccell_params.m_weights[3] = 1;
481
ccell_params.m_pForce_selectors = pForce_selectors;
482
483
color_cell_compressor_results ccell_results;
484
uint8_t ccell_result_selectors[16];
485
uint8_t ccell_result_selectors_temp[16];
486
memset(&ccell_results, 0, sizeof(ccell_results));
487
ccell_results.m_pSelectors = &ccell_result_selectors[0];
488
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
489
490
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
491
492
// ASTC
493
astc_block_desc astc_results;
494
memset(&astc_results, 0, sizeof(astc_results));
495
496
astc_results.m_dual_plane = false;
497
astc_results.m_weight_range = weight_range;// (mode == 18) ? 11 : 8;
498
499
astc_results.m_ccs = 0;
500
astc_results.m_subsets = 1;
501
astc_results.m_partition_seed = 0;
502
astc_results.m_cem = 8;
503
504
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
505
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
506
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
507
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
508
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
509
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
510
511
bool invert = false;
512
513
if (pForce_selectors == nullptr)
514
{
515
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
516
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
517
if (s1 < s0)
518
{
519
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
520
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
521
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
522
invert = true;
523
}
524
}
525
526
for (uint32_t y = 0; y < 4; y++)
527
{
528
for (uint32_t x = 0; x < 4; x++)
529
{
530
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
531
532
if (invert)
533
astc_results.m_weights[x + y * 4] = ((mode == 18) ? 31 : 15) - astc_results.m_weights[x + y * 4];
534
}
535
}
536
537
assert(total_results < MAX_ENCODE_RESULTS);
538
if (total_results < MAX_ENCODE_RESULTS)
539
{
540
pResults[total_results].m_uastc_mode = mode;
541
pResults[total_results].m_common_pattern = 0;
542
pResults[total_results].m_astc = astc_results;
543
pResults[total_results].m_astc_err = part_err;
544
total_results++;
545
}
546
}
547
548
// MODE 1
549
// 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3
550
// DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB
551
static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
552
{
553
color_cell_compressor_params ccell_params;
554
memset(&ccell_params, 0, sizeof(ccell_params));
555
556
ccell_params.m_num_pixels = 16;
557
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
558
ccell_params.m_num_selector_weights = 4;
559
ccell_params.m_pSelector_weights = g_bc7_weights2;
560
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
561
ccell_params.m_astc_endpoint_range = 20;
562
ccell_params.m_weights[0] = 1;
563
ccell_params.m_weights[1] = 1;
564
ccell_params.m_weights[2] = 1;
565
ccell_params.m_weights[3] = 1;
566
567
color_cell_compressor_results ccell_results;
568
uint8_t ccell_result_selectors[16];
569
uint8_t ccell_result_selectors_temp[16];
570
memset(&ccell_results, 0, sizeof(ccell_results));
571
ccell_results.m_pSelectors = &ccell_result_selectors[0];
572
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
573
574
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
575
576
// ASTC
577
astc_block_desc astc_results;
578
memset(&astc_results, 0, sizeof(astc_results));
579
580
astc_results.m_dual_plane = false;
581
astc_results.m_weight_range = 2;
582
583
astc_results.m_ccs = 0;
584
astc_results.m_subsets = 1;
585
astc_results.m_partition_seed = 0;
586
astc_results.m_cem = 8;
587
588
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
589
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
590
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
591
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
592
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
593
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
594
595
const uint32_t range = 20;
596
597
bool invert = false;
598
599
int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
600
int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
601
if (s1 < s0)
602
{
603
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
604
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
605
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
606
invert = true;
607
}
608
609
for (uint32_t y = 0; y < 4; y++)
610
{
611
for (uint32_t x = 0; x < 4; x++)
612
{
613
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
614
615
if (invert)
616
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
617
}
618
}
619
620
assert(total_results < MAX_ENCODE_RESULTS);
621
if (total_results < MAX_ENCODE_RESULTS)
622
{
623
pResults[total_results].m_uastc_mode = 1;
624
pResults[total_results].m_common_pattern = 0;
625
pResults[total_results].m_astc = astc_results;
626
pResults[total_results].m_astc_err = part_err;
627
total_results++;
628
}
629
}
630
631
static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4])
632
{
633
assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);
634
635
uint64_t best_err = UINT64_MAX;
636
uint32_t best_common_pattern = 0;
637
638
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
639
{
640
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
641
642
const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];
643
644
color_quad_u8 subset_colors[2][16];
645
uint32_t subset_total_colors[2] = { 0, 0 };
646
for (uint32_t index = 0; index < 16; index++)
647
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
648
649
uint64_t total_subset_err = 0;
650
for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++)
651
total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
652
653
if (total_subset_err < best_err)
654
{
655
best_err = total_subset_err;
656
best_common_pattern = common_pattern;
657
}
658
}
659
660
return best_common_pattern;
661
}
662
663
// MODE 2
664
// 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1
665
// DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1
666
static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
667
{
668
uint32_t first_common_pattern = 0;
669
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
670
671
if (estimate_partition)
672
{
673
const uint32_t weights[4] = { 1, 1, 1, 1 };
674
first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights);
675
last_common_pattern = first_common_pattern + 1;
676
}
677
678
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
679
{
680
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
681
682
color_rgba part_pixels[2][16];
683
uint32_t part_pixel_index[4][4];
684
uint32_t num_part_pixels[2] = { 0, 0 };
685
686
for (uint32_t y = 0; y < 4; y++)
687
{
688
for (uint32_t x = 0; x < 4; x++)
689
{
690
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
691
part_pixel_index[y][x] = num_part_pixels[part];
692
part_pixels[part][num_part_pixels[part]++] = block[y][x];
693
}
694
}
695
696
color_cell_compressor_params ccell_params[2];
697
color_cell_compressor_results ccell_results[2];
698
uint8_t ccell_result_selectors[2][16];
699
uint8_t ccell_result_selectors_temp[2][16];
700
701
uint64_t total_part_err = 0;
702
for (uint32_t part = 0; part < 2; part++)
703
{
704
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
705
706
ccell_params[part].m_num_pixels = num_part_pixels[part];
707
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
708
ccell_params[part].m_num_selector_weights = 8;
709
ccell_params[part].m_pSelector_weights = g_bc7_weights3;
710
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
711
ccell_params[part].m_astc_endpoint_range = 8;
712
ccell_params[part].m_weights[0] = 1;
713
ccell_params[part].m_weights[1] = 1;
714
ccell_params[part].m_weights[2] = 1;
715
ccell_params[part].m_weights[3] = 1;
716
717
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
718
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
719
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
720
721
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
722
total_part_err += part_err;
723
} // part
724
725
{
726
// ASTC
727
astc_block_desc astc_results;
728
memset(&astc_results, 0, sizeof(astc_results));
729
730
astc_results.m_dual_plane = false;
731
astc_results.m_weight_range = 5;
732
733
astc_results.m_ccs = 0;
734
astc_results.m_subsets = 2;
735
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
736
astc_results.m_cem = 8;
737
738
uint32_t p0 = 0;
739
uint32_t p1 = 1;
740
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
741
std::swap(p0, p1);
742
743
astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
744
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
745
astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
746
astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
747
astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
748
astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
749
750
const uint32_t range = 8;
751
752
bool invert[2] = { false, false };
753
754
int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
755
int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
756
if (s1 < s0)
757
{
758
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
759
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
760
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
761
invert[0] = true;
762
}
763
764
astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
765
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
766
astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
767
astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
768
astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
769
astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
770
771
s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant;
772
s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 6]].m_unquant;
773
774
if (s1 < s0)
775
{
776
std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
777
std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
778
std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
779
invert[1] = true;
780
}
781
782
for (uint32_t y = 0; y < 4; y++)
783
{
784
for (uint32_t x = 0; x < 4; x++)
785
{
786
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
787
788
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
789
790
uint32_t astc_part = bc7_part;
791
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
792
astc_part = 1 - astc_part;
793
794
if (invert[astc_part])
795
astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
796
}
797
}
798
799
assert(total_results < MAX_ENCODE_RESULTS);
800
if (total_results < MAX_ENCODE_RESULTS)
801
{
802
pResults[total_results].m_uastc_mode = 2;
803
pResults[total_results].m_common_pattern = common_pattern;
804
pResults[total_results].m_astc = astc_results;
805
pResults[total_results].m_astc_err = total_part_err;
806
total_results++;
807
}
808
}
809
810
} // common_pattern
811
}
812
813
// MODE 3
814
// 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2
815
// DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2
816
static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
817
{
818
uint32_t first_common_pattern = 0;
819
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3;
820
821
if (estimate_partition)
822
{
823
uint64_t best_err = UINT64_MAX;
824
uint32_t best_common_pattern = 0;
825
const uint32_t weights[4] = { 1, 1, 1, 1 };
826
827
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++)
828
{
829
const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
830
831
const uint8_t* pPartition = &g_bc7_partition3[bc7_pattern * 16];
832
833
color_quad_u8 subset_colors[3][16];
834
uint32_t subset_total_colors[3] = { 0, 0 };
835
for (uint32_t index = 0; index < 16; index++)
836
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
837
838
uint64_t total_subset_err = 0;
839
for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++)
840
total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
841
842
if (total_subset_err < best_err)
843
{
844
best_err = total_subset_err;
845
best_common_pattern = common_pattern;
846
}
847
}
848
849
first_common_pattern = best_common_pattern;
850
last_common_pattern = best_common_pattern + 1;
851
}
852
853
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
854
{
855
const uint32_t endpoint_range = 7;
856
857
const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
858
859
color_rgba part_pixels[3][16];
860
uint32_t part_pixel_index[4][4];
861
uint32_t num_part_pixels[3] = { 0, 0, 0 };
862
863
for (uint32_t y = 0; y < 4; y++)
864
{
865
for (uint32_t x = 0; x < 4; x++)
866
{
867
const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
868
part_pixel_index[y][x] = num_part_pixels[bc7_part];
869
part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x];
870
}
871
}
872
873
color_cell_compressor_params ccell_params[3];
874
color_cell_compressor_results ccell_results[3];
875
uint8_t ccell_result_selectors[3][16];
876
uint8_t ccell_result_selectors_temp[3][16];
877
878
uint64_t total_part_err = 0;
879
for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
880
{
881
memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part]));
882
883
ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part];
884
ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0];
885
ccell_params[bc7_part].m_num_selector_weights = 4;
886
ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2;
887
ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
888
ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range;
889
ccell_params[bc7_part].m_weights[0] = 1;
890
ccell_params[bc7_part].m_weights[1] = 1;
891
ccell_params[bc7_part].m_weights[2] = 1;
892
ccell_params[bc7_part].m_weights[3] = 1;
893
894
memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part]));
895
ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0];
896
ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0];
897
898
uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params);
899
total_part_err += part_err;
900
} // part
901
902
{
903
// ASTC
904
astc_block_desc astc_results;
905
memset(&astc_results, 0, sizeof(astc_results));
906
907
astc_results.m_dual_plane = false;
908
astc_results.m_weight_range = 2;
909
910
astc_results.m_ccs = 0;
911
astc_results.m_subsets = 3;
912
astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc;
913
astc_results.m_cem = 8;
914
915
uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index
916
const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm;
917
astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0];
918
astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1];
919
astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2];
920
921
bool invert_astc_part[3] = { false, false, false };
922
923
for (uint32_t astc_part = 0; astc_part < 3; astc_part++)
924
{
925
uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part];
926
927
pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0];
928
pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0];
929
pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1];
930
pEndpoints[3] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1];
931
pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2];
932
pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2];
933
934
int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant;
935
int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant;
936
if (s1 < s0)
937
{
938
std::swap(pEndpoints[0], pEndpoints[1]);
939
std::swap(pEndpoints[2], pEndpoints[3]);
940
std::swap(pEndpoints[4], pEndpoints[5]);
941
invert_astc_part[astc_part] = true;
942
}
943
}
944
945
for (uint32_t y = 0; y < 4; y++)
946
{
947
for (uint32_t x = 0; x < 4; x++)
948
{
949
const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
950
951
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
952
953
uint32_t astc_part = 0;
954
for (uint32_t i = 0; i < 3; i++)
955
{
956
if (astc_to_bc7_part[i] == bc7_part)
957
{
958
astc_part = i;
959
break;
960
}
961
}
962
963
if (invert_astc_part[astc_part])
964
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
965
}
966
}
967
968
assert(total_results < MAX_ENCODE_RESULTS);
969
if (total_results < MAX_ENCODE_RESULTS)
970
{
971
pResults[total_results].m_uastc_mode = 3;
972
pResults[total_results].m_common_pattern = common_pattern;
973
pResults[total_results].m_astc = astc_results;
974
pResults[total_results].m_astc_err = total_part_err;
975
total_results++;
976
}
977
978
}
979
980
} // common_pattern
981
}
982
983
// MODE 4
984
// DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct), EndpointRange: 12 (40) - BC7 MODE 3
985
static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
986
{
987
//const uint32_t weight_range = 2;
988
const uint32_t endpoint_range = 12;
989
990
uint32_t first_common_pattern = 0;
991
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
992
993
if (estimate_partition)
994
{
995
const uint32_t weights[4] = { 1, 1, 1, 1 };
996
first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights);
997
last_common_pattern = first_common_pattern + 1;
998
}
999
1000
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
1001
{
1002
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1003
1004
color_rgba part_pixels[2][16];
1005
uint32_t part_pixel_index[4][4];
1006
uint32_t num_part_pixels[2] = { 0, 0 };
1007
1008
for (uint32_t y = 0; y < 4; y++)
1009
{
1010
for (uint32_t x = 0; x < 4; x++)
1011
{
1012
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1013
part_pixel_index[y][x] = num_part_pixels[part];
1014
part_pixels[part][num_part_pixels[part]++] = block[y][x];
1015
}
1016
}
1017
1018
color_cell_compressor_params ccell_params[2];
1019
color_cell_compressor_results ccell_results[2];
1020
uint8_t ccell_result_selectors[2][16];
1021
uint8_t ccell_result_selectors_temp[2][16];
1022
1023
uint64_t total_part_err = 0;
1024
for (uint32_t part = 0; part < 2; part++)
1025
{
1026
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
1027
1028
ccell_params[part].m_num_pixels = num_part_pixels[part];
1029
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
1030
ccell_params[part].m_num_selector_weights = 4;
1031
ccell_params[part].m_pSelector_weights = g_bc7_weights2;
1032
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1033
ccell_params[part].m_astc_endpoint_range = endpoint_range;
1034
ccell_params[part].m_weights[0] = 1;
1035
ccell_params[part].m_weights[1] = 1;
1036
ccell_params[part].m_weights[2] = 1;
1037
ccell_params[part].m_weights[3] = 1;
1038
1039
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
1040
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
1041
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
1042
1043
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
1044
total_part_err += part_err;
1045
} // part
1046
1047
// ASTC
1048
astc_block_desc astc_results;
1049
memset(&astc_results, 0, sizeof(astc_results));
1050
1051
astc_results.m_dual_plane = false;
1052
astc_results.m_weight_range = 2;
1053
1054
astc_results.m_ccs = 0;
1055
astc_results.m_subsets = 2;
1056
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
1057
astc_results.m_cem = 8;
1058
1059
uint32_t p0 = 0;
1060
uint32_t p1 = 1;
1061
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1062
std::swap(p0, p1);
1063
1064
astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
1065
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
1066
astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
1067
astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
1068
astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
1069
astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
1070
1071
bool invert[2] = { false, false };
1072
1073
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
1074
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
1075
if (s1 < s0)
1076
{
1077
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
1078
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
1079
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
1080
invert[0] = true;
1081
}
1082
1083
astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
1084
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
1085
astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
1086
astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
1087
astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
1088
astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
1089
1090
s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant;
1091
s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant;
1092
1093
if (s1 < s0)
1094
{
1095
std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
1096
std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
1097
std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
1098
invert[1] = true;
1099
}
1100
1101
for (uint32_t y = 0; y < 4; y++)
1102
{
1103
for (uint32_t x = 0; x < 4; x++)
1104
{
1105
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1106
1107
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
1108
1109
uint32_t astc_part = bc7_part;
1110
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1111
astc_part = 1 - astc_part;
1112
1113
if (invert[astc_part])
1114
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
1115
}
1116
}
1117
1118
assert(total_results < MAX_ENCODE_RESULTS);
1119
if (total_results < MAX_ENCODE_RESULTS)
1120
{
1121
pResults[total_results].m_uastc_mode = 4;
1122
pResults[total_results].m_common_pattern = common_pattern;
1123
pResults[total_results].m_astc = astc_results;
1124
pResults[total_results].m_astc_err = total_part_err;
1125
total_results++;
1126
}
1127
1128
} // common_pattern
1129
}
1130
1131
// MODE 5
1132
// DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset)
1133
static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1134
{
1135
const uint32_t weight_range = 5;
1136
const uint32_t endpoint_range = 20;
1137
1138
color_cell_compressor_params ccell_params;
1139
memset(&ccell_params, 0, sizeof(ccell_params));
1140
1141
ccell_params.m_num_pixels = 16;
1142
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
1143
ccell_params.m_num_selector_weights = 8;
1144
ccell_params.m_pSelector_weights = g_bc7_weights3;
1145
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
1146
ccell_params.m_astc_endpoint_range = endpoint_range;
1147
ccell_params.m_weights[0] = 1;
1148
ccell_params.m_weights[1] = 1;
1149
ccell_params.m_weights[2] = 1;
1150
ccell_params.m_weights[3] = 1;
1151
1152
color_cell_compressor_results ccell_results;
1153
uint8_t ccell_result_selectors[16];
1154
uint8_t ccell_result_selectors_temp[16];
1155
memset(&ccell_results, 0, sizeof(ccell_results));
1156
ccell_results.m_pSelectors = &ccell_result_selectors[0];
1157
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1158
1159
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
1160
1161
// ASTC
1162
astc_block_desc blk;
1163
memset(&blk, 0, sizeof(blk));
1164
1165
blk.m_dual_plane = false;
1166
blk.m_weight_range = weight_range;
1167
1168
blk.m_ccs = 0;
1169
blk.m_subsets = 1;
1170
blk.m_partition_seed = 0;
1171
blk.m_cem = 8;
1172
1173
blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
1174
blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
1175
blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
1176
blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
1177
blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
1178
blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
1179
1180
bool invert = false;
1181
1182
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1183
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1184
if (s1 < s0)
1185
{
1186
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1187
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1188
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1189
invert = true;
1190
}
1191
1192
for (uint32_t y = 0; y < 4; y++)
1193
{
1194
for (uint32_t x = 0; x < 4; x++)
1195
{
1196
blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
1197
1198
if (invert)
1199
blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4];
1200
}
1201
}
1202
1203
assert(total_results < MAX_ENCODE_RESULTS);
1204
if (total_results < MAX_ENCODE_RESULTS)
1205
{
1206
pResults[total_results].m_uastc_mode = 5;
1207
pResults[total_results].m_common_pattern = 0;
1208
pResults[total_results].m_astc = blk;
1209
pResults[total_results].m_astc_err = part_err;
1210
total_results++;
1211
}
1212
}
1213
1214
// MODE 6
1215
// DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5
1216
static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1217
{
1218
for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++)
1219
{
1220
const uint32_t weight_range = 2;
1221
const uint32_t endpoint_range = 18;
1222
1223
color_quad_u8 block_rgb[16];
1224
color_quad_u8 block_a[16];
1225
for (uint32_t i = 0; i < 16; i++)
1226
{
1227
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
1228
block_a[i] = block_rgb[i];
1229
1230
uint8_t c = block_a[i].m_c[rot_comp];
1231
block_a[i].m_c[0] = c;
1232
block_a[i].m_c[1] = c;
1233
block_a[i].m_c[2] = c;
1234
block_a[i].m_c[3] = 255;
1235
1236
block_rgb[i].m_c[rot_comp] = 255;
1237
}
1238
1239
uint8_t ccell_result_selectors_temp[16];
1240
1241
color_cell_compressor_params ccell_params_rgb;
1242
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
1243
1244
ccell_params_rgb.m_num_pixels = 16;
1245
ccell_params_rgb.m_pPixels = block_rgb;
1246
ccell_params_rgb.m_num_selector_weights = 4;
1247
ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
1248
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1249
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
1250
ccell_params_rgb.m_weights[0] = 1;
1251
ccell_params_rgb.m_weights[1] = 1;
1252
ccell_params_rgb.m_weights[2] = 1;
1253
ccell_params_rgb.m_weights[3] = 1;
1254
1255
color_cell_compressor_results ccell_results_rgb;
1256
uint8_t ccell_result_selectors_rgb[16];
1257
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
1258
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
1259
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1260
1261
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params);
1262
1263
color_cell_compressor_params ccell_params_a;
1264
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
1265
1266
ccell_params_a.m_num_pixels = 16;
1267
ccell_params_a.m_pPixels = block_a;
1268
ccell_params_a.m_num_selector_weights = 4;
1269
ccell_params_a.m_pSelector_weights = g_bc7_weights2;
1270
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1271
ccell_params_a.m_astc_endpoint_range = endpoint_range;
1272
ccell_params_a.m_weights[0] = 1;
1273
ccell_params_a.m_weights[1] = 1;
1274
ccell_params_a.m_weights[2] = 1;
1275
ccell_params_a.m_weights[3] = 1;
1276
1277
color_cell_compressor_results ccell_results_a;
1278
uint8_t ccell_result_selectors_a[16];
1279
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
1280
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
1281
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1282
1283
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3;
1284
1285
uint64_t total_err = part_err_rgb + part_err_a;
1286
1287
// ASTC
1288
astc_block_desc blk;
1289
memset(&blk, 0, sizeof(blk));
1290
1291
blk.m_dual_plane = true;
1292
blk.m_weight_range = weight_range;
1293
1294
blk.m_ccs = rot_comp;
1295
blk.m_subsets = 1;
1296
blk.m_partition_seed = 0;
1297
blk.m_cem = 8;
1298
1299
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
1300
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
1301
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
1302
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
1303
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
1304
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
1305
1306
bool invert = false;
1307
1308
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1309
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1310
if (s1 < s0)
1311
{
1312
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1313
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1314
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1315
invert = true;
1316
}
1317
1318
for (uint32_t y = 0; y < 4; y++)
1319
{
1320
for (uint32_t x = 0; x < 4; x++)
1321
{
1322
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
1323
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
1324
1325
if (invert)
1326
{
1327
rgb_index = 3 - rgb_index;
1328
a_index = 3 - a_index;
1329
}
1330
1331
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
1332
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
1333
}
1334
}
1335
1336
assert(total_results < MAX_ENCODE_RESULTS);
1337
if (total_results < MAX_ENCODE_RESULTS)
1338
{
1339
pResults[total_results].m_uastc_mode = 6;
1340
pResults[total_results].m_common_pattern = 0;
1341
pResults[total_results].m_astc = blk;
1342
pResults[total_results].m_astc_err = total_err;
1343
total_results++;
1344
}
1345
} // rot_comp
1346
}
1347
1348
// MODE 7 - 2 subset ASTC, 3 subset BC7
1349
// DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct), EndpointRange: 12 (40) - BC7 MODE 2
1350
static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
1351
{
1352
uint32_t first_common_pattern = 0;
1353
uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS;
1354
1355
if (estimate_partition)
1356
{
1357
uint64_t best_err = UINT64_MAX;
1358
uint32_t best_common_pattern = 0;
1359
const uint32_t weights[4] = { 1, 1, 1, 1 };
1360
1361
for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++)
1362
{
1363
const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0];
1364
1365
#ifdef _DEBUG
1366
const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
1367
const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
1368
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
1369
1370
for (uint32_t y = 0; y < 4; y++)
1371
{
1372
for (uint32_t x = 0; x < 4; x++)
1373
{
1374
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1375
assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
1376
assert(astc_part == pPartition[x + y * 4]);
1377
}
1378
}
1379
#endif
1380
1381
color_quad_u8 subset_colors[2][16];
1382
uint32_t subset_total_colors[2] = { 0, 0 };
1383
for (uint32_t index = 0; index < 16; index++)
1384
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
1385
1386
uint64_t total_subset_err = 0;
1387
for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++)
1388
total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
1389
1390
if (total_subset_err < best_err)
1391
{
1392
best_err = total_subset_err;
1393
best_common_pattern = common_pattern;
1394
}
1395
}
1396
1397
first_common_pattern = best_common_pattern;
1398
last_common_pattern = best_common_pattern + 1;
1399
}
1400
1401
//const uint32_t weight_range = 2;
1402
const uint32_t endpoint_range = 12;
1403
1404
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
1405
{
1406
const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
1407
const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
1408
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
1409
1410
color_rgba part_pixels[2][16];
1411
uint32_t part_pixel_index[4][4];
1412
uint32_t num_part_pixels[2] = { 0, 0 };
1413
1414
for (uint32_t y = 0; y < 4; y++)
1415
{
1416
for (uint32_t x = 0; x < 4; x++)
1417
{
1418
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1419
#ifdef _DEBUG
1420
assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
1421
#endif
1422
1423
part_pixel_index[y][x] = num_part_pixels[astc_part];
1424
part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x];
1425
}
1426
}
1427
1428
color_cell_compressor_params ccell_params[2];
1429
color_cell_compressor_results ccell_results[2];
1430
uint8_t ccell_result_selectors[2][16];
1431
uint8_t ccell_result_selectors_temp[2][16];
1432
1433
uint64_t total_part_err = 0;
1434
for (uint32_t part = 0; part < 2; part++)
1435
{
1436
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
1437
1438
ccell_params[part].m_num_pixels = num_part_pixels[part];
1439
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
1440
ccell_params[part].m_num_selector_weights = 4;
1441
ccell_params[part].m_pSelector_weights = g_bc7_weights2;
1442
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1443
ccell_params[part].m_astc_endpoint_range = endpoint_range;
1444
ccell_params[part].m_weights[0] = 1;
1445
ccell_params[part].m_weights[1] = 1;
1446
ccell_params[part].m_weights[2] = 1;
1447
ccell_params[part].m_weights[3] = 1;
1448
1449
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
1450
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
1451
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
1452
1453
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
1454
total_part_err += part_err;
1455
} // part
1456
1457
// ASTC
1458
astc_block_desc blk;
1459
memset(&blk, 0, sizeof(blk));
1460
1461
blk.m_dual_plane = false;
1462
blk.m_weight_range = 2;
1463
1464
blk.m_ccs = 0;
1465
blk.m_subsets = 2;
1466
blk.m_partition_seed = astc_pattern;
1467
blk.m_cem = 8;
1468
1469
const uint32_t p0 = 0;
1470
const uint32_t p1 = 1;
1471
1472
blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
1473
blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
1474
blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
1475
blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
1476
blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
1477
blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
1478
1479
bool invert[2] = { false, false };
1480
1481
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1482
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1483
if (s1 < s0)
1484
{
1485
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1486
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1487
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1488
invert[0] = true;
1489
}
1490
1491
blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
1492
blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
1493
blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
1494
blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
1495
blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
1496
blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
1497
1498
s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant;
1499
s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant;
1500
1501
if (s1 < s0)
1502
{
1503
std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]);
1504
std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]);
1505
std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]);
1506
invert[1] = true;
1507
}
1508
1509
for (uint32_t y = 0; y < 4; y++)
1510
{
1511
for (uint32_t x = 0; x < 4; x++)
1512
{
1513
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1514
1515
blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]];
1516
1517
if (invert[astc_part])
1518
blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4];
1519
}
1520
}
1521
1522
assert(total_results < MAX_ENCODE_RESULTS);
1523
if (total_results < MAX_ENCODE_RESULTS)
1524
{
1525
pResults[total_results].m_uastc_mode = 7;
1526
pResults[total_results].m_common_pattern = common_pattern;
1527
pResults[total_results].m_astc = blk;
1528
pResults[total_results].m_astc_err = total_part_err;
1529
total_results++;
1530
}
1531
1532
} // common_pattern
1533
}
1534
1535
static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4])
1536
{
1537
assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);
1538
1539
const uint32_t MAX_PARTS = 8;
1540
assert(max_parts <= MAX_PARTS);
1541
1542
uint64_t part_error[MAX_PARTS];
1543
memset(part_error, 0xFF, sizeof(part_error));
1544
memset(pParts, 0, sizeof(pParts[0]) * max_parts);
1545
1546
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
1547
{
1548
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1549
1550
const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];
1551
1552
color_quad_u8 subset_colors[2][16];
1553
uint32_t subset_total_colors[2] = { 0, 0 };
1554
for (uint32_t index = 0; index < 16; index++)
1555
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
1556
1557
uint64_t total_subset_err = 0;
1558
for (uint32_t subset = 0; subset < 2; subset++)
1559
total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights);
1560
1561
for (int i = 0; i < (int)max_parts; i++)
1562
{
1563
if (total_subset_err < part_error[i])
1564
{
1565
for (int j = max_parts - 1; j > i; --j)
1566
{
1567
pParts[j] = pParts[j - 1];
1568
part_error[j] = part_error[j - 1];
1569
}
1570
1571
pParts[i] = common_pattern;
1572
part_error[i] = total_subset_err;
1573
1574
break;
1575
}
1576
}
1577
}
1578
1579
#ifdef _DEBUG
1580
for (uint32_t i = 0; i < max_parts - 1; i++)
1581
{
1582
assert(part_error[i] <= part_error[i + 1]);
1583
}
1584
#endif
1585
}
1586
1587
// 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7
1588
// 16. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7
1589
static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size)
1590
{
1591
assert(mode == 9 || mode == 16);
1592
1593
const color_rgba* pBlock = &source_block[0][0];
1594
1595
color_rgba temp_block[16];
1596
if (mode == 16)
1597
{
1598
for (uint32_t i = 0; i < 16; i++)
1599
{
1600
if (mode == 16)
1601
{
1602
assert(pBlock[i].r == pBlock[i].g);
1603
assert(pBlock[i].r == pBlock[i].b);
1604
}
1605
1606
const uint32_t l = pBlock[i].r;
1607
const uint32_t a = pBlock[i].a;
1608
1609
// Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
1610
temp_block[i].set_noclamp_rgba(l, 0, 0, a);
1611
}
1612
1613
pBlock = temp_block;
1614
}
1615
1616
const uint32_t weights[4] = { 1, 1, 1, 1 };
1617
1618
//const uint32_t weight_range = 2;
1619
const uint32_t endpoint_range = (mode == 16) ? 20 : 8;
1620
1621
uint32_t first_common_pattern = 0;
1622
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
1623
bool use_part_list = false;
1624
1625
const uint32_t MAX_PARTS = 8;
1626
uint32_t parts[MAX_PARTS];
1627
1628
if (estimate_partition_list_size == 1)
1629
{
1630
first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights);
1631
last_common_pattern = first_common_pattern + 1;
1632
}
1633
else if (estimate_partition_list_size > 0)
1634
{
1635
assert(estimate_partition_list_size <= MAX_PARTS);
1636
estimate_partition_list_size = basisu::minimum(estimate_partition_list_size, MAX_PARTS);
1637
1638
estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights);
1639
1640
first_common_pattern = 0;
1641
last_common_pattern = estimate_partition_list_size;
1642
use_part_list = true;
1643
1644
#ifdef _DEBUG
1645
assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights));
1646
#endif
1647
}
1648
1649
for (uint32_t common_pattern_iter = first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++)
1650
{
1651
const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter;
1652
1653
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1654
1655
color_rgba part_pixels[2][16];
1656
uint32_t part_pixel_index[4][4];
1657
uint32_t num_part_pixels[2] = { 0, 0 };
1658
1659
for (uint32_t y = 0; y < 4; y++)
1660
{
1661
for (uint32_t x = 0; x < 4; x++)
1662
{
1663
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1664
part_pixel_index[y][x] = num_part_pixels[part];
1665
part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x];
1666
}
1667
}
1668
1669
color_cell_compressor_params ccell_params[2];
1670
color_cell_compressor_results ccell_results[2];
1671
uint8_t ccell_result_selectors[2][16];
1672
uint8_t ccell_result_selectors_temp[2][16];
1673
1674
uint64_t total_err = 0;
1675
for (uint32_t subset = 0; subset < 2; subset++)
1676
{
1677
memset(&ccell_params[subset], 0, sizeof(ccell_params[subset]));
1678
1679
ccell_params[subset].m_num_pixels = num_part_pixels[subset];
1680
ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0];
1681
ccell_params[subset].m_num_selector_weights = 4;
1682
ccell_params[subset].m_pSelector_weights = g_bc7_weights2;
1683
ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1684
ccell_params[subset].m_astc_endpoint_range = endpoint_range;
1685
ccell_params[subset].m_weights[0] = weights[0];
1686
ccell_params[subset].m_weights[1] = weights[1];
1687
ccell_params[subset].m_weights[2] = weights[2];
1688
ccell_params[subset].m_weights[3] = weights[3];
1689
ccell_params[subset].m_has_alpha = true;
1690
1691
memset(&ccell_results[subset], 0, sizeof(ccell_results[subset]));
1692
ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0];
1693
ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0];
1694
1695
uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params);
1696
1697
if (mode == 16)
1698
{
1699
color_rgba colors[4];
1700
for (uint32_t c = 0; c < 4; c++)
1701
{
1702
colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
1703
colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
1704
}
1705
1706
for (uint32_t i = 1; i < 4 - 1; i++)
1707
for (uint32_t c = 0; c < 4; c++)
1708
colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false);
1709
1710
for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++)
1711
{
1712
color_rgba orig_pix(part_pixels[subset][p]);
1713
orig_pix.g = orig_pix.r;
1714
orig_pix.b = orig_pix.r;
1715
total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]);
1716
}
1717
}
1718
else
1719
{
1720
total_err += subset_err;
1721
}
1722
} // subset
1723
1724
// ASTC
1725
astc_block_desc astc_results;
1726
memset(&astc_results, 0, sizeof(astc_results));
1727
1728
astc_results.m_dual_plane = false;
1729
astc_results.m_weight_range = 2;
1730
1731
astc_results.m_ccs = 0;
1732
astc_results.m_subsets = 2;
1733
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
1734
astc_results.m_cem = (mode == 16) ? 4 : 12;
1735
1736
uint32_t part[2] = { 0, 1 };
1737
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1738
std::swap(part[0], part[1]);
1739
1740
bool invert[2] = { false, false };
1741
1742
for (uint32_t p = 0; p < 2; p++)
1743
{
1744
if (mode == 16)
1745
{
1746
astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0];
1747
astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0];
1748
1749
astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3];
1750
astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3];
1751
}
1752
else
1753
{
1754
for (uint32_t c = 0; c < 4; c++)
1755
{
1756
astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c];
1757
astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c];
1758
}
1759
1760
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant +
1761
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant +
1762
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant;
1763
1764
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant +
1765
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant +
1766
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant;
1767
1768
if (s1 < s0)
1769
{
1770
std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]);
1771
std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]);
1772
std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]);
1773
std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]);
1774
invert[p] = true;
1775
}
1776
}
1777
}
1778
1779
for (uint32_t y = 0; y < 4; y++)
1780
{
1781
for (uint32_t x = 0; x < 4; x++)
1782
{
1783
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1784
1785
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
1786
1787
uint32_t astc_part = bc7_part;
1788
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1789
astc_part = 1 - astc_part;
1790
1791
if (invert[astc_part])
1792
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
1793
}
1794
}
1795
1796
assert(total_results < MAX_ENCODE_RESULTS);
1797
if (total_results < MAX_ENCODE_RESULTS)
1798
{
1799
pResults[total_results].m_uastc_mode = mode;
1800
pResults[total_results].m_common_pattern = common_pattern;
1801
pResults[total_results].m_astc = astc_results;
1802
pResults[total_results].m_astc_err = total_err;
1803
total_results++;
1804
}
1805
1806
} // common_pattern
1807
}
1808
1809
// MODE 10
1810
// DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6
1811
static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1812
{
1813
const uint32_t weight_range = 8;
1814
const uint32_t endpoint_range = 13;
1815
1816
color_cell_compressor_params ccell_params;
1817
memset(&ccell_params, 0, sizeof(ccell_params));
1818
1819
ccell_params.m_num_pixels = 16;
1820
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
1821
ccell_params.m_num_selector_weights = 16;
1822
ccell_params.m_pSelector_weights = g_astc_weights4;
1823
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
1824
ccell_params.m_astc_endpoint_range = endpoint_range;
1825
ccell_params.m_weights[0] = 1;
1826
ccell_params.m_weights[1] = 1;
1827
ccell_params.m_weights[2] = 1;
1828
ccell_params.m_weights[3] = 1;
1829
ccell_params.m_has_alpha = true;
1830
1831
color_cell_compressor_results ccell_results;
1832
uint8_t ccell_result_selectors[16];
1833
uint8_t ccell_result_selectors_temp[16];
1834
memset(&ccell_results, 0, sizeof(ccell_results));
1835
ccell_results.m_pSelectors = &ccell_result_selectors[0];
1836
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1837
1838
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
1839
1840
// ASTC
1841
astc_block_desc astc_results;
1842
memset(&astc_results, 0, sizeof(astc_results));
1843
1844
astc_results.m_dual_plane = false;
1845
astc_results.m_weight_range = weight_range;
1846
1847
astc_results.m_ccs = 0;
1848
astc_results.m_subsets = 1;
1849
astc_results.m_partition_seed = 0;
1850
astc_results.m_cem = 12;
1851
1852
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
1853
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
1854
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
1855
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
1856
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
1857
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
1858
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
1859
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
1860
1861
bool invert = false;
1862
1863
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
1864
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
1865
if (s1 < s0)
1866
{
1867
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
1868
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
1869
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
1870
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
1871
invert = true;
1872
}
1873
1874
for (uint32_t y = 0; y < 4; y++)
1875
{
1876
for (uint32_t x = 0; x < 4; x++)
1877
{
1878
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
1879
1880
if (invert)
1881
astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4];
1882
}
1883
}
1884
1885
assert(total_results < MAX_ENCODE_RESULTS);
1886
if (total_results < MAX_ENCODE_RESULTS)
1887
{
1888
pResults[total_results].m_uastc_mode = 10;
1889
pResults[total_results].m_common_pattern = 0;
1890
pResults[total_results].m_astc = astc_results;
1891
pResults[total_results].m_astc_err = part_err;
1892
total_results++;
1893
}
1894
}
1895
1896
// 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5
1897
// 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5
1898
static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1899
{
1900
assert((mode == 11) || (mode == 17));
1901
1902
const uint32_t weight_range = 2;
1903
const uint32_t endpoint_range = (mode == 17) ? 20 : 13;
1904
1905
bc7enc_compress_block_params local_comp_params(comp_params);
1906
local_comp_params.m_perceptual = false;
1907
local_comp_params.m_weights[0] = 1;
1908
local_comp_params.m_weights[1] = 1;
1909
local_comp_params.m_weights[2] = 1;
1910
local_comp_params.m_weights[3] = 1;
1911
1912
const uint32_t last_rot_comp = (mode == 17) ? 1 : 4;
1913
1914
for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++)
1915
{
1916
color_quad_u8 block_rgb[16];
1917
color_quad_u8 block_a[16];
1918
for (uint32_t i = 0; i < 16; i++)
1919
{
1920
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
1921
block_a[i] = block_rgb[i];
1922
1923
if (mode == 17)
1924
{
1925
assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]);
1926
assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]);
1927
1928
block_a[i].m_c[0] = block_rgb[i].m_c[3];
1929
block_a[i].m_c[1] = block_rgb[i].m_c[3];
1930
block_a[i].m_c[2] = block_rgb[i].m_c[3];
1931
block_a[i].m_c[3] = 255;
1932
1933
block_rgb[i].m_c[1] = block_rgb[i].m_c[0];
1934
block_rgb[i].m_c[2] = block_rgb[i].m_c[0];
1935
block_rgb[i].m_c[3] = 255;
1936
}
1937
else
1938
{
1939
uint8_t c = block_a[i].m_c[rot_comp];
1940
block_a[i].m_c[0] = c;
1941
block_a[i].m_c[1] = c;
1942
block_a[i].m_c[2] = c;
1943
block_a[i].m_c[3] = 255;
1944
1945
block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
1946
block_rgb[i].m_c[3] = 255;
1947
}
1948
}
1949
1950
uint8_t ccell_result_selectors_temp[16];
1951
1952
color_cell_compressor_params ccell_params_rgb;
1953
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
1954
1955
ccell_params_rgb.m_num_pixels = 16;
1956
ccell_params_rgb.m_pPixels = block_rgb;
1957
ccell_params_rgb.m_num_selector_weights = 4;
1958
ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
1959
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1960
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
1961
ccell_params_rgb.m_weights[0] = 1;
1962
ccell_params_rgb.m_weights[1] = 1;
1963
ccell_params_rgb.m_weights[2] = 1;
1964
ccell_params_rgb.m_weights[3] = 1;
1965
1966
color_cell_compressor_results ccell_results_rgb;
1967
uint8_t ccell_result_selectors_rgb[16];
1968
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
1969
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
1970
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1971
1972
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
1973
1974
color_cell_compressor_params ccell_params_a;
1975
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
1976
1977
ccell_params_a.m_num_pixels = 16;
1978
ccell_params_a.m_pPixels = block_a;
1979
ccell_params_a.m_num_selector_weights = 4;
1980
ccell_params_a.m_pSelector_weights = g_bc7_weights2;
1981
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1982
ccell_params_a.m_astc_endpoint_range = endpoint_range;
1983
ccell_params_a.m_weights[0] = 1;
1984
ccell_params_a.m_weights[1] = 1;
1985
ccell_params_a.m_weights[2] = 1;
1986
ccell_params_a.m_weights[3] = 1;
1987
1988
color_cell_compressor_results ccell_results_a;
1989
uint8_t ccell_result_selectors_a[16];
1990
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
1991
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
1992
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1993
1994
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
1995
1996
uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a);
1997
1998
// ASTC
1999
astc_block_desc blk;
2000
memset(&blk, 0, sizeof(blk));
2001
2002
blk.m_dual_plane = true;
2003
blk.m_weight_range = weight_range;
2004
2005
blk.m_ccs = (mode == 17) ? 3 : rot_comp;
2006
blk.m_subsets = 1;
2007
blk.m_partition_seed = 0;
2008
blk.m_cem = (mode == 17) ? 4 : 12;
2009
2010
bool invert = false;
2011
2012
if (mode == 17)
2013
{
2014
assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]);
2015
assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]);
2016
2017
assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]);
2018
assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]);
2019
2020
blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0];
2021
blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0];
2022
2023
blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2024
blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2025
}
2026
else
2027
{
2028
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
2029
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
2030
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
2031
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
2032
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
2033
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
2034
if (rot_comp == 3)
2035
{
2036
blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2037
blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2038
}
2039
else
2040
{
2041
blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
2042
blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
2043
}
2044
2045
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
2046
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
2047
if (s1 < s0)
2048
{
2049
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
2050
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
2051
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
2052
std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
2053
invert = true;
2054
}
2055
}
2056
2057
for (uint32_t y = 0; y < 4; y++)
2058
{
2059
for (uint32_t x = 0; x < 4; x++)
2060
{
2061
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
2062
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
2063
2064
if (invert)
2065
{
2066
rgb_index = 3 - rgb_index;
2067
a_index = 3 - a_index;
2068
}
2069
2070
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
2071
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
2072
}
2073
}
2074
2075
assert(total_results < MAX_ENCODE_RESULTS);
2076
if (total_results < MAX_ENCODE_RESULTS)
2077
{
2078
pResults[total_results].m_uastc_mode = mode;
2079
pResults[total_results].m_common_pattern = 0;
2080
pResults[total_results].m_astc = blk;
2081
pResults[total_results].m_astc_err = total_err;
2082
total_results++;
2083
}
2084
} // rot_comp
2085
}
2086
2087
// MODE 12
2088
// DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6
2089
static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2090
{
2091
const uint32_t weight_range = 5;
2092
const uint32_t endpoint_range = 19;
2093
2094
color_cell_compressor_params ccell_params;
2095
memset(&ccell_params, 0, sizeof(ccell_params));
2096
2097
ccell_params.m_num_pixels = 16;
2098
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2099
ccell_params.m_num_selector_weights = 8;
2100
ccell_params.m_pSelector_weights = g_bc7_weights3;
2101
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
2102
ccell_params.m_astc_endpoint_range = endpoint_range;
2103
ccell_params.m_weights[0] = 1;
2104
ccell_params.m_weights[1] = 1;
2105
ccell_params.m_weights[2] = 1;
2106
ccell_params.m_weights[3] = 1;
2107
ccell_params.m_has_alpha = true;
2108
2109
color_cell_compressor_results ccell_results;
2110
uint8_t ccell_result_selectors[16];
2111
uint8_t ccell_result_selectors_temp[16];
2112
memset(&ccell_results, 0, sizeof(ccell_results));
2113
ccell_results.m_pSelectors = &ccell_result_selectors[0];
2114
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2115
2116
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2117
2118
// ASTC
2119
astc_block_desc astc_results;
2120
memset(&astc_results, 0, sizeof(astc_results));
2121
2122
astc_results.m_dual_plane = false;
2123
astc_results.m_weight_range = weight_range;
2124
2125
astc_results.m_ccs = 0;
2126
astc_results.m_subsets = 1;
2127
astc_results.m_partition_seed = 0;
2128
astc_results.m_cem = 12;
2129
2130
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2131
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2132
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
2133
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
2134
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
2135
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
2136
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
2137
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
2138
2139
bool invert = false;
2140
2141
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
2142
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
2143
if (s1 < s0)
2144
{
2145
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
2146
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
2147
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
2148
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
2149
invert = true;
2150
}
2151
2152
for (uint32_t y = 0; y < 4; y++)
2153
{
2154
for (uint32_t x = 0; x < 4; x++)
2155
{
2156
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2157
2158
if (invert)
2159
astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
2160
}
2161
}
2162
2163
assert(total_results < MAX_ENCODE_RESULTS);
2164
if (total_results < MAX_ENCODE_RESULTS)
2165
{
2166
pResults[total_results].m_uastc_mode = 12;
2167
pResults[total_results].m_common_pattern = 0;
2168
pResults[total_results].m_astc = astc_results;
2169
pResults[total_results].m_astc_err = part_err;
2170
total_results++;
2171
}
2172
}
2173
2174
// 13. DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5
2175
static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2176
{
2177
bc7enc_compress_block_params local_comp_params(comp_params);
2178
local_comp_params.m_perceptual = false;
2179
local_comp_params.m_weights[0] = 1;
2180
local_comp_params.m_weights[1] = 1;
2181
local_comp_params.m_weights[2] = 1;
2182
local_comp_params.m_weights[3] = 1;
2183
2184
for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++)
2185
{
2186
const uint32_t weight_range = 0;
2187
const uint32_t endpoint_range = 20;
2188
2189
color_quad_u8 block_rgb[16];
2190
color_quad_u8 block_a[16];
2191
for (uint32_t i = 0; i < 16; i++)
2192
{
2193
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
2194
block_a[i] = block_rgb[i];
2195
2196
uint8_t c = block_a[i].m_c[rot_comp];
2197
block_a[i].m_c[0] = c;
2198
block_a[i].m_c[1] = c;
2199
block_a[i].m_c[2] = c;
2200
block_a[i].m_c[3] = 255;
2201
2202
block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
2203
block_rgb[i].m_c[3] = 255;
2204
}
2205
2206
uint8_t ccell_result_selectors_temp[16];
2207
2208
color_cell_compressor_params ccell_params_rgb;
2209
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
2210
2211
ccell_params_rgb.m_num_pixels = 16;
2212
ccell_params_rgb.m_pPixels = block_rgb;
2213
ccell_params_rgb.m_num_selector_weights = 2;
2214
ccell_params_rgb.m_pSelector_weights = g_bc7_weights1;
2215
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
2216
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
2217
ccell_params_rgb.m_weights[0] = 1;
2218
ccell_params_rgb.m_weights[1] = 1;
2219
ccell_params_rgb.m_weights[2] = 1;
2220
ccell_params_rgb.m_weights[3] = 1;
2221
2222
color_cell_compressor_results ccell_results_rgb;
2223
uint8_t ccell_result_selectors_rgb[16];
2224
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
2225
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
2226
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2227
2228
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
2229
2230
color_cell_compressor_params ccell_params_a;
2231
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
2232
2233
ccell_params_a.m_num_pixels = 16;
2234
ccell_params_a.m_pPixels = block_a;
2235
ccell_params_a.m_num_selector_weights = 2;
2236
ccell_params_a.m_pSelector_weights = g_bc7_weights1;
2237
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
2238
ccell_params_a.m_astc_endpoint_range = endpoint_range;
2239
ccell_params_a.m_weights[0] = 1;
2240
ccell_params_a.m_weights[1] = 1;
2241
ccell_params_a.m_weights[2] = 1;
2242
ccell_params_a.m_weights[3] = 1;
2243
2244
color_cell_compressor_results ccell_results_a;
2245
uint8_t ccell_result_selectors_a[16];
2246
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
2247
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
2248
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2249
2250
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
2251
2252
uint64_t total_err = part_err_rgb + part_err_a;
2253
2254
// ASTC
2255
astc_block_desc blk;
2256
memset(&blk, 0, sizeof(blk));
2257
2258
blk.m_dual_plane = true;
2259
blk.m_weight_range = weight_range;
2260
2261
blk.m_ccs = rot_comp;
2262
blk.m_subsets = 1;
2263
blk.m_partition_seed = 0;
2264
blk.m_cem = 12;
2265
2266
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
2267
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
2268
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
2269
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
2270
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
2271
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
2272
if (rot_comp == 3)
2273
{
2274
blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2275
blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2276
}
2277
else
2278
{
2279
blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
2280
blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
2281
}
2282
2283
bool invert = false;
2284
2285
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
2286
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
2287
if (s1 < s0)
2288
{
2289
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
2290
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
2291
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
2292
std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
2293
invert = true;
2294
}
2295
2296
for (uint32_t y = 0; y < 4; y++)
2297
{
2298
for (uint32_t x = 0; x < 4; x++)
2299
{
2300
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
2301
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
2302
2303
if (invert)
2304
{
2305
rgb_index = 1 - rgb_index;
2306
a_index = 1 - a_index;
2307
}
2308
2309
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
2310
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
2311
}
2312
}
2313
2314
assert(total_results < MAX_ENCODE_RESULTS);
2315
if (total_results < MAX_ENCODE_RESULTS)
2316
{
2317
pResults[total_results].m_uastc_mode = 13;
2318
pResults[total_results].m_common_pattern = 0;
2319
pResults[total_results].m_astc = blk;
2320
pResults[total_results].m_astc_err = total_err;
2321
total_results++;
2322
}
2323
} // rot_comp
2324
}
2325
2326
// MODE14
2327
// DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6
2328
static void astc_mode14(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2329
{
2330
const uint32_t weight_range = 2;
2331
const uint32_t endpoint_range = 20;
2332
2333
color_cell_compressor_params ccell_params;
2334
memset(&ccell_params, 0, sizeof(ccell_params));
2335
2336
ccell_params.m_num_pixels = 16;
2337
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2338
ccell_params.m_num_selector_weights = 4;
2339
ccell_params.m_pSelector_weights = g_bc7_weights2;
2340
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
2341
ccell_params.m_astc_endpoint_range = endpoint_range;
2342
ccell_params.m_weights[0] = 1;
2343
ccell_params.m_weights[1] = 1;
2344
ccell_params.m_weights[2] = 1;
2345
ccell_params.m_weights[3] = 1;
2346
ccell_params.m_has_alpha = true;
2347
2348
color_cell_compressor_results ccell_results;
2349
uint8_t ccell_result_selectors[16];
2350
uint8_t ccell_result_selectors_temp[16];
2351
memset(&ccell_results, 0, sizeof(ccell_results));
2352
ccell_results.m_pSelectors = &ccell_result_selectors[0];
2353
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2354
2355
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2356
2357
// ASTC
2358
astc_block_desc astc_results;
2359
memset(&astc_results, 0, sizeof(astc_results));
2360
2361
astc_results.m_dual_plane = false;
2362
astc_results.m_weight_range = weight_range;
2363
2364
astc_results.m_ccs = 0;
2365
astc_results.m_subsets = 1;
2366
astc_results.m_partition_seed = 0;
2367
astc_results.m_cem = 12;
2368
2369
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2370
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2371
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
2372
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
2373
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
2374
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
2375
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
2376
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
2377
2378
bool invert = false;
2379
2380
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
2381
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
2382
if (s1 < s0)
2383
{
2384
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
2385
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
2386
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
2387
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
2388
invert = true;
2389
}
2390
2391
for (uint32_t y = 0; y < 4; y++)
2392
{
2393
for (uint32_t x = 0; x < 4; x++)
2394
{
2395
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2396
2397
if (invert)
2398
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
2399
}
2400
}
2401
2402
assert(total_results < MAX_ENCODE_RESULTS);
2403
if (total_results < MAX_ENCODE_RESULTS)
2404
{
2405
pResults[total_results].m_uastc_mode = 14;
2406
pResults[total_results].m_common_pattern = 0;
2407
pResults[total_results].m_astc = astc_results;
2408
pResults[total_results].m_astc_err = part_err;
2409
total_results++;
2410
}
2411
}
2412
2413
// MODE 15
2414
// DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE6
2415
static void astc_mode15(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2416
{
2417
const uint32_t weight_range = 8;
2418
const uint32_t endpoint_range = 20;
2419
2420
color_cell_compressor_params ccell_params;
2421
memset(&ccell_params, 0, sizeof(ccell_params));
2422
2423
color_rgba temp_block[16];
2424
for (uint32_t i = 0; i < 16; i++)
2425
{
2426
const uint32_t l = ((const color_rgba*)block)[i].r;
2427
const uint32_t a = ((const color_rgba*)block)[i].a;
2428
2429
// Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
2430
temp_block[i].set_noclamp_rgba(l, 0, 0, a);
2431
}
2432
2433
ccell_params.m_num_pixels = 16;
2434
//ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2435
ccell_params.m_pPixels = (color_quad_u8*)temp_block;
2436
ccell_params.m_num_selector_weights = 16;
2437
ccell_params.m_pSelector_weights = g_astc_weights4;
2438
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
2439
ccell_params.m_astc_endpoint_range = endpoint_range;
2440
ccell_params.m_weights[0] = 1;
2441
ccell_params.m_weights[1] = 1;
2442
ccell_params.m_weights[2] = 1;
2443
ccell_params.m_weights[3] = 1;
2444
ccell_params.m_has_alpha = true;
2445
2446
color_cell_compressor_results ccell_results;
2447
uint8_t ccell_result_selectors[16];
2448
uint8_t ccell_result_selectors_temp[16];
2449
memset(&ccell_results, 0, sizeof(ccell_results));
2450
ccell_results.m_pSelectors = &ccell_result_selectors[0];
2451
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2452
2453
color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2454
2455
// ASTC
2456
astc_block_desc astc_results;
2457
memset(&astc_results, 0, sizeof(astc_results));
2458
2459
astc_results.m_dual_plane = false;
2460
astc_results.m_weight_range = weight_range;
2461
2462
astc_results.m_ccs = 0;
2463
astc_results.m_subsets = 1;
2464
astc_results.m_partition_seed = 0;
2465
astc_results.m_cem = 4;
2466
2467
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2468
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2469
2470
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[3];
2471
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[3];
2472
2473
for (uint32_t y = 0; y < 4; y++)
2474
for (uint32_t x = 0; x < 4; x++)
2475
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2476
2477
color_rgba colors[16];
2478
for (uint32_t c = 0; c < 4; c++)
2479
{
2480
colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
2481
colors[15].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
2482
}
2483
2484
for (uint32_t i = 1; i < 16 - 1; i++)
2485
for (uint32_t c = 0; c < 4; c++)
2486
colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[15].m_comps[c], g_astc_weights4[i], false);
2487
2488
uint64_t total_err = 0;
2489
for (uint32_t p = 0; p < 16; p++)
2490
total_err += color_distance_la(((const color_rgba*)block)[p], colors[ccell_result_selectors[p]]);
2491
2492
assert(total_results < MAX_ENCODE_RESULTS);
2493
if (total_results < MAX_ENCODE_RESULTS)
2494
{
2495
pResults[total_results].m_uastc_mode = 15;
2496
pResults[total_results].m_common_pattern = 0;
2497
pResults[total_results].m_astc = astc_results;
2498
pResults[total_results].m_astc_err = total_err;
2499
total_results++;
2500
}
2501
}
2502
2503
static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err)
2504
{
2505
uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0;
2506
2507
for (uint32_t y = 0; y < 4; y++)
2508
{
2509
for (uint32_t x = 0; x < 4; x++)
2510
{
2511
const int dr = (int)block[y][x].m_comps[0] - (int)decoded_block[y][x].m_comps[0];
2512
const int dg = (int)block[y][x].m_comps[1] - (int)decoded_block[y][x].m_comps[1];
2513
const int db = (int)block[y][x].m_comps[2] - (int)decoded_block[y][x].m_comps[2];
2514
const int da = (int)block[y][x].m_comps[3] - (int)decoded_block[y][x].m_comps[3];
2515
2516
total_err_r += dr * dr;
2517
total_err_g += dg * dg;
2518
total_err_b += db * db;
2519
total_err_a += da * da;
2520
}
2521
}
2522
2523
total_la_err = total_err_r + total_err_a;
2524
total_rgb_err = total_err_r + total_err_g + total_err_b;
2525
total_rgba_err = total_rgb_err + total_err_a;
2526
}
2527
2528
static void compute_bc1_hints(bool &bc1_hint0, bool &bc1_hint1, const uastc_encode_results &best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4])
2529
{
2530
const uint32_t best_mode = best_results.m_uastc_mode;
2531
const bool perceptual = false;
2532
2533
bc1_hint0 = false;
2534
bc1_hint1 = false;
2535
2536
if (best_mode == UASTC_MODE_INDEX_SOLID_COLOR)
2537
return;
2538
2539
if (!g_uastc_mode_has_bc1_hint0[best_mode] && !g_uastc_mode_has_bc1_hint1[best_mode])
2540
return;
2541
2542
color_rgba tblock_bc1[4][4];
2543
dxt1_block tbc1_block[8];
2544
basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0);
2545
unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]);
2546
2547
color_rgba tblock_hint0_bc1[4][4];
2548
color_rgba tblock_hint1_bc1[4][4];
2549
2550
etc_block etc1_blk;
2551
memset(&etc1_blk, 0, sizeof(etc1_blk));
2552
2553
eac_a8_block etc2_blk;
2554
memset(&etc2_blk, 0, sizeof(etc2_blk));
2555
etc2_blk.m_multiplier = 1;
2556
2557
// Pack to UASTC, then unpack, because the endpoints may be swapped.
2558
2559
uastc_block temp_ublock;
2560
pack_uastc(temp_ublock, best_results, etc1_blk, 0, etc2_blk, false, false);
2561
2562
unpacked_uastc_block temp_ublock_unpacked;
2563
unpack_uastc(temp_ublock, temp_ublock_unpacked, false);
2564
2565
unpacked_uastc_block ublock;
2566
memset(&ublock, 0, sizeof(ublock));
2567
ublock.m_mode = best_results.m_uastc_mode;
2568
ublock.m_common_pattern = best_results.m_common_pattern;
2569
ublock.m_astc = temp_ublock_unpacked.m_astc;
2570
2571
dxt1_block b;
2572
2573
// HINT1
2574
if (!g_uastc_mode_has_bc1_hint1[best_mode])
2575
{
2576
memset(tblock_hint1_bc1, 0, sizeof(tblock_hint1_bc1));
2577
}
2578
else
2579
{
2580
transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false);
2581
2582
unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]);
2583
}
2584
2585
// HINT0
2586
if (!g_uastc_mode_has_bc1_hint0[best_mode])
2587
{
2588
memset(tblock_hint0_bc1, 0, sizeof(tblock_hint0_bc1));
2589
}
2590
else
2591
{
2592
transcode_uastc_to_bc1_hint0(ublock, &b);
2593
2594
unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]);
2595
}
2596
2597
// Compute block errors
2598
uint64_t total_t_err = 0, total_hint0_err = 0, total_hint1_err = 0;
2599
for (uint32_t y = 0; y < 4; y++)
2600
{
2601
for (uint32_t x = 0; x < 4; x++)
2602
{
2603
total_t_err += color_distance(perceptual, block[y][x], tblock_bc1[y][x], false);
2604
total_hint0_err += color_distance(perceptual, block[y][x], tblock_hint0_bc1[y][x], false);
2605
total_hint1_err += color_distance(perceptual, block[y][x], tblock_hint1_bc1[y][x], false);
2606
}
2607
}
2608
2609
const float t_err = sqrtf((float)total_t_err);
2610
const float t_err_hint0 = sqrtf((float)total_hint0_err);
2611
const float t_err_hint1 = sqrtf((float)total_hint1_err);
2612
2613
const float err_thresh0 = 1.075f;
2614
const float err_thresh1 = 1.075f;
2615
2616
if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0))
2617
bc1_hint0 = true;
2618
2619
if ((g_uastc_mode_has_bc1_hint1[best_mode]) && (t_err_hint1 <= t_err * err_thresh1))
2620
bc1_hint1 = true;
2621
}
2622
2623
// Integer luma/chroma triple as filled in by rgb_to_y_cb_cr().
struct ycbcr
{
	int32_t m_y;	// weighted sum of R, G and B
	int32_t m_cb;	// (B << 8) minus m_y
	int32_t m_cr;	// (R << 8) minus m_y
};
2629
2630
static inline void rgb_to_y_cb_cr(const color_rgba& c, ycbcr& dst)
2631
{
2632
const int y = c.r * 54 + c.g * 183 + c.b * 19;
2633
dst.m_y = y;
2634
dst.m_cb = (c.b << 8) - y;
2635
dst.m_cr = (c.r << 8) - y;
2636
}
2637
2638
static inline uint64_t color_diff(const ycbcr& a, const ycbcr& b)
2639
{
2640
const int y_delta = a.m_y - b.m_y;
2641
const int cb_delta = a.m_cb - b.m_cb;
2642
const int cr_delta = a.m_cr - b.m_cr;
2643
return ((int64_t)y_delta * y_delta * 4) + ((int64_t)cr_delta * cr_delta) + ((int64_t)cb_delta * cb_delta);
2644
}
2645
2646
static inline int gray_distance2(const color_rgba& c, int r, int g, int b)
2647
{
2648
int gray_dist = (((int)c[0] - r) + ((int)c[1] - g) + ((int)c[2] - b) + 1) / 3;
2649
2650
int gray_point_r = clamp255(r + gray_dist);
2651
int gray_point_g = clamp255(g + gray_dist);
2652
int gray_point_b = clamp255(b + gray_dist);
2653
2654
int dist_to_gray_point_r = c[0] - gray_point_r;
2655
int dist_to_gray_point_g = c[1] - gray_point_g;
2656
int dist_to_gray_point_b = c[2] - gray_point_b;
2657
2658
return (dist_to_gray_point_r * dist_to_gray_point_r) + (dist_to_gray_point_g * dist_to_gray_point_g) + (dist_to_gray_point_b * dist_to_gray_point_b);
2659
}
2660
2661
static bool pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels)
2662
{
2663
int sums[3][2][2];
2664
2665
#define GET_XY(x, y, c) pSrc_pixels[(x) + ((y) * 4)][c]
2666
2667
for (uint32_t c = 0; c < 3; c++)
2668
{
2669
sums[c][0][0] = GET_XY(0, 0, c) + GET_XY(0, 1, c) + GET_XY(1, 0, c) + GET_XY(1, 1, c);
2670
sums[c][1][0] = GET_XY(2, 0, c) + GET_XY(2, 1, c) + GET_XY(3, 0, c) + GET_XY(3, 1, c);
2671
sums[c][0][1] = GET_XY(0, 2, c) + GET_XY(0, 3, c) + GET_XY(1, 2, c) + GET_XY(1, 3, c);
2672
sums[c][1][1] = GET_XY(2, 2, c) + GET_XY(2, 3, c) + GET_XY(3, 2, c) + GET_XY(3, 3, c);
2673
}
2674
2675
int upper_avg[3], lower_avg[3], left_avg[3], right_avg[3];
2676
for (uint32_t c = 0; c < 3; c++)
2677
{
2678
upper_avg[c] = (sums[c][0][0] + sums[c][1][0] + 4) / 8;
2679
lower_avg[c] = (sums[c][0][1] + sums[c][1][1] + 4) / 8;
2680
left_avg[c] = (sums[c][0][0] + sums[c][0][1] + 4) / 8;
2681
right_avg[c] = (sums[c][1][0] + sums[c][1][1] + 4) / 8;
2682
}
2683
2684
#undef GET_XY
2685
#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a[0], a[1], a[2])
2686
2687
int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0;
2688
for (uint32_t i = 0; i < 4; i++)
2689
{
2690
for (uint32_t j = 0; j < 2; j++)
2691
{
2692
upper_gray_dist += GET_XY(i, j, upper_avg);
2693
lower_gray_dist += GET_XY(i, 2 + j, lower_avg);
2694
left_gray_dist += GET_XY(j, i, left_avg);
2695
right_gray_dist += GET_XY(2 + j, i, right_avg);
2696
}
2697
}
2698
2699
#undef GET_XY
2700
2701
int upper_lower_sum = upper_gray_dist + lower_gray_dist;
2702
int left_right_sum = left_gray_dist + right_gray_dist;
2703
2704
return upper_lower_sum < left_right_sum;
2705
}
2706
2707
// Searches for the ETC1 block (flip, individual/differential mode, optional bias, and one
// intensity table per subblock) to store alongside the UASTC encoding.
//
// Base colors come from the average of the DECODED UASTC pixels in each subblock. Intensity
// tables are chosen by error against the decoded pixels, but the candidate that is finally kept
// is the one with the lowest error against the ORIGINAL block. The winning block and bias are
// written to best_etc1_blk / best_etc1_bias. Solid color blocks are packed directly.
// level and flags control how many flip/individ/bias combinations are tried.
static void compute_etc1_hints(etc_block& best_etc1_blk, uint32_t& best_etc1_bias, const uastc_encode_results& best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4], int level, uint32_t flags)
{
	best_etc1_bias = 0;

	if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
	{
		pack_etc1_block_solid_color(best_etc1_blk, &best_results.m_solid_color.m_comps[0]);
		return;
	}

	const bool faster_etc1 = (flags & cPackUASTCETC1FasterHints) != 0;
	const bool fastest_etc1 = (flags & cPackUASTCETC1FastestHints) != 0;

	const bool has_bias = g_uastc_mode_has_etc1_bias[best_results.m_uastc_mode];

	// 0 should be at the top, but we need 13 first because it represents bias (0,0,0).
	const uint8_t s_sorted_bias_modes[32] = { 13, 0, 22, 29, 27, 12, 26, 9, 30, 31, 8, 10, 25, 2, 23, 5, 15, 7, 3, 11, 6, 17, 28, 18, 1, 19, 20, 21, 24, 4, 14, 16 };

	const bool flip_estimate = faster_etc1 || fastest_etc1 || (level <= cPackUASTCLevelFaster);

	// Number of bias values to try. Levels up to "slower" walk the sorted table above with a
	// level-dependent budget; any other level tries all 32 biases in natural order.
	uint32_t last_bias = 1;
	bool use_faster_bias_mode_table = false;
	if (has_bias)
	{
		uint32_t faster_budget = 0, normal_budget = 0;
		bool level_has_budget = true;

		switch (level)
		{
		case cPackUASTCLevelFastest: faster_budget = 1;  normal_budget = 2;  break;
		case cPackUASTCLevelFaster:  faster_budget = 3;  normal_budget = 5;  break;
		case cPackUASTCLevelDefault: faster_budget = 10; normal_budget = 20; break;
		case cPackUASTCLevelSlower:  faster_budget = 16; normal_budget = 32; break;
		default: level_has_budget = false; break;
		}

		if (level_has_budget)
		{
			last_bias = fastest_etc1 ? 1 : (faster_etc1 ? faster_budget : normal_budget);
			use_faster_bias_mode_table = true;
		}
		else
		{
			last_bias = 32;
		}
	}

	memset(&best_etc1_blk, 0, sizeof(best_etc1_blk));
	uint64_t best_err = UINT64_MAX;

	etc_block trial_block;
	memset(&trial_block, 0, sizeof(trial_block));

	// Pre-convert both blocks so the inner loops only compare ycbcr values.
	ycbcr block_ycbcr[4][4], decoded_uastc_block_ycbcr[4][4];
	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			rgb_to_y_cb_cr(block[y][x], block_ycbcr[y][x]);
			rgb_to_y_cb_cr(decoded_uastc_block[y][x], decoded_uastc_block_ycbcr[y][x]);
		}
	}

	uint32_t first_flip = 0, last_flip = 2;
	const uint32_t first_individ = 0;
	uint32_t last_individ = 2;

	if (flags & cPackUASTCETC1DisableFlipAndIndividual)
	{
		last_flip = 1;
		last_individ = 1;
	}
	else if (flip_estimate)
	{
		// Only try the orientation the estimator prefers.
		first_flip = pack_etc1_estimate_flipped(&decoded_uastc_block[0][0]) ? 1 : 0;
		last_flip = first_flip + 1;
	}

	for (uint32_t flip = first_flip; flip < last_flip; flip++)
	{
		trial_block.set_flip_bit(flip != 0);

		for (uint32_t individ = first_individ; individ < last_individ; individ++)
		{
			const uint32_t mul = individ ? 15 : 31;

			trial_block.set_diff_bit(individ == 0);

			color_rgba unbiased_block_colors[2];

			// Per-subblock, per-channel extremes of the decoded pixels.
			int chan_min[2][3], chan_max[2][3];

			for (uint32_t subset = 0; subset < 2; subset++)
			{
				uint32_t chan_sum[3] = { 0, 0, 0 };
				for (uint32_t c = 0; c < 3; c++)
				{
					chan_min[subset][c] = 255;
					chan_max[subset][c] = 0;
				}

				for (uint32_t j = 0; j < 8; j++)
				{
					const etc_coord2 &coord = g_etc1_pixel_coords[flip][subset][j];
					const color_rgba& p = decoded_uastc_block[coord.m_y][coord.m_x];

					for (uint32_t c = 0; c < 3; c++)
					{
						const uint32_t v = p.m_comps[c];
						chan_sum[c] += v;
						chan_min[subset][c] = basisu::minimum<uint32_t>(chan_min[subset][c], v);
						chan_max[subset][c] = basisu::maximum<uint32_t>(chan_max[subset][c], v);
					}
				} // j

				// Scale the 8-pixel sum to the 4-bit (mul=15) or 5-bit (mul=31) range, with rounding.
				for (uint32_t c = 0; c < 3; c++)
					unbiased_block_colors[subset][c] = (uint8_t)((chan_sum[c] * mul + 1020) / (8 * 255));
				unbiased_block_colors[subset][3] = 0;

			} // subset

			for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++)
			{
				const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter;

				color_rgba block_colors[2];
				for (uint32_t subset = 0; subset < 2; subset++)
					block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset];

				if (individ)
					trial_block.set_block_color4(block_colors[0], block_colors[1]);
				else
					trial_block.set_block_color5_clamp(block_colors[0], block_colors[1]);

				// Largest per-channel distance from the base color to the subblock's extremes.
				uint32_t range[2];
				for (uint32_t subset = 0; subset < 2; subset++)
				{
					const color_rgba base_c(trial_block.get_block_color(subset, true));

					int widest = 0;
					for (uint32_t c = 0; c < 3; c++)
					{
						const int above = iabs(chan_max[subset][c] - base_c.m_comps[c]);
						const int below = iabs(base_c.m_comps[c] - chan_min[subset][c]);
						widest = maximum(widest, above, below);
					}

					range[subset] = widest;
				}

				uint32_t best_inten_table[2] = { 0, 0 };

				for (uint32_t subset = 0; subset < 2; subset++)
				{
					uint64_t best_subset_err = UINT64_MAX;

					// Small ranges only try the first few intensity tables (unless at the very slow level).
					uint32_t inten_table_limit;
					if ((level == cPackUASTCLevelVerySlow) || (range[subset] > 51))
						inten_table_limit = 8;
					else
						inten_table_limit = (range[subset] >= 7) ? 4 : 2;

					for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++)
					{
						trial_block.set_inten_table(subset, inten_table);

						color_rgba color_table[4];
						trial_block.get_block_colors(color_table, subset);

						ycbcr color_table_ycbcr[4];
						for (uint32_t i = 0; i < 4; i++)
							rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]);

						// Error of the closest of the four table colors to c.
						const auto nearest_err = [&color_table_ycbcr](const ycbcr& c) -> uint64_t
						{
							return minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
						};

						uint64_t total_error = 0;
						if (flip)
						{
							for (uint32_t y = 0; y < 2; y++)
							{
								for (uint32_t x = 0; x < 4; x++)
									total_error += nearest_err(decoded_uastc_block_ycbcr[subset * 2 + y][x]);

								// Early out of the row loop only; the next table is still tried.
								if (total_error >= best_subset_err)
									break;
							}
						}
						else
						{
							for (uint32_t y = 0; y < 4; y++)
							{
								for (uint32_t x = 0; x < 2; x++)
									total_error += nearest_err(decoded_uastc_block_ycbcr[y][subset * 2 + x]);
							}

							// NOTE(review): unlike the flipped path above, this break sits outside the row
							// loop, so it ends the whole inten_table search at the first table that fails
							// to improve. Behavior is kept as-is; confirm whether this is intentional.
							if (total_error >= best_subset_err)
								break;
						}

						if (total_error < best_subset_err)
						{
							best_subset_err = total_error;
							best_inten_table[subset] = inten_table;
						}

					} // inten_table

				} // subset

				trial_block.set_inten_table(0, best_inten_table[0]);
				trial_block.set_inten_table(1, best_inten_table[1]);

				// Compute error against the ORIGINAL block.
				uint64_t err = 0;

				for (uint32_t subset = 0; subset < 2; subset++)
				{
					color_rgba color_table[4];
					trial_block.get_block_colors(color_table, subset);

					ycbcr color_table_ycbcr[4];
					for (uint32_t i = 0; i < 4; i++)
						rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]);

					// Selects the table color closest to the DECODED pixel (the index rides in the low
					// two bits of the minimum), then charges that color's error against the ORIGINAL pixel.
					const auto add_pixel_err = [&](uint32_t px, uint32_t py)
					{
						const ycbcr& c = decoded_uastc_block_ycbcr[py][px];
						const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3);

						const uint32_t best_index = (uint32_t)best_index_err & 3;
						err += color_diff(block_ycbcr[py][px], color_table_ycbcr[best_index]);
					};

					if (flip)
					{
						for (uint32_t y = 0; y < 2; y++)
						{
							for (uint32_t x = 0; x < 4; x++)
								add_pixel_err(x, subset * 2 + y);

							if (err >= best_err)
								break;
						}
					}
					else
					{
						for (uint32_t y = 0; y < 4; y++)
						{
							for (uint32_t x = 0; x < 2; x++)
								add_pixel_err(subset * 2 + x, y);

							if (err >= best_err)
								break;
						}
					}

				} // subset

				if (err < best_err)
				{
					best_err = err;

					best_etc1_blk = trial_block;
					best_etc1_bias = bias;
				}

			} // bias_iter

		} // individ

	} // flip
}
3004
3005
// Parameters chosen by uastc_pack_eac_a8().
struct uastc_pack_eac_a8_results
{
	uint32_t m_base;		// base alpha value
	uint32_t m_table;		// modifier table index
	uint32_t m_multiplier;	// scale applied to the table entries
};
3011
3012
// Searches for the EAC A8 parameters (base, multiplier, table) that best reproduce the given
// alpha values.
//
//   results         - receives the best parameters found
//   pPixels         - num_pixels alpha values (num_pixels must be <= 16)
//   base_search_rad - bases within +/- this radius of the estimated center are tried
//   mul_search_rad  - multipliers within +/- this radius of the estimate are tried (clamped to [1,15])
//   table_mask      - bit N set means modifier table N (0-15) may be used
//
// Returns the summed squared error of the best candidate. Uniform (or empty) input returns 0
// immediately with table 13 and multiplier 1. If table_mask enables none of the 16 tables,
// results is left untouched and UINT64_MAX is returned.
static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask)
{
	assert(num_pixels <= 16);

	uint32_t min_alpha = 255, max_alpha = 0;
	for (uint32_t i = 0; i < num_pixels; i++)
	{
		const uint32_t a = pPixels[i];
		if (a < min_alpha) min_alpha = a;
		if (a > max_alpha) max_alpha = a;
	}

	// ">=" rather than "==": with num_pixels == 0 the initial values (255, 0) survive, and
	// computing max_alpha - min_alpha below would underflow.
	if (min_alpha >= max_alpha)
	{
		results.m_base = min_alpha;
		results.m_table = 13;
		results.m_multiplier = 1;
		return 0;
	}

	const uint32_t alpha_range = max_alpha - min_alpha;

	// The search windows are computed in signed arithmetic so that "estimate - radius" can go
	// negative and be clamped, instead of relying on unsigned wraparound.
	const int base_rad = (int)base_search_rad;
	const int mul_rad = (int)mul_search_rad;

	uint64_t best_err = UINT64_MAX;

	for (uint32_t table = 0; table < 16; table++)
	{
		if ((table_mask & (1U << table)) == 0)
			continue;

		// Estimate where the base should sit so the table's min/max modifiers span [min_alpha, max_alpha].
		const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
		const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));

		const int base_min = clamp255(center - base_rad);
		const int base_max = clamp255(center + base_rad);

		const int mul = (int)roundf(alpha_range / range);
		const int mul_low = clamp<int>(mul - mul_rad, 1, 15);
		const int mul_high = clamp<int>(mul + mul_rad, 1, 15);

		for (int base = base_min; base <= base_max; base++)
		{
			for (int multiplier = mul_low; multiplier <= mul_high; multiplier++)
			{
				uint64_t total_err = 0;

				for (uint32_t i = 0; i < num_pixels; i++)
				{
					const int a = pPixels[i];

					// Smallest absolute error over the 8 selectors of this table.
					uint32_t best_s_err = UINT32_MAX;
					for (uint32_t s = 0; s < 8; s++)
					{
						const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base);

						uint32_t err = iabs(a - v);
						if (err < best_s_err)
							best_s_err = err;
					}

					total_err += best_s_err * best_s_err;

					// This candidate can no longer beat the best one.
					if (total_err >= best_err)
						break;
				}

				if (total_err < best_err)
				{
					best_err = total_err;
					results.m_base = base;
					results.m_multiplier = multiplier;
					results.m_table = table;

					// A perfect match cannot be improved on.
					if (!best_err)
						return best_err;
				}

			} // multiplier

		} // base

	} // table

	return best_err;
}
3098
3099
// BC7 error weight used by encode_uastc() when neither the favor-UASTC nor the favor-BC7 flag is set.
const int32_t DEFAULT_BC7_ERROR_WEIGHT = 50;

// NOTE(review): presumably the allowed UASTC error growth factor when trading UASTC error for BC7 error - confirm at the use site.
const float UASTC_ERROR_THRESH = 1.3f;
3101
3102
// TODO: This is a quick hack to favor certain modes when we know we'll be followed up with an RDO postprocess.
// Returns 0.8 for the favored modes (0 and 10) and 1.0 for every other mode.
static inline float get_uastc_mode_weight(uint32_t mode)
{
	const float FAVORED_MODE_WEIGHT = .8f;

	const bool is_favored_mode = (mode == 0) || (mode == 10);
	if (is_favored_mode)
		return FAVORED_MODE_WEIGHT;

	return 1.0f;
}
3118
3119
void encode_uastc(const uint8_t* pRGBAPixels, uastc_block& output_block, uint32_t flags)
3120
{
3121
// printf("encode_uastc: \n");
3122
// for (int i = 0; i < 16; i++)
3123
// printf("[%u %u %u %u] ", pRGBAPixels[i * 4 + 0], pRGBAPixels[i * 4 + 1], pRGBAPixels[i * 4 + 2], pRGBAPixels[i * 4 + 3]);
3124
// printf("\n");
3125
3126
const color_rgba(*block)[4] = reinterpret_cast<const color_rgba(*)[4]>(pRGBAPixels);
3127
3128
bool solid_color = true, has_alpha = false, is_la = true;
3129
3130
const color_rgba first_color(block[0][0]);
3131
for (uint32_t y = 0; y < 4; y++)
3132
{
3133
for (uint32_t x = 0; x < 4; x++)
3134
{
3135
if (block[y][x].a < 255)
3136
has_alpha = true;
3137
3138
if (block[y][x] != first_color)
3139
solid_color = false;
3140
3141
if ((block[y][x].r != block[y][x].g) || (block[y][x].r != block[y][x].b))
3142
is_la = false;
3143
}
3144
}
3145
3146
if (solid_color)
3147
{
3148
// Solid color blocks are so common that we handle them specially and as quickly as we can.
3149
uastc_encode_results solid_results;
3150
solid_results.m_uastc_mode = UASTC_MODE_INDEX_SOLID_COLOR;
3151
solid_results.m_astc_err = 0;
3152
solid_results.m_common_pattern = 0;
3153
solid_results.m_solid_color = first_color;
3154
memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc));
3155
3156
etc_block etc1_blk;
3157
uint32_t etc1_bias = 0;
3158
3159
pack_etc1_block_solid_color(etc1_blk, &first_color.m_comps[0]);
3160
3161
eac_a8_block eac_a8_blk;
3162
eac_a8_blk.m_table = 0;
3163
eac_a8_blk.m_multiplier = 1;
3164
3165
pack_uastc(output_block, solid_results, etc1_blk, etc1_bias, eac_a8_blk, false, false);
3166
3167
// printf(" Solid\n");
3168
3169
return;
3170
}
3171
3172
int level = flags & 7;
3173
const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0;
3174
const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0);
3175
//const bool etc1_perceptual = true;
3176
3177
// TODO: This uses 64KB of stack space!
3178
uastc_encode_results results[MAX_ENCODE_RESULTS];
3179
3180
level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow);
3181
3182
// Set all options to slowest, then configure from there depending on the selected level.
3183
uint32_t mode_mask = UINT32_MAX;
3184
uint32_t uber_level = 6;
3185
bool estimate_partition = false;
3186
bool always_try_alpha_modes = true;
3187
uint32_t eac_a8_mul_search_rad = 3;
3188
uint32_t eac_a8_table_mask = UINT32_MAX;
3189
uint32_t least_squares_passes = 2;
3190
bool bc1_hints = true;
3191
bool only_use_la_on_transparent_blocks = false;
3192
3193
switch (level)
3194
{
3195
case cPackUASTCLevelFastest:
3196
{
3197
mode_mask = (1 << 0) | (1 << 8) |
3198
(1 << 11) | (1 << 12) |
3199
(1 << 15);
3200
always_try_alpha_modes = false;
3201
eac_a8_mul_search_rad = 0;
3202
eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3203
uber_level = 0;
3204
least_squares_passes = 1;
3205
bc1_hints = false;
3206
estimate_partition = true;
3207
only_use_la_on_transparent_blocks = true;
3208
break;
3209
}
3210
case cPackUASTCLevelFaster:
3211
{
3212
mode_mask = (1 << 0) | (1 << 4) | (1 << 6) | (1 << 8) |
3213
(1 << 9) | (1 << 11) | (1 << 12) |
3214
(1 << 15) | (1 << 17);
3215
always_try_alpha_modes = false;
3216
eac_a8_mul_search_rad = 0;
3217
eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3218
uber_level = 0;
3219
least_squares_passes = 1;
3220
estimate_partition = true;
3221
break;
3222
}
3223
case cPackUASTCLevelDefault:
3224
{
3225
mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) |
3226
(1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) |
3227
(1 << 15) | (1 << 16) | (1 << 17);
3228
always_try_alpha_modes = false;
3229
eac_a8_mul_search_rad = 1;
3230
eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13);
3231
uber_level = 1;
3232
least_squares_passes = 1;
3233
estimate_partition = true;
3234
break;
3235
}
3236
case cPackUASTCLevelSlower:
3237
{
3238
always_try_alpha_modes = false;
3239
eac_a8_mul_search_rad = 2;
3240
uber_level = 3;
3241
estimate_partition = true;
3242
break;
3243
}
3244
case cPackUASTCLevelVerySlow:
3245
{
3246
break;
3247
}
3248
}
3249
3250
#if BASISU_SUPPORT_FORCE_MODE
3251
static int force_mode = -1;
3252
force_mode = (force_mode + 1) % TOTAL_UASTC_MODES;
3253
mode_mask = UINT32_MAX;
3254
always_try_alpha_modes = true;
3255
only_use_la_on_transparent_blocks = false;
3256
#endif
3257
3258
// HACK HACK
3259
//mode_mask &= ~(1 << 18);
3260
//mode_mask = (1 << 18)| (1 << 10);
3261
3262
uint32_t total_results = 0;
3263
3264
if (only_use_la_on_transparent_blocks)
3265
{
3266
if ((is_la) && (!has_alpha))
3267
is_la = false;
3268
}
3269
3270
const bool try_alpha_modes = has_alpha || always_try_alpha_modes;
3271
3272
bc7enc_compress_block_params comp_params;
3273
memset(&comp_params, 0, sizeof(comp_params));
3274
comp_params.m_max_partitions_mode1 = 64;
3275
comp_params.m_least_squares_passes = least_squares_passes;
3276
comp_params.m_weights[0] = 1;
3277
comp_params.m_weights[1] = 1;
3278
comp_params.m_weights[2] = 1;
3279
comp_params.m_weights[3] = 1;
3280
comp_params.m_uber_level = uber_level;
3281
3282
if (is_la)
3283
{
3284
if (mode_mask & (1U << 15))
3285
astc_mode15(block, results, total_results, comp_params);
3286
3287
if (mode_mask & (1U << 16))
3288
astc_mode9_or_16(16, block, results, total_results, comp_params, estimate_partition ? 4 : 0);
3289
3290
if (mode_mask & (1U << 17))
3291
astc_mode11_or_17(17, block, results, total_results, comp_params);
3292
}
3293
3294
if (!has_alpha)
3295
{
3296
if (mode_mask & (1U << 0))
3297
astc_mode0_or_18(0, block, results, total_results, comp_params);
3298
3299
if (mode_mask & (1U << 1))
3300
astc_mode1(block, results, total_results, comp_params);
3301
3302
if (mode_mask & (1U << 2))
3303
astc_mode2(block, results, total_results, comp_params, estimate_partition);
3304
3305
if (mode_mask & (1U << 3))
3306
astc_mode3(block, results, total_results, comp_params, estimate_partition);
3307
3308
if (mode_mask & (1U << 4))
3309
astc_mode4(block, results, total_results, comp_params, estimate_partition);
3310
3311
if (mode_mask & (1U << 5))
3312
astc_mode5(block, results, total_results, comp_params);
3313
3314
if (mode_mask & (1U << 6))
3315
astc_mode6(block, results, total_results, comp_params);
3316
3317
if (mode_mask & (1U << 7))
3318
astc_mode7(block, results, total_results, comp_params, estimate_partition);
3319
3320
if (mode_mask & (1U << 18))
3321
astc_mode0_or_18(18, block, results, total_results, comp_params);
3322
}
3323
3324
if (try_alpha_modes)
3325
{
3326
if (mode_mask & (1U << 9))
3327
astc_mode9_or_16(9, block, results, total_results, comp_params, estimate_partition ? 4 : 0);
3328
3329
if (mode_mask & (1U << 10))
3330
astc_mode10(block, results, total_results, comp_params);
3331
3332
if (mode_mask & (1U << 11))
3333
astc_mode11_or_17(11, block, results, total_results, comp_params);
3334
3335
if (mode_mask & (1U << 12))
3336
astc_mode12(block, results, total_results, comp_params);
3337
3338
if (mode_mask & (1U << 13))
3339
astc_mode13(block, results, total_results, comp_params);
3340
3341
if (mode_mask & (1U << 14))
3342
astc_mode14(block, results, total_results, comp_params);
3343
}
3344
3345
assert(total_results);
3346
3347
// Fix up the errors so we consistently have LA, RGB, or RGBA error.
3348
for (uint32_t i = 0; i < total_results; i++)
3349
{
3350
uastc_encode_results& r = results[i];
3351
if (!is_la)
3352
{
3353
if (g_uastc_mode_is_la[r.m_uastc_mode])
3354
{
3355
color_rgba unpacked_block[16];
3356
unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false);
3357
3358
uint64_t total_err = 0;
3359
for (uint32_t j = 0; j < 16; j++)
3360
total_err += color_distance(unpacked_block[j], ((const color_rgba*)block)[j], true);
3361
3362
r.m_astc_err = total_err;
3363
}
3364
}
3365
else
3366
{
3367
if (!g_uastc_mode_is_la[r.m_uastc_mode])
3368
{
3369
color_rgba unpacked_block[16];
3370
unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false);
3371
3372
uint64_t total_err = 0;
3373
for (uint32_t j = 0; j < 16; j++)
3374
total_err += color_distance_la(unpacked_block[j], ((const color_rgba*)block)[j]);
3375
3376
r.m_astc_err = total_err;
3377
}
3378
}
3379
}
3380
3381
unpacked_uastc_block unpacked_ublock;
3382
memset(&unpacked_ublock, 0, sizeof(unpacked_ublock));
3383
3384
uint64_t total_overall_err[MAX_ENCODE_RESULTS];
3385
float uastc_err_f[MAX_ENCODE_RESULTS];
3386
double best_uastc_err_f = 1e+20f;
3387
3388
int best_index = -1;
3389
3390
if (total_results == 1)
3391
{
3392
best_index = 0;
3393
}
3394
else
3395
{
3396
const uint32_t bc7_err_weight = favor_bc7_error ? 100 : ((favor_uastc_error ? 0 : DEFAULT_BC7_ERROR_WEIGHT));
3397
const uint32_t uastc_err_weight = favor_bc7_error ? 0 : 100;
3398
3399
// Find best overall results, balancing UASTC and UASTC->BC7 error.
3400
// We purposely allow UASTC error to increase a little, if doing so lowers the BC7 error.
3401
for (uint32_t i = 0; i < total_results; i++)
3402
{
3403
#if BASISU_SUPPORT_FORCE_MODE
3404
if (results[i].m_uastc_mode == force_mode)
3405
{
3406
best_index = i;
3407
break;
3408
}
3409
#endif
3410
3411
unpacked_ublock.m_mode = results[i].m_uastc_mode;
3412
unpacked_ublock.m_astc = results[i].m_astc;
3413
unpacked_ublock.m_common_pattern = results[i].m_common_pattern;
3414
unpacked_ublock.m_solid_color = results[i].m_solid_color.get_color32();
3415
3416
color_rgba decoded_uastc_block[4][4];
3417
bool success = unpack_uastc(results[i].m_uastc_mode, results[i].m_common_pattern, results[i].m_solid_color.get_color32(), results[i].m_astc, (basist::color32 *)&decoded_uastc_block[0][0], false);
3418
(void)success;
3419
VALIDATE(success);
3420
3421
uint64_t total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err;
3422
compute_block_error(block, decoded_uastc_block, total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err);
3423
3424
// Validate the computed error, or we're go mad if it's inaccurate.
3425
if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3426
{
3427
VALIDATE(total_uastc_rgba_err == 0);
3428
}
3429
else if (is_la)
3430
{
3431
VALIDATE(total_uastc_la_err == results[i].m_astc_err);
3432
}
3433
else if (g_uastc_mode_has_alpha[results[i].m_uastc_mode])
3434
{
3435
VALIDATE(total_uastc_rgba_err == results[i].m_astc_err);
3436
}
3437
else
3438
{
3439
VALIDATE(total_uastc_rgb_err == results[i].m_astc_err);
3440
}
3441
3442
// Transcode to BC7
3443
bc7_optimization_results bc7_results;
3444
transcode_uastc_to_bc7(unpacked_ublock, bc7_results);
3445
3446
bc7_block bc7_data;
3447
encode_bc7_block(&bc7_data, &bc7_results);
3448
3449
color_rgba decoded_bc7_block[4][4];
3450
unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]);
3451
3452
// Compute BC7 error
3453
uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err;
3454
compute_block_error(block, decoded_bc7_block, total_bc7_rgb_err, total_bc7_rgba_err, total_bc7_la_err);
3455
3456
if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3457
{
3458
VALIDATE(total_bc7_rgba_err == 0);
3459
3460
best_index = i;
3461
break;
3462
}
3463
3464
uint64_t total_uastc_err = 0, total_bc7_err = 0;
3465
if (is_la)
3466
{
3467
total_bc7_err = total_bc7_la_err;
3468
total_uastc_err = total_uastc_la_err;
3469
}
3470
else if (has_alpha)
3471
{
3472
total_bc7_err = total_bc7_rgba_err;
3473
total_uastc_err = total_uastc_rgba_err;
3474
}
3475
else
3476
{
3477
total_bc7_err = total_bc7_rgb_err;
3478
total_uastc_err = total_uastc_rgb_err;
3479
}
3480
3481
total_overall_err[i] = ((total_bc7_err * bc7_err_weight) / 100) + ((total_uastc_err * uastc_err_weight) / 100);
3482
if (!total_overall_err[i])
3483
{
3484
best_index = i;
3485
break;
3486
}
3487
3488
uastc_err_f[i] = sqrtf((float)total_uastc_err);
3489
3490
if (uastc_err_f[i] < best_uastc_err_f)
3491
{
3492
best_uastc_err_f = uastc_err_f[i];
3493
}
3494
3495
} // total_results
3496
3497
if (best_index < 0)
3498
{
3499
uint64_t best_err = UINT64_MAX;
3500
3501
if ((best_uastc_err_f == 0.0f) || (favor_bc7_error))
3502
{
3503
for (uint32_t i = 0; i < total_results; i++)
3504
{
3505
// TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression.
3506
const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f;
3507
3508
const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight);
3509
if (w < best_err)
3510
{
3511
best_err = w;
3512
best_index = i;
3513
if (!best_err)
3514
break;
3515
}
3516
} // i
3517
}
3518
else
3519
{
3520
// Scan the UASTC results, and consider all results within a window that has the best UASTC+BC7 error.
3521
for (uint32_t i = 0; i < total_results; i++)
3522
{
3523
double err_delta = uastc_err_f[i] / best_uastc_err_f;
3524
3525
if (err_delta <= UASTC_ERROR_THRESH)
3526
{
3527
// TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression.
3528
const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f;
3529
3530
const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight);
3531
if (w < best_err)
3532
{
3533
best_err = w;
3534
best_index = i;
3535
if (!best_err)
3536
break;
3537
}
3538
}
3539
} // i
3540
}
3541
}
3542
}
3543
3544
const uastc_encode_results& best_results = results[best_index];
3545
const uint32_t best_mode = best_results.m_uastc_mode;
3546
const astc_block_desc& best_astc_results = best_results.m_astc;
3547
3548
color_rgba decoded_uastc_block[4][4];
3549
bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false);
3550
(void)success;
3551
VALIDATE(success);
3552
3553
#if BASISU_VALIDATE_UASTC_ENC
3554
// Make sure that the UASTC block unpacks to the same exact pixels as the ASTC block does, using two different decoders.
3555
{
3556
// Round trip to packed UASTC and back, then decode to pixels.
3557
etc_block etc1_blk;
3558
memset(&etc1_blk, 0, sizeof(etc1_blk));
3559
eac_a8_block etc_eac_a8_blk;
3560
memset(&etc_eac_a8_blk, 0, sizeof(etc_eac_a8_blk));
3561
etc_eac_a8_blk.m_multiplier = 1;
3562
3563
basist::uastc_block temp_block;
3564
pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false);
3565
3566
basist::color32 temp_block_unpacked[4][4];
3567
success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false);
3568
VALIDATE(success);
3569
3570
// Now round trip to packed ASTC and back, then decode to pixels.
3571
uint32_t astc_data[4];
3572
3573
if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3574
pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color);
3575
else
3576
{
3577
success = pack_astc_block(astc_data, &best_astc_results, best_results.m_uastc_mode);
3578
VALIDATE(success);
3579
}
3580
3581
color_rgba decoded_astc_block[4][4];
3582
success = basisu_astc::astc::decompress_ldr((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
3583
VALIDATE(success);
3584
3585
for (uint32_t y = 0; y < 4; y++)
3586
{
3587
for (uint32_t x = 0; x < 4; x++)
3588
{
3589
VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]);
3590
3591
VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r);
3592
VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g);
3593
VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b);
3594
VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a);
3595
}
3596
}
3597
}
3598
#endif
3599
3600
// Compute BC1 hints
3601
bool bc1_hint0 = false, bc1_hint1 = false;
3602
if (bc1_hints)
3603
compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block);
3604
3605
eac_a8_block eac_a8_blk;
3606
if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR))
3607
{
3608
// Compute ETC2 hints
3609
uint8_t decoded_uastc_block_alpha[16];
3610
for (uint32_t i = 0; i < 16; i++)
3611
decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a;
3612
3613
uastc_pack_eac_a8_results eac8_a8_results;
3614
memset(&eac8_a8_results, 0, sizeof(eac8_a8_results));
3615
uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask);
3616
3617
// All we care about for hinting is the table and multiplier.
3618
eac_a8_blk.m_table = eac8_a8_results.m_table;
3619
eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier;
3620
}
3621
else
3622
{
3623
memset(&eac_a8_blk, 0, sizeof(eac_a8_blk));
3624
}
3625
3626
// Compute ETC1 hints
3627
etc_block etc1_blk;
3628
uint32_t etc1_bias = 0;
3629
compute_etc1_hints(etc1_blk, etc1_bias, best_results, block, decoded_uastc_block, level, flags);
3630
3631
// Finally, pack the UASTC block with its hints and we're done.
3632
pack_uastc(output_block, best_results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1);
3633
3634
// printf(" Packed: ");
3635
// for (int i = 0; i < 16; i++)
3636
// printf("%X ", output_block.m_bytes[i]);
3637
// printf("\n");
3638
}
3639
3640
// Recomputes the transcoding hints (BC1, ETC2 EAC A8 and ETC1) of an already-encoded UASTC block
// and repacks the block in place.
//
// pBlock         Packed block. Read only when pUnpacked_blk is null; overwritten by pack_uastc() on success.
// pBlock_pixels  Source texels the hints are computed against (reinterpreted as a [4][4] array, so 16 texels).
// flags          Pack flags. The cPackUASTCLevelMask bits select how much hint search effort is spent;
//                the full value is also forwarded to compute_etc1_hints().
// pUnpacked_blk  Optional, already-unpacked form of the block. When non-null it is used instead of unpacking *pBlock.
//
// Returns false if the block cannot be unpacked or decoded to texels, true otherwise.
static bool uastc_recompute_hints(basist::uastc_block* pBlock, const color_rgba* pBlock_pixels, uint32_t flags, const unpacked_uastc_block *pUnpacked_blk)
{
	unpacked_uastc_block unpacked_blk;

	if (pUnpacked_blk)
		unpacked_blk = *pUnpacked_blk;
	else
	{
		if (!unpack_uastc(*pBlock, unpacked_blk, false, true))
			return false;
	}

	color_rgba decoded_uastc_block[4][4];
	if (!unpack_uastc(unpacked_blk, (basist::color32 *)decoded_uastc_block, false))
		return false;

	// Rebuild an encode-results record from the unpacked block so the hint helpers and pack_uastc() can consume it.
	uastc_encode_results results;
	results.m_uastc_mode = unpacked_blk.m_mode;
	results.m_common_pattern = unpacked_blk.m_common_pattern;
	results.m_astc = unpacked_blk.m_astc;
	results.m_solid_color = unpacked_blk.m_solid_color;
	results.m_astc_err = 0;

	// Defaults apply to any level not handled below (and to cPackUASTCLevelVerySlow, which changes nothing).
	bool bc1_hints = true;
	uint32_t eac_a8_mul_search_rad = 3;
	uint32_t eac_a8_table_mask = UINT32_MAX;

	const uint32_t level = flags & cPackUASTCLevelMask;

	switch (level)
	{
	case cPackUASTCLevelFastest:
	{
		eac_a8_mul_search_rad = 0;
		eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
		bc1_hints = false;
		break;
	}
	case cPackUASTCLevelFaster:
	{
		eac_a8_mul_search_rad = 0;
		eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
		break;
	}
	case cPackUASTCLevelDefault:
	{
		eac_a8_mul_search_rad = 1;
		eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13);
		break;
	}
	case cPackUASTCLevelSlower:
	{
		eac_a8_mul_search_rad = 2;
		break;
	}
	case cPackUASTCLevelVerySlow:
	{
		break;
	}
	}

	// BC1 hints (skipped entirely at the fastest level).
	bool bc1_hint0 = false, bc1_hint1 = false;
	if (bc1_hints)
		compute_bc1_hints(bc1_hint0, bc1_hint1, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block);

	const uint32_t best_mode = unpacked_blk.m_mode;

	// ETC2 EAC A8 hints. Start from an all-zero block so no field is ever left indeterminate;
	// only the table and multiplier are filled in for alpha modes.
	eac_a8_block eac_a8_blk;
	memset(&eac_a8_blk, 0, sizeof(eac_a8_blk));

	if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR))
	{
		uint8_t decoded_uastc_block_alpha[16];
		for (uint32_t i = 0; i < 16; i++)
			decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a;

		uastc_pack_eac_a8_results eac8_a8_results;
		memset(&eac8_a8_results, 0, sizeof(eac8_a8_results));
		uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask);

		eac_a8_blk.m_table = eac8_a8_results.m_table;
		eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier;
	}

	// ETC1 hints.
	etc_block etc1_blk;
	uint32_t etc1_bias = 0;
	compute_etc1_hints(etc1_blk, etc1_bias, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block, level, flags);

	// Repack the block with the freshly computed hints.
	pack_uastc(*pBlock, results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1);

	return true;
}
3721
3722
// Per-UASTC-mode location of the selector (weight) bits inside a packed 128-bit block:
//   [mode][0] = bit offset of the first selector bit
//   [mode][1] = total number of selector bits
// Used by uastc_rdo_blocks() to read and overwrite a block's selectors.
// Index 8 is { 0, 0 } (no selector bits) - presumably the solid-color mode, which uastc_rdo_blocks() skips.
static const uint8_t g_uastc_mode_selector_bits[TOTAL_UASTC_MODES][2] =
{
	{ 65, 63 }, { 69, 31 }, { 73, 46 }, { 89, 29 },	// modes 0-3
	{ 89, 30 }, { 68, 47 }, { 66, 62 }, { 89, 30 },	// modes 4-7
	{ 0, 0 },   { 97, 30 }, { 65, 63 }, { 66, 62 },	// modes 8-11
	{ 81, 47 }, { 94, 30 }, { 92, 31 }, { 62, 63 },	// modes 12-15
	{ 98, 30 }, { 61, 62 }, { 49, 79 }				// modes 16-18
};
3730
3731
// Writes the low num_bits bits of val into the byte buffer pBytes starting at bit offset cur_ofs
// (bit 0 is the least significant bit of pBytes[0]), replacing whatever bits were there.
// Bits outside [cur_ofs, cur_ofs + num_bits) are left untouched.
//
// num_bits must be <= 64, and val must fit in num_bits bits.
// Returns the bit offset just past the last bit written (cur_ofs + num_bits).
static inline uint32_t set_block_bits(uint8_t* pBytes, uint64_t val, uint32_t num_bits, uint32_t cur_ofs)
{
	assert(num_bits <= 64);
	assert((num_bits == 64) || (val < (1ULL << num_bits)));

	// Mask of the bits still to be written; shifted down in step with val.
	uint64_t mask = (num_bits == 64) ? UINT64_MAX : ((1ULL << num_bits) - 1);

	while (num_bits)
	{
		const uint32_t bit_in_byte = cur_ofs & 7U;
		const uint32_t bits_left_in_byte = 8U - bit_in_byte;

		// Number of bits that land in the current byte.
		const uint32_t n = (bits_left_in_byte < num_bits) ? bits_left_in_byte : num_bits;

		// Clear the destination bits, then OR in the new ones. The casts to uint8_t discard
		// everything that belongs to following bytes.
		pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << bit_in_byte);
		pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << bit_in_byte);

		val >>= n;
		mask >>= n;

		num_bits -= n;
		cur_ofs += n;
	}

	return cur_ofs;
}
3748
3749
// Extra cost, in bits, added to the base distance cost by compute_match_cost_estimate() for
// match distances below 512. Indexed directly by the distance.
// Presumably the DEFLATE distance-code extra-bit counts (the name suggests a deflate encoder table) - TODO confirm.
// Layout: 4x0, 4x1, 8x2, 16x3, 32x4, 64x5, 128x6, 256x7 (32 values per row below).
static const uint8_t g_tdefl_small_dist_extra[512] =
{
	0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
3760
3761
// Extra cost, in bits, added to the base distance cost by compute_match_cost_estimate() for
// match distances of 512 or more. Indexed by (distance clamped to 32767) >> 8.
// Presumably the DEFLATE distance-code extra-bit counts for large distances - TODO confirm.
// Layout: 2x0, 2x8, 4x9, 8x10, 16x11, 32x12, 64x13 (32 values per row below).
static const uint8_t g_tdefl_large_dist_extra[128] =
{
	0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
	13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
};
3767
3768
static inline uint32_t compute_match_cost_estimate(uint32_t dist)
3769
{
3770
uint32_t len_cost = 7;
3771
uint32_t dist_cost = 5;
3772
if (dist < 512)
3773
dist_cost += g_tdefl_small_dist_extra[dist & 511];
3774
else
3775
{
3776
dist_cost += g_tdefl_large_dist_extra[basisu::minimum<uint32_t>(dist, 32767) >> 8];
3777
while (dist >= 32768)
3778
{
3779
dist_cost++;
3780
dist >>= 1;
3781
}
3782
}
3783
return len_cost + dist_cost;
3784
}
3785
3786
// Key type for the selector history map in uastc_rdo_blocks(): a run of selector bits (m_sel)
// together with the bit offset it was read from (m_ofs).
//
// selector_bitsequence_hash hashes the raw bytes of this struct, so every byte must be
// deterministic: m_pad exists only to make the padding explicit and must always be zero.
// All members are therefore zero-initialized, including by the default constructor.
struct selector_bitsequence
{
	uint64_t m_sel = 0;
	uint32_t m_ofs = 0;
	uint32_t m_pad = 0; // avoid implicit padding for selector_bitsequence_hash

	selector_bitsequence() { }

	selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs), m_pad(0) { }

	// Equality considers the offset and the selector bits; m_pad is ignored.
	bool operator== (const selector_bitsequence& other) const
	{
		return (m_ofs == other.m_ofs) && (m_sel == other.m_sel);
	}

	// Orders by bit offset first, then by selector bits.
	bool operator< (const selector_bitsequence& other) const
	{
		if (m_ofs != other.m_ofs)
			return m_ofs < other.m_ofs;

		return m_sel < other.m_sel;
	}
};

// The byte-wise hash relies on the struct containing no compiler-inserted padding.
static_assert(sizeof(selector_bitsequence) == sizeof(uint64_t) + 2 * sizeof(uint32_t), "selector_bitsequence must not contain implicit padding");
3808
3809
struct selector_bitsequence_hash
3810
{
3811
std::size_t operator()(selector_bitsequence const& s) const noexcept
3812
{
3813
return hash_hsieh((const uint8_t*)&s, sizeof(s));
3814
}
3815
};
3816
3817
static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags,
3818
uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth)
3819
{
3820
debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index);
3821
3822
const int total_blocks_to_check = basisu::maximum<uint32_t>(1U, params.m_lz_dict_size / sizeof(basist::uastc_block));
3823
const bool perceptual = false;
3824
3825
std::unordered_map<selector_bitsequence, uint32_t, selector_bitsequence_hash> selector_history;
3826
3827
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
3828
{
3829
const basist::uastc_block& blk = pBlocks[block_index];
3830
const color_rgba* pPixels = &pBlock_pixels[16 * block_index];
3831
3832
unpacked_uastc_block unpacked_blk;
3833
if (!unpack_uastc(blk, unpacked_blk, false, true))
3834
return false;
3835
3836
const uint32_t block_mode = unpacked_blk.m_mode;
3837
if (block_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3838
continue;
3839
3840
tracked_stat r_stats, g_stats, b_stats, a_stats;
3841
3842
for (uint32_t i = 0; i < 16; i++)
3843
{
3844
r_stats.update(pPixels[i].r);
3845
g_stats.update(pPixels[i].g);
3846
b_stats.update(pPixels[i].b);
3847
a_stats.update(pPixels[i].a);
3848
}
3849
3850
const float max_std_dev = basisu::maximum<float>(basisu::maximum<float>(basisu::maximum(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev());
3851
3852
float yl = clamp<float>(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f);
3853
yl = yl * yl;
3854
const float smooth_block_error_scale = lerp<float>(params.m_smooth_block_max_error_scale, 1.0f, yl);
3855
if (smooth_block_error_scale > 1.0f)
3856
total_smooth++;
3857
3858
color_rgba decoded_uastc_block[4][4];
3859
if (!unpack_uastc(unpacked_blk, (basist::color32*)decoded_uastc_block, false))
3860
return false;
3861
3862
uint64_t uastc_err = 0;
3863
for (uint32_t i = 0; i < 16; i++)
3864
uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_uastc_block)[i], true);
3865
3866
// Transcode to BC7
3867
bc7_optimization_results b7_results;
3868
if (!transcode_uastc_to_bc7(unpacked_blk, b7_results))
3869
return false;
3870
3871
basist::bc7_block b7_block;
3872
basist::encode_bc7_block(&b7_block, &b7_results);
3873
3874
color_rgba decoded_b7_blk[4][4];
3875
unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]);
3876
3877
uint64_t bc7_err = 0;
3878
for (uint32_t i = 0; i < 16; i++)
3879
bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true);
3880
3881
uint64_t cur_err = (uastc_err + bc7_err) / 2;
3882
3883
// Divide by 16*4 to compute RMS error
3884
const float cur_ms_err = (float)cur_err * (1.0f / 64.0f);
3885
const float cur_rms_err = sqrt(cur_ms_err);
3886
3887
const uint32_t first_sel_bit = g_uastc_mode_selector_bits[block_mode][0];
3888
const uint32_t total_sel_bits = g_uastc_mode_selector_bits[block_mode][1];
3889
assert(first_sel_bit + total_sel_bits <= 128);
3890
assert(total_sel_bits > 0);
3891
3892
uint32_t cur_bit_offset = first_sel_bit;
3893
uint64_t cur_sel_bits = read_bits((const uint8_t*)&blk, cur_bit_offset, basisu::minimum(64U, total_sel_bits));
3894
3895
if (cur_rms_err >= params.m_skip_block_rms_thresh)
3896
{
3897
auto cur_search_res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, cur_sel_bits), block_index));
3898
3899
// Block already has too much error, so don't mess with it.
3900
if (!cur_search_res.second)
3901
(*cur_search_res.first).second = block_index;
3902
3903
total_skipped++;
3904
continue;
3905
}
3906
3907
int cur_bits;
3908
auto cur_find_res = selector_history.find(selector_bitsequence(first_sel_bit, cur_sel_bits));
3909
if (cur_find_res == selector_history.end())
3910
{
3911
// Wasn't found - wildly estimate literal cost
3912
//cur_bits = (total_sel_bits * 5) / 4;
3913
cur_bits = (total_sel_bits * params.m_lz_literal_cost) / 100;
3914
}
3915
else
3916
{
3917
// Was found - wildly estimate match cost
3918
uint32_t match_block_index = cur_find_res->second;
3919
const int block_dist_in_bytes = (block_index - match_block_index) * 16;
3920
cur_bits = compute_match_cost_estimate(block_dist_in_bytes);
3921
}
3922
3923
int first_block_to_check = basisu::maximum<int>(first_index, block_index - total_blocks_to_check);
3924
int last_block_to_check = block_index - 1;
3925
3926
basist::uastc_block best_block(blk);
3927
uint32_t best_block_index = block_index;
3928
3929
float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda;
3930
3931
// Now scan through previous blocks, insert their selector bit patterns into the current block, and find
3932
// selector bit patterns which don't increase the overall block error too much.
3933
for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index)
3934
{
3935
const basist::uastc_block& prev_blk = pBlocks[prev_block_index];
3936
3937
uint32_t bit_offset = first_sel_bit;
3938
uint64_t sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, basisu::minimum(64U, total_sel_bits));
3939
3940
int match_block_index = prev_block_index;
3941
auto res = selector_history.find(selector_bitsequence(first_sel_bit, sel_bits));
3942
if (res != selector_history.end())
3943
match_block_index = res->second;
3944
// Have we already checked this bit pattern? If so then skip this block.
3945
if (match_block_index > prev_block_index)
3946
continue;
3947
3948
unpacked_uastc_block unpacked_prev_blk;
3949
if (!unpack_uastc(prev_blk, unpacked_prev_blk, false, true))
3950
return false;
3951
3952
basist::uastc_block trial_blk(blk);
3953
3954
set_block_bits((uint8_t*)&trial_blk, sel_bits, basisu::minimum(64U, total_sel_bits), first_sel_bit);
3955
3956
if (total_sel_bits > 64)
3957
{
3958
sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, total_sel_bits - 64U);
3959
3960
set_block_bits((uint8_t*)&trial_blk, sel_bits, total_sel_bits - 64U, first_sel_bit + basisu::minimum(64U, total_sel_bits));
3961
}
3962
3963
unpacked_uastc_block unpacked_trial_blk;
3964
if (!unpack_uastc(trial_blk, unpacked_trial_blk, false, true))
3965
continue;
3966
3967
color_rgba decoded_trial_uastc_block[4][4];
3968
if (!unpack_uastc(unpacked_trial_blk, (basist::color32*)decoded_trial_uastc_block, false))
3969
continue;
3970
3971
uint64_t trial_uastc_err = 0;
3972
for (uint32_t i = 0; i < 16; i++)
3973
trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true);
3974
3975
// Transcode trial to BC7, compute error
3976
bc7_optimization_results trial_b7_results;
3977
if (!transcode_uastc_to_bc7(unpacked_trial_blk, trial_b7_results))
3978
return false;
3979
3980
basist::bc7_block trial_b7_block;
3981
basist::encode_bc7_block(&trial_b7_block, &trial_b7_results);
3982
3983
color_rgba decoded_trial_b7_blk[4][4];
3984
unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]);
3985
3986
uint64_t trial_bc7_err = 0;
3987
for (uint32_t i = 0; i < 16; i++)
3988
trial_bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_b7_blk)[i], true);
3989
3990
uint64_t trial_err = (trial_uastc_err + trial_bc7_err) / 2;
3991
3992
const float trial_ms_err = (float)trial_err * (1.0f / 64.0f);
3993
const float trial_rms_err = sqrtf(trial_ms_err);
3994
3995
if (trial_rms_err > cur_rms_err * params.m_max_allowed_rms_increase_ratio)
3996
continue;
3997
3998
const int block_dist_in_bytes = (block_index - match_block_index) * 16;
3999
const int match_bits = compute_match_cost_estimate(block_dist_in_bytes);
4000
4001
float t = trial_ms_err * smooth_block_error_scale + match_bits * params.m_lambda;
4002
if (t < best_t)
4003
{
4004
best_t = t;
4005
best_block_index = prev_block_index;
4006
4007
best_block = trial_blk;
4008
}
4009
4010
} // prev_block_index
4011
4012
if (best_block_index != block_index)
4013
{
4014
total_modified++;
4015
4016
unpacked_uastc_block unpacked_best_blk;
4017
if (!unpack_uastc(best_block, unpacked_best_blk, false, false))
4018
return false;
4019
4020
if ((params.m_endpoint_refinement) && (block_mode == 0))
4021
{
4022
// Attempt to refine mode 0 block's endpoints, using the new selectors. This doesn't help much, but it does help.
4023
// TODO: We could do this with the other modes too.
4024
color_rgba decoded_best_uastc_block[4][4];
4025
if (!unpack_uastc(unpacked_best_blk, (basist::color32*)decoded_best_uastc_block, false))
4026
return false;
4027
4028
// Compute the block's current error (with the modified selectors).
4029
uint64_t best_uastc_err = 0;
4030
for (uint32_t i = 0; i < 16; i++)
4031
best_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_best_uastc_block)[i], true);
4032
4033
bc7enc_compress_block_params comp_params;
4034
memset(&comp_params, 0, sizeof(comp_params));
4035
comp_params.m_max_partitions_mode1 = 64;
4036
comp_params.m_least_squares_passes = 1;
4037
comp_params.m_weights[0] = 1;
4038
comp_params.m_weights[1] = 1;
4039
comp_params.m_weights[2] = 1;
4040
comp_params.m_weights[3] = 1;
4041
comp_params.m_uber_level = 0;
4042
4043
uastc_encode_results results;
4044
uint32_t total_results = 0;
4045
astc_mode0_or_18(0, (color_rgba(*)[4])pPixels, &results, total_results, comp_params, unpacked_best_blk.m_astc.m_weights);
4046
assert(total_results == 1);
4047
4048
// See if the overall error has actually gone done.
4049
4050
color_rgba decoded_trial_uastc_block[4][4];
4051
bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false);
4052
assert(success);
4053
4054
BASISU_NOTE_UNUSED(success);
4055
4056
uint64_t trial_uastc_err = 0;
4057
for (uint32_t i = 0; i < 16; i++)
4058
trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true);
4059
4060
if (trial_uastc_err < best_uastc_err)
4061
{
4062
// The error went down, so accept the new endpoints.
4063
4064
// Ensure the selectors haven't changed, otherwise we'll invalidate the LZ matches.
4065
for (uint32_t i = 0; i < 16; i++)
4066
assert(unpacked_best_blk.m_astc.m_weights[i] == results.m_astc.m_weights[i]);
4067
4068
unpacked_best_blk.m_astc = results.m_astc;
4069
4070
total_refined++;
4071
}
4072
} // if ((params.m_endpoint_refinement) && (block_mode == 0))
4073
4074
// The selectors have changed, so go recompute the block hints.
4075
if (!uastc_recompute_hints(&best_block, pPixels, flags, &unpacked_best_blk))
4076
return false;
4077
4078
// Write the modified block
4079
pBlocks[block_index] = best_block;
4080
4081
} // if (best_block_index != block_index)
4082
4083
{
4084
uint32_t bit_offset = first_sel_bit;
4085
uint64_t sel_bits = read_bits((const uint8_t*)&best_block, bit_offset, basisu::minimum(64U, total_sel_bits));
4086
4087
auto res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, sel_bits), block_index));
4088
if (!res.second)
4089
(*res.first).second = block_index;
4090
}
4091
4092
} // block_index
4093
4094
return true;
4095
}
4096
4097
// This function implements a basic form of rate distortion optimization (RDO) for UASTC.
4098
// It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation.
4099
// There's A LOT that can be done better in here, but it's a start.
4100
// One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses.
4101
bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, job_pool* pJob_pool, uint32_t total_jobs)
4102
{
4103
assert(params.m_max_allowed_rms_increase_ratio > 1.0f);
4104
assert(params.m_lz_dict_size > 0);
4105
assert(params.m_lambda > 0.0f);
4106
4107
uint32_t total_skipped = 0, total_modified = 0, total_refined = 0, total_smooth = 0;
4108
4109
uint32_t blocks_per_job = total_jobs ? (num_blocks / total_jobs) : 0;
4110
4111
std::mutex stat_mutex;
4112
4113
bool status = false;
4114
4115
if ((!pJob_pool) || (total_jobs <= 1) || (blocks_per_job <= 8))
4116
{
4117
status = uastc_rdo_blocks(0, num_blocks, pBlocks, pBlock_pixels, params, flags, total_skipped, total_refined, total_modified, total_smooth);
4118
}
4119
else
4120
{
4121
bool all_succeeded = true;
4122
4123
for (uint32_t block_index_iter = 0; block_index_iter < num_blocks; block_index_iter += blocks_per_job)
4124
{
4125
const uint32_t first_index = block_index_iter;
4126
const uint32_t last_index = minimum<uint32_t>(num_blocks, block_index_iter + blocks_per_job);
4127
4128
pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, &params, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] {
4129
4130
uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0;
4131
4132
bool status = uastc_rdo_blocks(first_index, last_index, pBlocks, pBlock_pixels, params, flags, job_skipped, job_refined, job_modified, job_smooth);
4133
4134
{
4135
std::lock_guard<std::mutex> lck(stat_mutex);
4136
4137
all_succeeded = all_succeeded && status;
4138
total_skipped += job_skipped;
4139
total_modified += job_modified;
4140
total_refined += job_refined;
4141
total_smooth += job_smooth;
4142
}
4143
4144
}
4145
);
4146
4147
} // block_index_iter
4148
4149
pJob_pool->wait_for_all();
4150
4151
status = all_succeeded;
4152
}
4153
4154
debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks);
4155
4156
return status;
4157
}
4158
} // namespace basisu
4159
4160
4161
4162
4163
4164
4165