Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.cpp
9902 views
1
// basisu_uastc_hdr_4x4_enc.cpp
2
#include "basisu_uastc_hdr_4x4_enc.h"
3
#include "../transcoder/basisu_transcoder.h"
4
5
using namespace basist;
6
7
namespace basisu
8
{
9
10
// Inclusive first/last weight ISE range limits for each UASTC HDR 4x4 block configuration.
// The quality presets below copy these into the matching m_first_*/m_last_* option fields.

// Mode 11, single subset.
const uint32_t UHDR_MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS;
const uint32_t UHDR_MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;

// Mode 7, single subset.
const uint32_t UHDR_MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS;
const uint32_t UHDR_MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;

// Mode 7, two subsets.
const uint32_t UHDR_MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS;
const uint32_t UHDR_MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS;

// Mode 11, two subsets.
const uint32_t UHDR_MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS;
const uint32_t UHDR_MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS;
14
15
// Default-constructs the base options, then applies this codec's defaults via init().
uastc_hdr_4x4_codec_options::uastc_hdr_4x4_codec_options() : astc_hdr_codec_base_options()
{
    init();
}
20
21
void uastc_hdr_4x4_codec_options::init()
22
{
23
astc_hdr_codec_base_options::init();
24
25
// This was the log bias we used on the initial release. It's too low.
26
//m_q_log_bias = Q_LOG_BIAS_4x4;
27
28
m_q_log_bias = Q_LOG_BIAS_6x6;
29
30
m_bc6h_err_weight = .85f;
31
32
#if 0
33
// HACK HACK
34
m_disable_weight_plane_optimization = true;
35
m_take_first_non_clamping_mode11_submode = false;
36
m_take_first_non_clamping_mode7_submode = false;
37
#endif
38
39
// Must set the quality level at least once to reset this struct.
40
set_quality_level(cDefaultLevel);
41
}
42
43
void uastc_hdr_4x4_codec_options::set_quality_best()
44
{
45
// highest achievable quality
46
m_mode11_direct_only = false;
47
48
m_use_solid = true;
49
50
m_use_mode11_part1 = true;
51
m_mode11_uber_mode = true;
52
m_first_mode11_weight_ise_range = UHDR_MODE11_FIRST_ISE_RANGE;
53
m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE;
54
m_first_mode11_submode = -1;
55
m_last_mode11_submode = 7;
56
57
m_use_mode7_part1 = true;
58
m_first_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_FIRST_ISE_RANGE;
59
m_last_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE;
60
m_mode7_full_s_optimization = true;
61
62
m_use_mode7_part2 = true;
63
m_mode7_part2_part_masks = UINT32_MAX;
64
m_first_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_FIRST_ISE_RANGE;
65
m_last_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE;
66
67
m_use_mode11_part2 = true;
68
m_mode11_part2_part_masks = UINT32_MAX;
69
m_first_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_FIRST_ISE_RANGE;
70
m_last_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE;
71
72
m_refine_weights = true;
73
74
m_use_estimated_partitions = false;
75
m_max_estimated_partitions = 0;
76
}
77
78
void uastc_hdr_4x4_codec_options::set_quality_normal()
79
{
80
m_use_solid = true;
81
82
// We'll allow uber mode in normal if the user allows it.
83
m_use_mode11_part1 = true;
84
m_mode11_uber_mode = true;
85
m_first_mode11_weight_ise_range = 6;
86
m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE;
87
88
m_use_mode7_part1 = true;
89
m_first_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE;
90
m_last_mode7_part1_weight_ise_range = UHDR_MODE7_PART1_LAST_ISE_RANGE;
91
92
m_use_mode7_part2 = true;
93
m_mode7_part2_part_masks = UINT32_MAX;
94
m_first_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE;
95
m_last_mode7_part2_weight_ise_range = UHDR_MODE7_PART2_LAST_ISE_RANGE;
96
97
m_use_mode11_part2 = true;
98
m_mode11_part2_part_masks = UINT32_MAX;
99
m_first_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE;
100
m_last_mode11_part2_weight_ise_range = UHDR_MODE11_PART2_LAST_ISE_RANGE;
101
102
m_refine_weights = true;
103
}
104
105
void uastc_hdr_4x4_codec_options::set_quality_fastest()
106
{
107
m_use_solid = true;
108
109
m_use_mode11_part1 = true;
110
m_mode11_uber_mode = false;
111
m_first_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE;
112
m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE;
113
114
m_use_mode7_part1 = false;
115
m_mode7_full_s_optimization = false;
116
117
m_use_mode7_part2 = false;
118
m_use_mode11_part2 = false;
119
120
m_refine_weights = false;
121
}
122
123
void uastc_hdr_4x4_codec_options::set_quality_level(int level)
124
{
125
level = clamp(level, cMinLevel, cMaxLevel);
126
127
m_level = level;
128
129
// First ensure all options are set to best.
130
set_quality_best();
131
132
switch (level)
133
{
134
case 0:
135
{
136
set_quality_fastest();
137
break;
138
}
139
case 1:
140
{
141
set_quality_normal();
142
143
m_first_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE - 1;
144
m_last_mode11_weight_ise_range = UHDR_MODE11_LAST_ISE_RANGE;
145
146
m_use_mode7_part1 = false;
147
m_mode7_full_s_optimization = false;
148
m_use_mode7_part2 = false;
149
150
m_use_estimated_partitions = true;
151
m_max_estimated_partitions = 1;
152
153
m_mode11_part2_part_masks = 1 | 2;
154
m_mode7_part2_part_masks = 1 | 2;
155
156
// TODO: Disabling this hurts BC6H quality, but significantly speeds up compression.
157
//m_refine_weights = false;
158
break;
159
}
160
case 2:
161
{
162
set_quality_normal();
163
164
m_use_estimated_partitions = true;
165
m_max_estimated_partitions = 2;
166
167
m_mode11_part2_part_masks = 1 | 2;
168
m_mode7_part2_part_masks = 1 | 2;
169
170
break;
171
}
172
case 3:
173
{
174
m_use_estimated_partitions = true;
175
m_max_estimated_partitions = 2;
176
177
m_mode11_part2_part_masks = 1 | 2 | 4 | 8;
178
m_mode7_part2_part_masks = 1 | 2 | 4 | 8;
179
180
break;
181
}
182
default:
183
{
184
// best options already set
185
break;
186
}
187
}
188
}
189
190
//--------------------------------------------------------------------------------------------------------------------------
191
192
static bool pack_solid(const vec4F* pBlock_linear_colors, basisu::vector<astc_hdr_4x4_pack_results>& all_results, const uastc_hdr_4x4_codec_options& coptions)
193
{
194
float r = 0.0f, g = 0.0f, b = 0.0f;
195
196
const float LOG_BIAS = .125f;
197
198
bool solid_block = true;
199
for (uint32_t i = 0; i < 16; i++)
200
{
201
if ((pBlock_linear_colors[0][0] != pBlock_linear_colors[i][0]) ||
202
(pBlock_linear_colors[0][1] != pBlock_linear_colors[i][1]) ||
203
(pBlock_linear_colors[0][2] != pBlock_linear_colors[i][2]))
204
{
205
solid_block = false;
206
}
207
208
r += log2f(pBlock_linear_colors[i][0] + LOG_BIAS);
209
g += log2f(pBlock_linear_colors[i][1] + LOG_BIAS);
210
b += log2f(pBlock_linear_colors[i][2] + LOG_BIAS);
211
}
212
213
if (solid_block)
214
{
215
r = pBlock_linear_colors[0][0];
216
g = pBlock_linear_colors[0][1];
217
b = pBlock_linear_colors[0][2];
218
}
219
else
220
{
221
r = maximum<float>(0.0f, powf(2.0f, r * (1.0f / 16.0f)) - LOG_BIAS);
222
g = maximum<float>(0.0f, powf(2.0f, g * (1.0f / 16.0f)) - LOG_BIAS);
223
b = maximum<float>(0.0f, powf(2.0f, b * (1.0f / 16.0f)) - LOG_BIAS);
224
225
// for safety
226
r = minimum<float>(r, MAX_HALF_FLOAT);
227
g = minimum<float>(g, MAX_HALF_FLOAT);
228
b = minimum<float>(b, MAX_HALF_FLOAT);
229
}
230
231
half_float rh = float_to_half_non_neg_no_nan_inf(r), gh = float_to_half_non_neg_no_nan_inf(g), bh = float_to_half_non_neg_no_nan_inf(b), ah = float_to_half_non_neg_no_nan_inf(1.0f);
232
233
astc_hdr_4x4_pack_results results;
234
results.clear();
235
236
uint8_t* packed_blk = (uint8_t*)&results.m_solid_blk;
237
results.m_is_solid = true;
238
239
packed_blk[0] = 0b11111100;
240
packed_blk[1] = 255;
241
packed_blk[2] = 255;
242
packed_blk[3] = 255;
243
packed_blk[4] = 255;
244
packed_blk[5] = 255;
245
packed_blk[6] = 255;
246
packed_blk[7] = 255;
247
248
packed_blk[8] = (uint8_t)rh;
249
packed_blk[9] = (uint8_t)(rh >> 8);
250
packed_blk[10] = (uint8_t)gh;
251
packed_blk[11] = (uint8_t)(gh >> 8);
252
packed_blk[12] = (uint8_t)bh;
253
packed_blk[13] = (uint8_t)(bh >> 8);
254
packed_blk[14] = (uint8_t)ah;
255
packed_blk[15] = (uint8_t)(ah >> 8);
256
257
results.m_best_block_error = 0;
258
259
if (!solid_block)
260
{
261
const float R_WEIGHT = coptions.m_r_err_scale;
262
const float G_WEIGHT = coptions.m_g_err_scale;
263
264
// This MUST match how errors are computed in eval_selectors().
265
for (uint32_t i = 0; i < 16; i++)
266
{
267
half_float dr = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]), dg = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]), db = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
268
double rd = q(rh, Q_LOG_BIAS_4x4) - q(dr, Q_LOG_BIAS_4x4);
269
double gd = q(gh, Q_LOG_BIAS_4x4) - q(dg, Q_LOG_BIAS_4x4);
270
double bd = q(bh, Q_LOG_BIAS_4x4) - q(db, Q_LOG_BIAS_4x4);
271
272
double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
273
274
results.m_best_block_error += e;
275
}
276
}
277
278
const half_float hc[3] = { rh, gh, bh };
279
280
bc6h_enc_block_solid_color(&results.m_bc6h_block, hc);
281
282
all_results.push_back(results);
283
284
return solid_block;
285
}
286
287
//--------------------------------------------------------------------------------------------------------------------------
288
289
static void pack_mode11(
290
const vec4F* pBlock_linear_colors, const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16],
291
basisu::vector<astc_hdr_4x4_pack_results>& all_results,
292
const uastc_hdr_4x4_codec_options& coptions,
293
uint32_t first_weight_ise_range, uint32_t last_weight_ise_range, bool constrain_ise_weight_selectors)
294
{
295
BASISU_NOTE_UNUSED(pBlock_linear_colors);
296
assert(first_weight_ise_range <= last_weight_ise_range);
297
298
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[16];
299
uint32_t trial_submode11 = 0;
300
301
clear_obj(trial_endpoints);
302
clear_obj(trial_weights);
303
304
for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
305
{
306
const bool direct_only = coptions.m_mode11_direct_only;
307
308
uint32_t endpoint_ise_range = astc_helpers::BISE_256_LEVELS;
309
if (weight_ise_range == astc_helpers::BISE_16_LEVELS)
310
endpoint_ise_range = astc_helpers::BISE_192_LEVELS;
311
else
312
{
313
assert(weight_ise_range < astc_helpers::BISE_16_LEVELS);
314
}
315
316
double trial_error = encode_astc_hdr_block_mode_11(16, pBlock_pixels_half, pBlock_pixels_q16, weight_ise_range, trial_submode11, BIG_FLOAT_VAL, trial_endpoints, trial_weights, coptions, direct_only,
317
endpoint_ise_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, constrain_ise_weight_selectors, coptions.m_first_mode11_submode, coptions.m_last_mode11_submode, false, cOrdinaryLeastSquares);
318
319
if (trial_error < BIG_FLOAT_VAL)
320
{
321
astc_hdr_4x4_pack_results results;
322
results.clear();
323
324
results.m_best_block_error = trial_error;
325
326
results.m_best_submodes[0] = trial_submode11;
327
results.m_constrained_weights = constrain_ise_weight_selectors;
328
329
results.m_best_blk.m_num_partitions = 1;
330
results.m_best_blk.m_color_endpoint_modes[0] = 11;
331
results.m_best_blk.m_weight_ise_range = (uint8_t)weight_ise_range;
332
results.m_best_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;
333
334
memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);
335
memcpy(results.m_best_blk.m_weights, trial_weights, 16);
336
337
#ifdef _DEBUG
338
// Sanity checking
339
{
340
half_float block_pixels_half[16][3];
341
342
for (uint32_t i = 0; i < 16; i++)
343
{
344
block_pixels_half[i][0] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][0]);
345
block_pixels_half[i][1] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][1]);
346
block_pixels_half[i][2] = float_to_half_non_neg_no_nan_inf(pBlock_linear_colors[i][2]);
347
}
348
349
half_float unpacked_astc_blk_rgba[4][4][4];
350
bool res = astc_helpers::decode_block(results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
351
assert(res);
352
353
half_float unpacked_astc_blk_rgb[4][4][3];
354
for (uint32_t y = 0; y < 4; y++)
355
for (uint32_t x = 0; x < 4; x++)
356
for (uint32_t c = 0; c < 3; c++)
357
unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
358
359
double cmp_err = compute_block_error(16, &block_pixels_half[0][0], &unpacked_astc_blk_rgb[0][0][0], coptions);
360
assert(results.m_best_block_error == cmp_err);
361
}
362
#endif
363
364
// transcode to BC6H
365
assert(results.m_best_blk.m_color_endpoint_modes[0] == 11);
366
367
// Get qlog12 endpoints
368
int e[2][3];
369
bool success = decode_mode11_to_qlog12(results.m_best_blk.m_endpoints, e, results.m_best_blk.m_endpoint_ise_range);
370
assert(success);
371
BASISU_NOTE_UNUSED(success);
372
373
// Transform endpoints to half float
374
half_float h_e[3][2] =
375
{
376
{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
377
{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
378
{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
379
};
380
381
// Transcode to bc6h
382
success = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
383
assert(success);
384
385
all_results.push_back(results);
386
}
387
}
388
}
389
390
//--------------------------------------------------------------------------------------------------------------------------
391
392
static void pack_mode7_single_part(
393
const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16],
394
basisu::vector<astc_hdr_4x4_pack_results>& all_results, const uastc_hdr_4x4_codec_options& coptions,
395
uint32_t first_mode7_part1_weight_ise_range, uint32_t last_mode7_part1_weight_ise_range)
396
{
397
assert(first_mode7_part1_weight_ise_range <= last_mode7_part1_weight_ise_range);
398
399
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS], trial_weights[16];
400
uint32_t trial_submode7 = 0;
401
402
clear_obj(trial_endpoints);
403
clear_obj(trial_weights);
404
405
for (uint32_t weight_ise_range = first_mode7_part1_weight_ise_range; weight_ise_range <= last_mode7_part1_weight_ise_range; weight_ise_range++)
406
{
407
const uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
408
409
double trial_error = encode_astc_hdr_block_mode_7(16, pBlock_pixels_half, pBlock_pixels_q16, weight_ise_range, trial_submode7, BIG_FLOAT_VAL, trial_endpoints, trial_weights, coptions, ise_endpoint_range);
410
411
if (trial_error < BIG_FLOAT_VAL)
412
{
413
astc_hdr_4x4_pack_results results;
414
results.clear();
415
416
results.m_best_block_error = trial_error;
417
418
results.m_best_submodes[0] = trial_submode7;
419
420
results.m_best_blk.m_num_partitions = 1;
421
results.m_best_blk.m_color_endpoint_modes[0] = 7;
422
results.m_best_blk.m_weight_ise_range = (uint8_t)weight_ise_range;
423
results.m_best_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range;
424
425
memcpy(results.m_best_blk.m_endpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
426
memcpy(results.m_best_blk.m_weights, trial_weights, 16);
427
428
// transcode to BC6H
429
assert(results.m_best_blk.m_color_endpoint_modes[0] == 7);
430
431
// Get qlog12 endpoints
432
int e[2][3];
433
if (!decode_mode7_to_qlog12(results.m_best_blk.m_endpoints, e, nullptr, results.m_best_blk.m_endpoint_ise_range))
434
continue;
435
436
// Transform endpoints to half float
437
half_float h_e[3][2] =
438
{
439
{ qlog_to_half(e[0][0], 12), qlog_to_half(e[1][0], 12) },
440
{ qlog_to_half(e[0][1], 12), qlog_to_half(e[1][1], 12) },
441
{ qlog_to_half(e[0][2], 12), qlog_to_half(e[1][2], 12) }
442
};
443
444
// Transcode to bc6h
445
bool status = transcode_bc6h_1subset(h_e, results.m_best_blk, results.m_bc6h_block);
446
assert(status);
447
(void)status;
448
449
all_results.push_back(results);
450
}
451
}
452
}
453
454
//--------------------------------------------------------------------------------------------------------------------------
455
456
static bool estimate_partition(
457
const half_float pBlock_pixels_half[16][3],
458
int* pBest_parts, uint32_t num_best_parts)
459
{
460
assert(num_best_parts <= basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
461
462
vec3F training_vecs[16], mean(0.0f);
463
464
for (uint32_t i = 0; i < 16; i++)
465
{
466
vec3F& v = training_vecs[i];
467
468
v[0] = (float)pBlock_pixels_half[i][0];
469
v[1] = (float)pBlock_pixels_half[i][1];
470
v[2] = (float)pBlock_pixels_half[i][2];
471
472
mean += v;
473
}
474
mean *= (1.0f / 16.0f);
475
476
vec3F cluster_centroids[2] = { mean - vec3F(.1f), mean + vec3F(.1f) };
477
478
uint32_t cluster_pixels[2][16];
479
uint32_t num_cluster_pixels[2];
480
vec3F new_cluster_means[2];
481
482
for (uint32_t s = 0; s < 4; s++)
483
{
484
num_cluster_pixels[0] = 0;
485
num_cluster_pixels[1] = 0;
486
487
new_cluster_means[0].clear();
488
new_cluster_means[1].clear();
489
490
for (uint32_t i = 0; i < 16; i++)
491
{
492
float d0 = training_vecs[i].squared_distance(cluster_centroids[0]);
493
float d1 = training_vecs[i].squared_distance(cluster_centroids[1]);
494
495
if (d0 < d1)
496
{
497
cluster_pixels[0][num_cluster_pixels[0]] = i;
498
new_cluster_means[0] += training_vecs[i];
499
num_cluster_pixels[0]++;
500
}
501
else
502
{
503
cluster_pixels[1][num_cluster_pixels[1]] = i;
504
new_cluster_means[1] += training_vecs[i];
505
num_cluster_pixels[1]++;
506
}
507
}
508
509
if (!num_cluster_pixels[0] || !num_cluster_pixels[1])
510
return false;
511
512
cluster_centroids[0] = new_cluster_means[0] / (float)num_cluster_pixels[0];
513
cluster_centroids[1] = new_cluster_means[1] / (float)num_cluster_pixels[1];
514
}
515
516
int desired_parts[4][4]; // [y][x]
517
for (uint32_t p = 0; p < 2; p++)
518
{
519
for (uint32_t i = 0; i < num_cluster_pixels[p]; i++)
520
{
521
const uint32_t pix_index = cluster_pixels[p][i];
522
523
desired_parts[pix_index >> 2][pix_index & 3] = p;
524
}
525
}
526
527
uint32_t part_similarity[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
528
529
for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; part_index++)
530
{
531
const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
532
533
int total_sim_non_inv = 0;
534
int total_sim_inv = 0;
535
536
for (uint32_t y = 0; y < 4; y++)
537
{
538
for (uint32_t x = 0; x < 4; x++)
539
{
540
int part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
541
542
if (part == desired_parts[y][x])
543
total_sim_non_inv++;
544
545
if ((part ^ 1) == desired_parts[y][x])
546
total_sim_inv++;
547
}
548
}
549
550
int total_sim = maximum(total_sim_non_inv, total_sim_inv);
551
552
part_similarity[part_index] = (total_sim << 8) | part_index;
553
554
} // part_index;
555
556
std::sort(part_similarity, part_similarity + basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
557
558
for (uint32_t i = 0; i < num_best_parts; i++)
559
pBest_parts[i] = part_similarity[(basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 - 1) - i] & 0xFF;
560
561
return true;
562
}
563
564
//--------------------------------------------------------------------------------------------------------------------------
565
566
static void pack_mode7_2part(
567
const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16],
568
basisu::vector<astc_hdr_4x4_pack_results>& all_results, const uastc_hdr_4x4_codec_options& coptions,
569
int num_estimated_partitions, const int *pEstimated_partitions,
570
uint32_t first_weight_ise_range, uint32_t last_weight_ise_range)
571
{
572
assert(coptions.m_mode7_part2_part_masks);
573
574
astc_helpers::log_astc_block trial_blk;
575
clear_obj(trial_blk);
576
trial_blk.m_grid_width = 4;
577
trial_blk.m_grid_height = 4;
578
579
trial_blk.m_num_partitions = 2;
580
trial_blk.m_color_endpoint_modes[0] = 7;
581
trial_blk.m_color_endpoint_modes[1] = 7;
582
583
uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
584
585
if (num_estimated_partitions)
586
{
587
first_part_index = 0;
588
last_part_index = num_estimated_partitions;
589
}
590
591
for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter)
592
{
593
uint32_t part_index;
594
if (num_estimated_partitions)
595
{
596
part_index = pEstimated_partitions[part_index_iter];
597
assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
598
}
599
else
600
{
601
part_index = part_index_iter;
602
if (((1U << part_index) & coptions.m_mode7_part2_part_masks) == 0)
603
continue;
604
}
605
606
const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
607
const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
608
const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
609
610
half_float part_pixels_half[2][16][3];
611
vec4F part_pixels_q16[2][16];
612
613
uint32_t pixel_part_index[4][4]; // [y][x]
614
uint32_t num_part_pixels[2] = { 0, 0 };
615
616
// Extract each subset's texels for this partition pattern
617
for (uint32_t y = 0; y < 4; y++)
618
{
619
for (uint32_t x = 0; x < 4; x++)
620
{
621
uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
622
if (invert_flag)
623
part = 1 - part;
624
625
pixel_part_index[y][x] = part;
626
627
const uint32_t n = num_part_pixels[part];
628
629
part_pixels_half[part][n][0] = pBlock_pixels_half[x + y * 4][0];
630
part_pixels_half[part][n][1] = pBlock_pixels_half[x + y * 4][1];
631
part_pixels_half[part][n][2] = pBlock_pixels_half[x + y * 4][2];
632
part_pixels_q16[part][n] = pBlock_pixels_q16[x + y * 4];
633
634
num_part_pixels[part] = n + 1;
635
}
636
}
637
638
trial_blk.m_partition_id = (uint16_t)astc_pattern;
639
640
for (uint32_t weight_ise_range = first_weight_ise_range; weight_ise_range <= last_weight_ise_range; weight_ise_range++)
641
{
642
assert(weight_ise_range <= astc_helpers::BISE_8_LEVELS);
643
644
uint32_t ise_endpoint_range = astc_helpers::BISE_256_LEVELS;
645
if (weight_ise_range == astc_helpers::BISE_5_LEVELS)
646
ise_endpoint_range = astc_helpers::BISE_192_LEVELS;
647
else if (weight_ise_range == astc_helpers::BISE_6_LEVELS)
648
ise_endpoint_range = astc_helpers::BISE_128_LEVELS;
649
else if (weight_ise_range == astc_helpers::BISE_8_LEVELS)
650
ise_endpoint_range = astc_helpers::BISE_80_LEVELS;
651
652
uint8_t trial_endpoints[2][NUM_MODE7_ENDPOINTS], trial_weights[2][16];
653
uint32_t trial_submode7[2];
654
655
clear_obj(trial_endpoints);
656
clear_obj(trial_weights);
657
clear_obj(trial_submode7);
658
659
double total_trial_err = 0;
660
for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
661
{
662
total_trial_err += encode_astc_hdr_block_mode_7(
663
num_part_pixels[pack_part_index], part_pixels_half[pack_part_index], part_pixels_q16[pack_part_index],
664
weight_ise_range, trial_submode7[pack_part_index], BIG_FLOAT_VAL,
665
&trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions, ise_endpoint_range);
666
667
} // pack_part_index
668
669
if (total_trial_err < BIG_FLOAT_VAL)
670
{
671
trial_blk.m_weight_ise_range = (uint8_t)weight_ise_range;
672
trial_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range;
673
674
for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
675
memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE7_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE7_ENDPOINTS);
676
677
uint32_t src_pixel_index[2] = { 0, 0 };
678
for (uint32_t y = 0; y < 4; y++)
679
{
680
for (uint32_t x = 0; x < 4; x++)
681
{
682
uint32_t p = pixel_part_index[y][x];
683
trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++];
684
}
685
}
686
687
astc_hdr_4x4_pack_results results;
688
results.clear();
689
690
results.m_best_block_error = total_trial_err;
691
results.m_best_submodes[0] = trial_submode7[0];
692
results.m_best_submodes[1] = trial_submode7[1];
693
results.m_best_pat_index = part_index;
694
695
results.m_best_blk = trial_blk;
696
697
bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
698
assert(status);
699
BASISU_NOTE_UNUSED(status);
700
701
all_results.push_back(results);
702
}
703
704
} // weight_ise_range
705
706
} // part_index
707
}
708
709
//--------------------------------------------------------------------------------------------------------------------------
710
711
static void pack_mode11_2part(
712
const half_float pBlock_pixels_half[16][3], const vec4F pBlock_pixels_q16[16],
713
basisu::vector<astc_hdr_4x4_pack_results>& all_results, const uastc_hdr_4x4_codec_options& coptions,
714
int num_estimated_partitions, const int* pEstimated_partitions)
715
{
716
assert(coptions.m_mode11_part2_part_masks);
717
718
astc_helpers::log_astc_block trial_blk;
719
clear_obj(trial_blk);
720
trial_blk.m_grid_width = 4;
721
trial_blk.m_grid_height = 4;
722
723
trial_blk.m_num_partitions = 2;
724
trial_blk.m_color_endpoint_modes[0] = 11;
725
trial_blk.m_color_endpoint_modes[1] = 11;
726
727
uint32_t first_part_index = 0, last_part_index = basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2;
728
729
if (num_estimated_partitions)
730
{
731
first_part_index = 0;
732
last_part_index = num_estimated_partitions;
733
}
734
735
for (uint32_t part_index_iter = first_part_index; part_index_iter < last_part_index; ++part_index_iter)
736
{
737
uint32_t part_index;
738
if (num_estimated_partitions)
739
{
740
part_index = pEstimated_partitions[part_index_iter];
741
assert(part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
742
}
743
else
744
{
745
part_index = part_index_iter;
746
if (((1U << part_index) & coptions.m_mode11_part2_part_masks) == 0)
747
continue;
748
}
749
750
const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
751
const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
752
const bool invert_flag = basist::g_astc_bc7_common_partitions2[part_index].m_invert;
753
754
half_float part_pixels_half[2][16][3];
755
vec4F part_pixels_q16[2][16];
756
757
uint32_t pixel_part_index[4][4]; // [y][x]
758
uint32_t num_part_pixels[2] = { 0, 0 };
759
760
// Extract each subset's texels for this partition pattern
761
for (uint32_t y = 0; y < 4; y++)
762
{
763
for (uint32_t x = 0; x < 4; x++)
764
{
765
uint32_t part = basist::g_bc7_partition2[16 * bc7_pattern + x + y * 4];
766
if (invert_flag)
767
part = 1 - part;
768
769
pixel_part_index[y][x] = part;
770
771
const uint32_t n = num_part_pixels[part];
772
773
part_pixels_half[part][n][0] = pBlock_pixels_half[x + y * 4][0];
774
part_pixels_half[part][n][1] = pBlock_pixels_half[x + y * 4][1];
775
part_pixels_half[part][n][2] = pBlock_pixels_half[x + y * 4][2];
776
part_pixels_q16[part][n] = pBlock_pixels_q16[x + y * 4];
777
778
num_part_pixels[part] = n + 1;
779
}
780
}
781
782
trial_blk.m_partition_id = (uint16_t)astc_pattern;
783
784
for (uint32_t weight_ise_range = coptions.m_first_mode11_part2_weight_ise_range; weight_ise_range <= coptions.m_last_mode11_part2_weight_ise_range; weight_ise_range++)
785
{
786
bool direct_only = false;
787
uint32_t ise_endpoint_range = astc_helpers::BISE_64_LEVELS;
788
if (weight_ise_range == astc_helpers::BISE_4_LEVELS)
789
ise_endpoint_range = astc_helpers::BISE_40_LEVELS;
790
791
uint8_t trial_endpoints[2][NUM_MODE11_ENDPOINTS], trial_weights[2][16];
792
uint32_t trial_submode11[2];
793
794
clear_obj(trial_endpoints);
795
clear_obj(trial_weights);
796
clear_obj(trial_submode11);
797
798
double total_trial_err = 0;
799
for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
800
{
801
total_trial_err += encode_astc_hdr_block_mode_11(
802
num_part_pixels[pack_part_index], part_pixels_half[pack_part_index], part_pixels_q16[pack_part_index],
803
weight_ise_range, trial_submode11[pack_part_index], BIG_FLOAT_VAL,
804
&trial_endpoints[pack_part_index][0], &trial_weights[pack_part_index][0], coptions,
805
direct_only, ise_endpoint_range, coptions.m_mode11_uber_mode && (weight_ise_range >= astc_helpers::BISE_4_LEVELS) && coptions.m_allow_uber_mode, false,
806
coptions.m_first_mode11_submode, coptions.m_last_mode11_submode, false, cOrdinaryLeastSquares);
807
808
} // pack_part_index
809
810
if (total_trial_err < BIG_FLOAT_VAL)
811
{
812
trial_blk.m_weight_ise_range = (uint8_t)weight_ise_range;
813
trial_blk.m_endpoint_ise_range = (uint8_t)ise_endpoint_range;
814
815
for (uint32_t pack_part_index = 0; pack_part_index < 2; pack_part_index++)
816
memcpy(&trial_blk.m_endpoints[pack_part_index * NUM_MODE11_ENDPOINTS], &trial_endpoints[pack_part_index][0], NUM_MODE11_ENDPOINTS);
817
818
uint32_t src_pixel_index[2] = { 0, 0 };
819
for (uint32_t y = 0; y < 4; y++)
820
{
821
for (uint32_t x = 0; x < 4; x++)
822
{
823
uint32_t p = pixel_part_index[y][x];
824
trial_blk.m_weights[x + y * 4] = trial_weights[p][src_pixel_index[p]++];
825
}
826
}
827
828
astc_hdr_4x4_pack_results results;
829
results.clear();
830
831
results.m_best_block_error = total_trial_err;
832
results.m_best_submodes[0] = trial_submode11[0];
833
results.m_best_submodes[1] = trial_submode11[1];
834
results.m_best_pat_index = part_index;
835
836
results.m_best_blk = trial_blk;
837
838
bool status = transcode_bc6h_2subsets(part_index, results.m_best_blk, results.m_bc6h_block);
839
assert(status);
840
BASISU_NOTE_UNUSED(status);
841
842
all_results.push_back(results);
843
}
844
845
} // weight_ise_range
846
847
} // part_index
848
}
849
850
bool astc_hdr_4x4_enc_block(
851
const float* pRGBPixels, const basist::half_float *pRGBPixelsHalf,
852
const uastc_hdr_4x4_codec_options& coptions,
853
basisu::vector<astc_hdr_4x4_pack_results>& all_results)
854
{
855
assert(g_astc_hdr_enc_initialized);
856
if (!g_astc_hdr_enc_initialized)
857
{
858
// astc_hdr_enc_init() MUST be called first.
859
assert(0);
860
return false;
861
}
862
863
assert(coptions.m_use_solid || coptions.m_use_mode11_part1 || coptions.m_use_mode7_part2 || coptions.m_use_mode7_part1 || coptions.m_use_mode11_part2);
864
865
all_results.resize(0);
866
867
const half_float (*pBlock_pixels_half)[16][3] = reinterpret_cast<const half_float(*)[16][3]>(pRGBPixelsHalf);
868
869
vec4F block_linear_colors[16];
870
vec4F block_pixels_q16[16];
871
872
bool is_greyscale = true;
873
874
for (uint32_t i = 0; i < 16; i++)
875
{
876
const float fr = pRGBPixels[i * 3 + 0], fg = pRGBPixels[i * 3 + 1], fb = pRGBPixels[i * 3 + 2];
877
878
// Sanity check the input block.
879
assert((fr >= 0) && (fr <= MAX_HALF_FLOAT) && (!std::isinf(fr)) && (!std::isnan(fr)));
880
assert((fg >= 0) && (fg <= MAX_HALF_FLOAT) && (!std::isinf(fg)) && (!std::isnan(fg)));
881
assert((fb >= 0) && (fb <= MAX_HALF_FLOAT) && (!std::isinf(fb)) && (!std::isnan(fb)));
882
883
block_linear_colors[i].set(fr, fg, fb, 1.0f);
884
885
const half_float hr = (*pBlock_pixels_half)[i][0];
886
assert(hr == basist::float_to_half(fr));
887
block_pixels_q16[i][0] = (float)half_to_qlog16(hr);
888
889
const half_float hg = (*pBlock_pixels_half)[i][1];
890
assert(hg == basist::float_to_half(fg));
891
block_pixels_q16[i][1] = (float)half_to_qlog16(hg);
892
893
const half_float hb = (*pBlock_pixels_half)[i][2];
894
assert(hb == basist::float_to_half(fb));
895
block_pixels_q16[i][2] = (float)half_to_qlog16(hb);
896
897
block_pixels_q16[i][3] = 0.0f;
898
899
if ((hr != hg) || (hr != hb))
900
is_greyscale = false;
901
} // i
902
903
bool is_solid = false;
904
if (coptions.m_use_solid)
905
is_solid = pack_solid(block_linear_colors, all_results, coptions);
906
907
if (!is_solid)
908
{
909
if ((is_greyscale) && (coptions.m_level == 0))
910
{
911
// Special case if it's a pure grayscale block - just try mode 7.
912
pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, 1, 1);
913
pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, UHDR_MODE7_PART1_LAST_ISE_RANGE, UHDR_MODE7_PART1_LAST_ISE_RANGE);
914
}
915
else
916
{
917
if (coptions.m_use_mode11_part1)
918
{
919
const size_t cur_num_results = all_results.size();
920
921
pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, coptions.m_first_mode11_weight_ise_range, coptions.m_last_mode11_weight_ise_range, false);
922
923
if (coptions.m_last_mode11_weight_ise_range >= astc_helpers::BISE_12_LEVELS)
924
{
925
// Try constrained weights if we're allowed to use 12/16 level ISE weight modes
926
pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, maximum<uint32_t>(coptions.m_first_mode11_weight_ise_range, astc_helpers::BISE_12_LEVELS), coptions.m_last_mode11_weight_ise_range, true);
927
}
928
929
// If we couldn't get any mode 11 results at all, and we were restricted to just trying weight ISE range 8 (which required endpoint quantization) then
930
// fall back to weight ISE range 7 (which doesn't need any endpoint quantization).
931
// This is to guarantee we always get at least 1 non-solid result.
932
if (all_results.size() == cur_num_results)
933
{
934
if (coptions.m_first_mode11_weight_ise_range == astc_helpers::BISE_16_LEVELS)
935
{
936
pack_mode11(block_linear_colors, *pBlock_pixels_half, block_pixels_q16, all_results, coptions, astc_helpers::BISE_12_LEVELS, astc_helpers::BISE_12_LEVELS, false);
937
}
938
}
939
}
940
941
if (coptions.m_use_mode7_part1)
942
{
943
// Mode 7 1-subset never requires endpoint quantization, so it cannot fail to find at least one usable solution.
944
pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, coptions.m_first_mode7_part1_weight_ise_range, coptions.m_last_mode7_part1_weight_ise_range);
945
}
946
else if (is_greyscale)
947
{
948
// Special case if it's a pure grayscale block and mode 7 was disabled - try it anyway, because mode 11 has worse B channel quantization.
949
pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, 1, 1);
950
pack_mode7_single_part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, UHDR_MODE7_PART1_LAST_ISE_RANGE, UHDR_MODE7_PART1_LAST_ISE_RANGE);
951
}
952
}
953
954
bool have_est = false;
955
int best_parts[basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2];
956
957
if ((coptions.m_use_mode7_part2) || (coptions.m_use_mode11_part2))
958
{
959
if (coptions.m_use_estimated_partitions)
960
have_est = estimate_partition(*pBlock_pixels_half, best_parts, coptions.m_max_estimated_partitions);
961
}
962
963
if (coptions.m_use_mode7_part2)
964
{
965
const size_t cur_num_results = all_results.size();
966
967
pack_mode7_2part(*pBlock_pixels_half, block_pixels_q16,
968
all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts,
969
coptions.m_first_mode7_part2_weight_ise_range, coptions.m_last_mode7_part2_weight_ise_range);
970
971
// If we couldn't find any packable 2-subset mode 7 results at weight levels >= 5 levels (which always requires endpoint quant), then try falling back to
972
// 5 levels which doesn't require endpoint quantization.
973
if (all_results.size() == cur_num_results)
974
{
975
if (coptions.m_first_mode7_part2_weight_ise_range >= astc_helpers::BISE_5_LEVELS)
976
{
977
pack_mode7_2part(*pBlock_pixels_half, block_pixels_q16,
978
all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts,
979
astc_helpers::BISE_4_LEVELS, astc_helpers::BISE_4_LEVELS);
980
}
981
}
982
}
983
984
if (coptions.m_use_mode11_part2)
985
{
986
// This always requires endpoint quant, so it could fail to find any usable solutions.
987
pack_mode11_2part(*pBlock_pixels_half, block_pixels_q16, all_results, coptions, have_est ? coptions.m_max_estimated_partitions : 0, best_parts);
988
}
989
990
if (coptions.m_refine_weights)
991
{
992
// TODO: This is quite slow.
993
for (uint32_t i = 0; i < all_results.size(); i++)
994
{
995
bool status = astc_hdr_4x4_refine_weights(pRGBPixelsHalf, all_results[i], coptions, coptions.m_bc6h_err_weight, &all_results[i].m_improved_via_refinement_flag);
996
assert(status);
997
BASISU_NOTE_UNUSED(status);
998
}
999
}
1000
1001
} // !is_solid
1002
1003
return true;
1004
}
1005
1006
// Writes the physical ASTC block described by `results` into `dst_blk`.
// Solid results carry a ready-made block which is copied verbatim; every other result is
// packed from its logical block via astc_helpers::pack_astc_block().
// Returns false if the encoder has not been initialized or if packing fails.
bool astc_hdr_4x4_pack_results_to_block(astc_blk& dst_blk, const astc_hdr_4x4_pack_results& results)
{
	assert(g_astc_hdr_enc_initialized);
	if (!g_astc_hdr_enc_initialized)
		return false;

	if (!results.m_is_solid)
	{
		const bool packed_ok = astc_helpers::pack_astc_block((astc_helpers::astc_block&)dst_blk, results.m_best_blk);
		if (packed_ok)
			return true;

		// Packing is not expected to fail here - trap in debug builds, report failure otherwise.
		assert(0);
		return false;
	}

	// Solid block: the physical block was already built, so a byte copy is all that is needed.
	memcpy(&dst_blk, &results.m_solid_blk, sizeof(results.m_solid_blk));
	return true;
}
1028
1029
// Refines a block's chosen weight indices, balancing BC6H and ASTC HDR error.
1030
bool astc_hdr_4x4_refine_weights(const half_float *pSource_block,
1031
astc_hdr_4x4_pack_results& cur_results, const uastc_hdr_4x4_codec_options& coptions, float bc6h_weight, bool *pImproved_flag)
1032
{
1033
if (pImproved_flag)
1034
*pImproved_flag = false;
1035
1036
if (cur_results.m_is_solid)
1037
return true;
1038
1039
const uint32_t total_weights = astc_helpers::get_ise_levels(cur_results.m_best_blk.m_weight_ise_range);
1040
assert((total_weights >= MIN_SUPPORTED_WEIGHT_LEVELS) && (total_weights <= MAX_SUPPORTED_WEIGHT_LEVELS));
1041
1042
double best_err[4][4];
1043
uint8_t best_weight[4][4];
1044
for (uint32_t y = 0; y < 4; y++)
1045
{
1046
for (uint32_t x = 0; x < 4; x++)
1047
{
1048
best_err[y][x] = BIG_FLOAT_VAL;
1049
best_weight[y][x] = 0;
1050
}
1051
}
1052
1053
astc_hdr_4x4_pack_results temp_results;
1054
1055
const float c_weights[3] = { coptions.m_r_err_scale, coptions.m_g_err_scale, 1.0f };
1056
1057
for (uint32_t weight_index = 0; weight_index < total_weights; weight_index++)
1058
{
1059
temp_results = cur_results;
1060
for (uint32_t i = 0; i < 16; i++)
1061
temp_results.m_best_blk.m_weights[i] = (uint8_t)weight_index;
1062
1063
half_float unpacked_astc_blk_rgba[4][4][4];
1064
bool res = astc_helpers::decode_block(temp_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
1065
assert(res);
1066
1067
basist::bc6h_block trial_bc6h_blk;
1068
res = basist::astc_hdr_transcode_to_bc6h(temp_results.m_best_blk, trial_bc6h_blk);
1069
assert(res);
1070
1071
half_float unpacked_bc6h_blk[4][4][3];
1072
res = unpack_bc6h(&trial_bc6h_blk, unpacked_bc6h_blk, false);
1073
assert(res);
1074
BASISU_NOTE_UNUSED(res);
1075
1076
for (uint32_t y = 0; y < 4; y++)
1077
{
1078
for (uint32_t x = 0; x < 4; x++)
1079
{
1080
double total_err = 0.0f;
1081
1082
for (uint32_t c = 0; c < 3; c++)
1083
{
1084
const half_float orig_c = pSource_block[(x + y * 4) * 3 + c];
1085
const double orig_c_q = q(orig_c, Q_LOG_BIAS_4x4);
1086
1087
const half_float astc_c = unpacked_astc_blk_rgba[y][x][c];
1088
const double astc_c_q = q(astc_c, Q_LOG_BIAS_4x4);
1089
const double astc_e = square(astc_c_q - orig_c_q) * c_weights[c];
1090
1091
const half_float bc6h_c = unpacked_bc6h_blk[y][x][c];
1092
const double bc6h_c_q = q(bc6h_c, Q_LOG_BIAS_4x4);
1093
const double bc6h_e = square(bc6h_c_q - orig_c_q) * c_weights[c];
1094
1095
const double overall_err = astc_e * (1.0f - bc6h_weight) + bc6h_e * bc6h_weight;
1096
1097
total_err += overall_err;
1098
1099
} // c
1100
1101
if (total_err < best_err[y][x])
1102
{
1103
best_err[y][x] = total_err;
1104
best_weight[y][x] = (uint8_t)weight_index;
1105
}
1106
1107
} // x
1108
} // y
1109
1110
} // weight_index
1111
1112
bool any_changed = false;
1113
for (uint32_t i = 0; i < 16; i++)
1114
{
1115
if (cur_results.m_best_blk.m_weights[i] != best_weight[i >> 2][i & 3])
1116
{
1117
any_changed = true;
1118
break;
1119
}
1120
}
1121
1122
if (any_changed)
1123
{
1124
memcpy(cur_results.m_best_blk.m_weights, best_weight, 16);
1125
1126
{
1127
bool res = basist::astc_hdr_transcode_to_bc6h(cur_results.m_best_blk, cur_results.m_bc6h_block);
1128
assert(res);
1129
BASISU_NOTE_UNUSED(res);
1130
1131
half_float unpacked_astc_blk_rgba[4][4][4];
1132
res = astc_helpers::decode_block(cur_results.m_best_blk, unpacked_astc_blk_rgba, 4, 4, astc_helpers::cDecodeModeHDR16);
1133
assert(res);
1134
1135
half_float unpacked_astc_blk_rgb[4][4][3];
1136
for (uint32_t y = 0; y < 4; y++)
1137
for (uint32_t x = 0; x < 4; x++)
1138
for (uint32_t c = 0; c < 3; c++)
1139
unpacked_astc_blk_rgb[y][x][c] = unpacked_astc_blk_rgba[y][x][c];
1140
1141
cur_results.m_best_block_error = compute_block_error(16, pSource_block, &unpacked_astc_blk_rgb[0][0][0], coptions);
1142
}
1143
1144
if (pImproved_flag)
1145
*pImproved_flag = true;
1146
}
1147
1148
return true;
1149
}
1150
1151
void astc_hdr_4x4_block_stats::update(const astc_hdr_4x4_pack_results& log_blk)
1152
{
1153
std::lock_guard<std::mutex> lck(m_mutex);
1154
1155
m_total_blocks++;
1156
1157
if (log_blk.m_improved_via_refinement_flag)
1158
m_total_refined++;
1159
1160
if (log_blk.m_is_solid)
1161
{
1162
m_total_solid++;
1163
}
1164
else
1165
{
1166
int best_weight_range = log_blk.m_best_blk.m_weight_ise_range;
1167
1168
if (log_blk.m_best_blk.m_color_endpoint_modes[0] == 7)
1169
{
1170
m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 6U)]++;
1171
1172
if (log_blk.m_best_blk.m_num_partitions == 2)
1173
{
1174
m_total_mode7_2part++;
1175
1176
m_mode7_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 6U)]++;
1177
m_total_2part++;
1178
1179
m_weight_range_hist_7_2part[bounds_check(best_weight_range, 0, 11)]++;
1180
1181
m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
1182
}
1183
else
1184
{
1185
m_total_mode7_1part++;
1186
1187
m_weight_range_hist_7[bounds_check(best_weight_range, 0, 11)]++;
1188
}
1189
}
1190
else
1191
{
1192
m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[0], 0U, 9U)]++;
1193
if (log_blk.m_constrained_weights)
1194
m_total_mode11_1part_constrained_weights++;
1195
1196
if (log_blk.m_best_blk.m_num_partitions == 2)
1197
{
1198
m_total_mode11_2part++;
1199
1200
m_mode11_submode_hist[bounds_check(log_blk.m_best_submodes[1], 0U, 9U)]++;
1201
m_total_2part++;
1202
1203
m_weight_range_hist_11_2part[bounds_check(best_weight_range, 0, 11)]++;
1204
1205
m_part_hist[bounds_check(log_blk.m_best_pat_index, 0U, 32U)]++;
1206
}
1207
else
1208
{
1209
m_total_mode11_1part++;
1210
1211
m_weight_range_hist_11[bounds_check(best_weight_range, 0, 11)]++;
1212
}
1213
}
1214
}
1215
}
1216
1217
void astc_hdr_4x4_block_stats::print()
1218
{
1219
std::lock_guard<std::mutex> lck(m_mutex);
1220
1221
assert(m_total_blocks);
1222
if (!m_total_blocks)
1223
return;
1224
1225
printf("\nLow-level ASTC Encoder Statistics:\n");
1226
printf("Total blocks: %u\n", m_total_blocks);
1227
printf("Total solid: %u %3.2f%%\n", m_total_solid, (m_total_solid * 100.0f) / m_total_blocks);
1228
printf("Total refined: %u %3.2f%%\n", m_total_refined, (m_total_refined * 100.0f) / m_total_blocks);
1229
1230
printf("Total mode 11, 1 partition: %u %3.2f%%\n", m_total_mode11_1part, (m_total_mode11_1part * 100.0f) / m_total_blocks);
1231
printf("Total mode 11, 1 partition, constrained weights: %u %3.2f%%\n", m_total_mode11_1part_constrained_weights, (m_total_mode11_1part_constrained_weights * 100.0f) / m_total_blocks);
1232
printf("Total mode 11, 2 partition: %u %3.2f%%\n", m_total_mode11_2part, (m_total_mode11_2part * 100.0f) / m_total_blocks);
1233
1234
printf("Total mode 7, 1 partition: %u %3.2f%%\n", m_total_mode7_1part, (m_total_mode7_1part * 100.0f) / m_total_blocks);
1235
printf("Total mode 7, 2 partition: %u %3.2f%%\n", m_total_mode7_2part, (m_total_mode7_2part * 100.0f) / m_total_blocks);
1236
1237
printf("Total 2 partitions: %u %3.2f%%\n", m_total_2part, (m_total_2part * 100.0f) / m_total_blocks);
1238
printf("\n");
1239
1240
printf("ISE texel weight range histogram mode 11:\n");
1241
for (uint32_t i = 1; i <= UHDR_MODE11_LAST_ISE_RANGE; i++)
1242
printf("%u %u\n", i, m_weight_range_hist_11[i]);
1243
printf("\n");
1244
1245
printf("ISE texel weight range histogram mode 11, 2 partition:\n");
1246
for (uint32_t i = 1; i <= UHDR_MODE11_PART2_LAST_ISE_RANGE; i++)
1247
printf("%u %u\n", i, m_weight_range_hist_11_2part[i]);
1248
printf("\n");
1249
1250
printf("ISE texel weight range histogram mode 7:\n");
1251
for (uint32_t i = 1; i <= UHDR_MODE7_PART1_LAST_ISE_RANGE; i++)
1252
printf("%u %u\n", i, m_weight_range_hist_7[i]);
1253
printf("\n");
1254
1255
printf("ISE texel weight range histogram mode 7, 2 partition:\n");
1256
for (uint32_t i = 1; i <= UHDR_MODE7_PART2_LAST_ISE_RANGE; i++)
1257
printf("%u %u\n", i, m_weight_range_hist_7_2part[i]);
1258
printf("\n");
1259
1260
printf("Mode 11 submode histogram:\n");
1261
for (uint32_t i = 0; i <= MODE11_TOTAL_SUBMODES; i++) // +1 because of the extra direct encoding
1262
printf("%u %u\n", i, m_mode11_submode_hist[i]);
1263
printf("\n");
1264
1265
printf("Mode 7 submode histogram:\n");
1266
for (uint32_t i = 0; i < MODE7_TOTAL_SUBMODES; i++)
1267
printf("%u %u\n", i, m_mode7_submode_hist[i]);
1268
printf("\n");
1269
1270
printf("Partition pattern table usage histogram:\n");
1271
for (uint32_t i = 0; i < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2; i++)
1272
printf("%u:%u ", i, m_part_hist[i]);
1273
printf("\n\n");
1274
}
1275
1276
} // namespace basisu
1277
1278
1279