Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_frontend.cpp
9902 views
1
// basisu_frontend.cpp
2
// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
//
16
// TODO:
17
// This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here.
18
// Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this.
19
//
20
#include "../transcoder/basisu.h"
21
#include "basisu_frontend.h"
22
#include "basisu_opencl.h"
23
#include <unordered_set>
24
#include <unordered_map>
25
26
#if BASISU_SUPPORT_SSE
27
#define CPPSPMD_NAME(a) a##_sse41
28
#include "basisu_kernels_declares.h"
29
#endif
30
31
// Internal consistency check: on failure, reports the failing line number and aborts via handle_verify_failure().
#define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0)
32
33
namespace basisu
34
{
35
// Cap on the number of worker threads used while building the hierarchical codebooks.
const uint32_t cMaxCodebookCreationThreads = 8;

// Upper bound on endpoint refinement iterations (compression levels 4/5 use this directly; level 6 uses 2x).
const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3;
//const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3;

// Size of the parent (coarse) endpoint codebook used to accelerate clustering.
const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16;
// Parent selector codebook sizes: larger at low compression levels (0/1), smaller otherwise.
const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32;
const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16;
43
44
// TODO - How to handle internal verifies in the basisu lib
45
static inline void handle_verify_failure(int line)
46
{
47
error_printf("basisu_frontend: verify check failed at line %i!\n", line);
48
abort();
49
}
50
51
bool basisu_frontend::init(const params &p)
52
{
53
debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n",
54
p.m_multithreaded, p.m_pJob_pool ? p.m_pJob_pool->get_total_threads() : 0,
55
p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level);
56
57
if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters))
58
return false;
59
if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters))
60
return false;
61
62
m_source_blocks.resize(0);
63
append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks);
64
65
m_params = p;
66
67
if (m_params.m_pOpenCL_context)
68
{
69
BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block));
70
71
// Upload the RGBA pixel blocks a single time.
72
if (!opencl_set_pixel_blocks(m_params.m_pOpenCL_context, m_source_blocks.size(), (cl_pixel_block*)m_source_blocks.data()))
73
{
74
// This is not fatal, we just won't use OpenCL.
75
error_printf("basisu_frontend::init: opencl_set_pixel_blocks() failed\n");
76
m_params.m_pOpenCL_context = nullptr;
77
m_opencl_failed = true;
78
}
79
}
80
81
m_encoded_blocks.resize(m_params.m_num_source_blocks);
82
memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0]));
83
84
m_num_endpoint_codebook_iterations = 1;
85
m_num_selector_codebook_iterations = 1;
86
87
switch (p.m_compression_level)
88
{
89
case 0:
90
{
91
m_endpoint_refinement = false;
92
m_use_hierarchical_endpoint_codebooks = true;
93
m_use_hierarchical_selector_codebooks = true;
94
break;
95
}
96
case 1:
97
{
98
m_endpoint_refinement = true;
99
m_use_hierarchical_endpoint_codebooks = true;
100
m_use_hierarchical_selector_codebooks = true;
101
102
break;
103
}
104
case 2:
105
{
106
m_endpoint_refinement = true;
107
m_use_hierarchical_endpoint_codebooks = true;
108
m_use_hierarchical_selector_codebooks = true;
109
110
break;
111
}
112
case 3:
113
{
114
m_endpoint_refinement = true;
115
m_use_hierarchical_endpoint_codebooks = false;
116
m_use_hierarchical_selector_codebooks = false;
117
break;
118
}
119
case 4:
120
{
121
m_endpoint_refinement = true;
122
m_use_hierarchical_endpoint_codebooks = true;
123
m_use_hierarchical_selector_codebooks = true;
124
m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
125
m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
126
break;
127
}
128
case 5:
129
{
130
m_endpoint_refinement = true;
131
m_use_hierarchical_endpoint_codebooks = false;
132
m_use_hierarchical_selector_codebooks = false;
133
m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
134
m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS;
135
break;
136
}
137
case 6:
138
default:
139
{
140
m_endpoint_refinement = true;
141
m_use_hierarchical_endpoint_codebooks = false;
142
m_use_hierarchical_selector_codebooks = false;
143
m_num_endpoint_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
144
m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS*2;
145
break;
146
}
147
148
}
149
150
if (m_params.m_disable_hierarchical_endpoint_codebooks)
151
m_use_hierarchical_endpoint_codebooks = false;
152
153
debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n",
154
m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations);
155
156
return true;
157
}
158
159
// Top-level ETC1S compression driver. First packs each source block to high-quality ETC1S,
// then quantizes endpoints and selectors into codebooks — either by remapping against
// caller-supplied global codebooks, or by clustering + iterative refinement — and finally
// finalizes and (optionally) validates the output.
// Returns false only if output validation fails (when m_params.m_validate is set).
bool basisu_frontend::compress()
{
	debug_printf("basisu_frontend::compress\n");

	m_total_blocks = m_params.m_num_source_blocks;
	m_total_pixels = m_total_blocks * cPixelBlockTotalPixels;

	// Encode the initial high quality ETC1S texture

	init_etc1_images();

	// First quantize the ETC1S endpoints

	if (m_params.m_pGlobal_codebooks)
	{
		// Caller supplied pre-built codebooks: just map each block to the best entries.
		init_global_codebooks();
	}
	else
	{
		init_endpoint_training_vectors();

		generate_endpoint_clusters();

		for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++)
		{
			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(check_etc1s_constraints());

				BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
			}

			// The first iteration uses the initial clusterization; subsequent ones split off new clusters first.
			if (refine_endpoint_step)
			{
				introduce_new_endpoint_clusters();
			}

			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
			}

			generate_endpoint_codebook(refine_endpoint_step);

			if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
			{
				char buf[256];
				snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step);
				dump_endpoint_clusterization_visualization(buf, false);
			}

			bool early_out = false;

			if (m_endpoint_refinement)
			{
				//dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png");

				// If refinement makes no further progress we can stop iterating early (after cleanup below).
				if (!refine_endpoint_clusterization())
					early_out = true;

				// Video special case: with a single iteration, clean up and regenerate the codebook now.
				if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1))
				{
					eliminate_redundant_or_empty_endpoint_clusters();
					generate_endpoint_codebook(basisu::maximum(1U, refine_endpoint_step));
				}

				if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization))
				{
					char buf[256];
					snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step);

					dump_endpoint_clusterization_visualization(buf, false);
					snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step);

					dump_endpoint_clusterization_visualization(buf, true);
				}
			}

			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
			}

			eliminate_redundant_or_empty_endpoint_clusters();

			if (m_params.m_validate)
			{
				BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false));
			}

			if (m_params.m_debug_stats)
				debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size());

			if (early_out)
				break;
		}

		if (m_params.m_validate)
		{
			BASISU_FRONTEND_VERIFY(check_etc1s_constraints());
		}

		generate_block_endpoint_clusters();

		create_initial_packed_texture();

		// Now quantize the ETC1S selectors

		generate_selector_clusters();

		if (m_use_hierarchical_selector_codebooks)
			compute_selector_clusters_within_each_parent_cluster();

		if (m_params.m_compression_level == 0)
		{
			// Fastest path: a single selector codebook pass, no endpoint re-refinement.
			create_optimized_selector_codebook(0);

			find_optimal_selector_clusters_for_each_block();

			introduce_special_selector_clusters();
		}
		else
		{
			const uint32_t num_refine_selector_steps = m_num_selector_codebook_iterations;
			for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++)
			{
				create_optimized_selector_codebook(refine_selector_steps);

				find_optimal_selector_clusters_for_each_block();

				introduce_special_selector_clusters();

				// At high compression levels (or for video), also re-refine the endpoints
				// given the chosen selectors; stop iterating once that makes no progress.
				if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))
				{
					if (!refine_block_endpoints_given_selectors())
						break;
				}
			}
		}

		optimize_selector_codebook();

		if (m_params.m_debug_stats)
			debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size());
	}

	finalize();

	if (m_params.m_validate)
	{
		if (!validate_output())
			return false;
	}

	debug_printf("basisu_frontend::compress: Done\n");

	return true;
}
317
318
// Maps every source block to the best entries of the caller-supplied global (pre-built)
// endpoint and selector codebooks, instead of clustering from scratch. Copies the
// transcoder's codebooks into the frontend's arrays, then runs NUM_PASSES alternating
// passes of endpoint search / selector search, re-encoding m_encoded_blocks each pass.
// Always returns true.
bool basisu_frontend::init_global_codebooks()
{
	const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks;

	const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints();
	const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors();

	// Copy the global endpoint codebook. ETC1S uses one color/inten per block, so both
	// subblock slots get the same intensity table.
	m_endpoint_cluster_etc_params.resize(endpoints.size());
	for (uint32_t i = 0; i < endpoints.size(); i++)
	{
		m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5;
		m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5;

		m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255);
		m_endpoint_cluster_etc_params[i].m_color_used[0] = true;
		m_endpoint_cluster_etc_params[i].m_valid = true;
	}

	// Copy the global selector codebook (4x4 selector grids).
	m_optimized_cluster_selectors.resize(selectors.size());
	for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
	{
		for (uint32_t y = 0; y < 4; y++)
			for (uint32_t x = 0; x < 4; x++)
				m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y));
	}

	m_block_endpoint_clusters_indices.resize(m_total_blocks);

	m_orig_encoded_blocks.resize(m_total_blocks);

	m_block_selector_cluster_index.resize(m_total_blocks);

#if 0
	for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
	{
		const uint32_t first_index = block_index_iter;
		const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

		m_params.m_pJob_pool->add_job([this, first_index, last_index] {

			for (uint32_t block_index = first_index; block_index < last_index; block_index++)
			{
				const etc_block& blk = m_etc1_blocks_etc1s[block_index];

				const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];

				etc_block trial_blk;
				trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]);
				trial_blk.set_flip_bit(true);

				uint64_t best_err = UINT64_MAX;
				uint32_t best_index = 0;

				for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
				{
					trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());

					const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
					if (cur_err < best_err)
					{
						best_err = cur_err;
						best_index = i;
						if (!cur_err)
							break;
					}

				} // i

				m_block_selector_cluster_index[block_index] = best_index;
			}

		});

	}

	m_params.m_pJob_pool->wait_for_all();

	m_encoded_blocks.resize(m_total_blocks);
	for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
	{
		const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
		const uint32_t selector_index = m_block_selector_cluster_index[block_index];

		etc_block& blk = m_encoded_blocks[block_index];

		blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
		blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
		blk.set_flip_bit(true);
		blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
	}
#endif

	// HACK HACK
	const uint32_t NUM_PASSES = 3;
	for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
	{
		debug_printf("init_global_codebooks: pass %u\n", pass);

		// Pass structure: (1) find the best endpoint codebook entry per block,
		// (2) find the best selector entry per block, (3) rebuild m_encoded_blocks,
		// which feeds the next pass's endpoint search.
		const uint32_t N = 128;
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					// Pass 0 compares against the initial high-quality ETC1S encoding; later passes
					// compare against the previous pass's quantized encoding.
					const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index];
					const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits();

					etc_block trial_blk(blk);
					trial_blk.set_raw_selector_bits(blk_raw_selector_bits);
					trial_blk.set_flip_bit(true);

					uint64_t best_err = UINT64_MAX;
					uint32_t best_index = 0;
					etc_block best_block(trial_blk);

					for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++)
					{
						// Search heuristic: only consider codebook entries whose intensity table is
						// <= the block's current one.
						if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0))
							continue;

						trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]);
						trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]);

						const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
						uint64_t cur_err;
						// Pass 0 also re-derives the optimal selectors for each candidate endpoint;
						// later passes keep the selectors fixed and just evaluate the error.
						if (!pass)
							cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
						else
							cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual);

						if (cur_err < best_err)
						{
							best_err = cur_err;
							best_index = i;
							best_block = trial_blk;

							if (!cur_err)
								break;
						}
					}

					// Both subblocks share the same endpoint entry (ETC1S constraint).
					m_block_endpoint_clusters_indices[block_index][0] = best_index;
					m_block_endpoint_clusters_indices[block_index][1] = best_index;

					m_orig_encoded_blocks[block_index] = best_block;

				} // block_index

			});

		}

		m_params.m_pJob_pool->wait_for_all();

		// Rebuild the endpoint cluster -> subblock index lists (2 subblocks per block).
		m_endpoint_clusters.resize(0);
		m_endpoint_clusters.resize(endpoints.size());
		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0];
			m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2);
			m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1);
		}

		m_block_selector_cluster_index.resize(m_total_blocks);

		// (2) Given each block's chosen endpoints, exhaustively pick the best selector entry.
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index] {

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0];

					etc_block trial_blk;
					trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]);
					trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]);
					trial_blk.set_flip_bit(true);

					uint64_t best_err = UINT64_MAX;
					uint32_t best_index = 0;

					for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++)
					{
						trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits());

						const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);
						if (cur_err < best_err)
						{
							best_err = cur_err;
							best_index = i;
							if (!cur_err)
								break;
						}

					} // i

					m_block_selector_cluster_index[block_index] = best_index;
				}

			});

		}

		m_params.m_pJob_pool->wait_for_all();

		// (3) Re-encode every block from its chosen endpoint + selector entries; this is the
		// encoding the next pass compares against.
		m_encoded_blocks.resize(m_total_blocks);
		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0];
			const uint32_t selector_index = m_block_selector_cluster_index[block_index];

			etc_block& blk = m_encoded_blocks[block_index];

			blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]);
			blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]);
			blk.set_flip_bit(true);
			blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits());
		}

	} // pass

	// Final selector cluster -> block index lists, from the last pass's assignments.
	m_selector_cluster_block_indices.resize(selectors.size());
	for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++)
		m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index);

	return true;
}
552
553
// Ensures the selector codebook contains the four pure-flat selector patterns (all selectors
// equal to 0, 1, 2 or 3), which avoids visible artifacts on flat regions. For each flat
// pattern not already present, it appends a new cluster and moves over every block whose
// original encoding was flat and whose error strictly improves with the flat pattern.
// optimize_selector_codebook() later removes any redundant clusters created here.
void basisu_frontend::introduce_special_selector_clusters()
{
	debug_printf("introduce_special_selector_clusters\n");

	uint32_t total_blocks_relocated = 0;
	const uint32_t initial_selector_clusters = m_selector_cluster_block_indices.size_u32();

	// Tracks which blocks moved to a new flat cluster, so their old cluster lists can be compacted below.
	bool_vec block_relocated_flags(m_total_blocks);

	// Make sure the selector codebook always has pure flat blocks for each possible selector, to avoid obvious artifacts.
	// optimize_selector_codebook() will clean up any redundant clusters we create here.
	for (uint32_t sel = 0; sel < 4; sel++)
	{
		// Build the flat 4x4 selector pattern for this selector value.
		etc_block blk;
		clear_obj(blk);
		for (uint32_t j = 0; j < 16; j++)
			blk.set_selector(j & 3, j >> 2, sel);

		// Skip this pattern if the codebook already contains it.
		int k;
		for (k = 0; k < (int)m_optimized_cluster_selectors.size(); k++)
			if (m_optimized_cluster_selectors[k].get_raw_selector_bits() == blk.get_raw_selector_bits())
				break;
		if (k < (int)m_optimized_cluster_selectors.size())
			continue;

		debug_printf("Introducing sel %u\n", sel);

		const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32();

		m_optimized_cluster_selectors.push_back(blk);

		vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index);

		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
		{
			// Only consider blocks whose original (pre-quantization) encoding was exactly this flat pattern.
			if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits())
				continue;

			// See if using flat selectors actually decreases the block's error.
			const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index];

			// Rebuild the block's current quantized encoding to measure its error.
			etc_block cur_blk;
			const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
			cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
			cur_blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
			cur_blk.set_raw_selector_bits(get_selector_cluster_selector_bits(old_selector_cluster_index).get_raw_selector_bits());
			cur_blk.set_flip_bit(true);

			const uint64_t cur_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);

			cur_blk.set_raw_selector_bits(blk.get_raw_selector_bits());

			const uint64_t new_err = cur_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual);

			// Only relocate when the flat pattern strictly improves the error.
			if (new_err >= cur_err)
				continue;

			// Change the block to use the new cluster
			m_block_selector_cluster_index[block_index] = new_selector_cluster_index;

			m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index);

			block_relocated_flags[block_index] = true;

#if 0
			int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index);
			if (j >= 0)
				m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j);
#endif

			total_blocks_relocated++;

			m_encoded_blocks[block_index].set_raw_selector_bits(blk.get_raw_selector_bits());

		} // block_index

	} // sel

	if (total_blocks_relocated)
	{
		debug_printf("Fixing selector codebook\n");

		// Compact relocated blocks out of the original clusters' block lists (in-place filter).
		for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++)
		{
			uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index];

			uint32_t dst_ofs = 0;

			for (uint32_t i = 0; i < block_indices.size(); i++)
			{
				const uint32_t block_index = block_indices[i];
				if (!block_relocated_flags[block_index])
					block_indices[dst_ofs++] = block_index;
			}

			block_indices.resize(dst_ofs);
		}
	}

	debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated);
}
654
655
// This method will change the number and ordering of the selector codebook clusters.
// Compacts the selector codebook: drops clusters no block references, merges clusters with
// identical raw selector bits (via a hashmap), then remaps m_block_selector_cluster_index,
// rebuilds the per-cluster block lists, and patches the parent-cluster index lists.
void basisu_frontend::optimize_selector_codebook()
{
	debug_printf("optimize_selector_codebook\n");

	const uint32_t orig_total_selector_clusters = m_optimized_cluster_selectors.size_u32();

	// Mark which clusters are actually referenced by at least one block.
	bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
	for (uint32_t i = 0; i < m_total_blocks; i++)
		selector_cluster_was_used[m_block_selector_cluster_index[i]] = true;

	// old_to_new[i] is the surviving cluster's new index, or -1 if cluster i was unused.
	int_vec old_to_new(m_optimized_cluster_selectors.size());
	int_vec new_to_old;
	uint32_t total_new_entries = 0;

	// Keyed on raw selector bits, so duplicate selector patterns collapse to one entry.
	std::unordered_map<uint32_t, uint32_t> selector_hashmap;

	for (int i = 0; i < static_cast<int>(m_optimized_cluster_selectors.size()); i++)
	{
		if (!selector_cluster_was_used[i])
		{
			old_to_new[i] = -1;
			continue;
		}

		const uint32_t raw_selector_bits = m_optimized_cluster_selectors[i].get_raw_selector_bits();

		// insert() fails if these selector bits were already seen; reuse that entry's new index.
		auto find_res = selector_hashmap.insert(std::make_pair(raw_selector_bits, total_new_entries));
		if (!find_res.second)
		{
			old_to_new[i] = (find_res.first)->second;
			continue;
		}

		old_to_new[i] = total_new_entries++;
		new_to_old.push_back(i);
	}

	debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries);

	// Remap every block's cluster index to the compacted numbering.
	for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
	{
		BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries));
		m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]];
	}

	basisu::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0);
	basisu::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? total_new_entries : 0);

	for (uint32_t i = 0; i < total_new_entries; i++)
	{
		if (m_optimized_cluster_selectors.size())
			new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]];

		//if (m_selector_cluster_block_indices.size())
		//	new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]];
	}

	// Rebuild the per-cluster block lists from scratch using the remapped block indices
	// (merged clusters get the union of their blocks this way).
	for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++)
	{
		new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i);
	}

	m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors);
	m_selector_cluster_block_indices.swap(new_selector_cluster_indices);

	// This isn't strictly necessary - doing it for completeness/future sanity.
	if (m_selector_clusters_within_each_parent_cluster.size())
	{
		for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
			for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
				m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]];
	}

	debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries);
}
731
732
// Packs every source pixel block to a high-quality ETC1S block (one base color + one
// intensity table per block, flip bit set) into m_etc1_blocks_etc1s. Tries the OpenCL
// encoder first when a context is available; on failure (non-fatal) it disables OpenCL
// and falls back to the multithreaded CPU etc1_optimizer path.
void basisu_frontend::init_etc1_images()
{
	debug_printf("basisu_frontend::init_etc1_images\n");

	interval_timer tm;
	tm.start();

	m_etc1_blocks_etc1s.resize(m_total_blocks);

	bool use_cpu = true;

	if (m_params.m_pOpenCL_context)
	{
		// The number of encoding permutations tried scales with the compression level.
		uint32_t total_perms = 64;
		if (m_params.m_compression_level == 0)
			total_perms = 4;
		else if (m_params.m_compression_level == 1)
			total_perms = 16;
		else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
			total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;

		bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms);
		if (status)
			use_cpu = false;
		else
		{
			// OpenCL failure is not fatal: fall back to the CPU path and remember the failure.
			error_printf("basisu_frontend::init_etc1_images: opencl_encode_etc1s_blocks() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}
	}

	if (use_cpu)
	{
		// Encode in chunks of N blocks per job-pool job.
		const uint32_t N = 4096;
		for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
		{
			const uint32_t first_index = block_index_iter;
			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index] {

				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
				{
					const pixel_block& source_blk = get_source_pixel_block(block_index);

					etc1_optimizer optimizer;
					etc1_optimizer::params optimizer_params;
					etc1_optimizer::results optimizer_results;

					// Optimizer quality follows the compression level (default quality for mid levels).
					if (m_params.m_compression_level == 0)
						optimizer_params.m_quality = cETCQualityFast;
					else if (m_params.m_compression_level == 1)
						optimizer_params.m_quality = cETCQualityMedium;
					else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
						optimizer_params.m_quality = cETCQualityUber;

					optimizer_params.m_num_src_pixels = 16;
					optimizer_params.m_pSrc_pixels = source_blk.get_ptr();
					optimizer_params.m_perceptual = m_params.m_perceptual;

					uint8_t selectors[16];
					optimizer_results.m_pSelectors = selectors;
					optimizer_results.m_n = 16;

					optimizer.init(optimizer_params, optimizer_results);
					if (!optimizer.compute())
						BASISU_FRONTEND_VERIFY(false);

					// Write the optimizer's output into the ETC1S block (flip bit always set for ETC1S).
					etc_block& blk = m_etc1_blocks_etc1s[block_index];

					memset(&blk, 0, sizeof(blk));
					blk.set_block_color5_etc1s(optimizer_results.m_block_color_unscaled);
					blk.set_inten_tables_etc1s(optimizer_results.m_block_inten_table);
					blk.set_flip_bit(true);

					for (uint32_t y = 0; y < 4; y++)
						for (uint32_t x = 0; x < 4; x++)
							blk.set_selector(x, y, selectors[x + y * 4]);
				}

			});

		}

		m_params.m_pJob_pool->wait_for_all();

	} // use_cpu

	debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
}
823
824
void basisu_frontend::init_endpoint_training_vectors()
825
{
826
debug_printf("init_endpoint_training_vectors\n");
827
828
vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs();
829
830
training_vecs.resize(m_total_blocks * 2);
831
832
const uint32_t N = 16384;
833
for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
834
{
835
const uint32_t first_index = block_index_iter;
836
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
837
838
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
839
840
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
841
{
842
const etc_block &blk = m_etc1_blocks_etc1s[block_index];
843
844
color_rgba block_colors[2];
845
blk.get_block_low_high_colors(block_colors, 0);
846
847
vec6F v;
848
v[0] = block_colors[0].r * (1.0f / 255.0f);
849
v[1] = block_colors[0].g * (1.0f / 255.0f);
850
v[2] = block_colors[0].b * (1.0f / 255.0f);
851
v[3] = block_colors[1].r * (1.0f / 255.0f);
852
v[4] = block_colors[1].g * (1.0f / 255.0f);
853
v[5] = block_colors[1].b * (1.0f / 255.0f);
854
855
training_vecs[block_index * 2 + 0] = std::make_pair(v, 1);
856
training_vecs[block_index * 2 + 1] = std::make_pair(v, 1);
857
858
} // block_index;
859
860
} );
861
862
} // block_index_iter
863
864
m_params.m_pJob_pool->wait_for_all();
865
}
866
867
// Quantizes the endpoint training vectors into endpoint clusters via a (possibly threaded)
// hierarchical codebook build, and, when hierarchical endpoint codebooks are enabled, also
// records each block's parent cluster and verifies the cluster/parent assignment invariants.
void basisu_frontend::generate_endpoint_clusters()
{
	debug_printf("Begin endpoint quantization\n");

	// Only use a parent (coarse) codebook when there are enough endpoint clusters to justify it.
	const uint32_t parent_codebook_size = (m_params.m_max_endpoint_clusters >= 256) ? BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE : 0;
	uint32_t max_threads = 0;
	max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
	if (m_params.m_pJob_pool)
		max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);

	debug_printf("max_threads: %u\n", max_threads);
	bool status = generate_hierarchical_codebook_threaded(m_endpoint_clusterizer,
		m_params.m_max_endpoint_clusters, m_use_hierarchical_endpoint_codebooks ? parent_codebook_size : 0,
		m_endpoint_clusters,
		m_endpoint_parent_clusters,
		max_threads, m_params.m_pJob_pool, true);
	BASISU_FRONTEND_VERIFY(status);

	if (m_use_hierarchical_endpoint_codebooks)
	{
		// If the build produced no parent clusters, fall back to one parent cluster
		// containing every subblock (2 entries per block).
		if (!m_endpoint_parent_clusters.size())
		{
			m_endpoint_parent_clusters.resize(0);
			m_endpoint_parent_clusters.resize(1);
			for (uint32_t i = 0; i < m_total_blocks; i++)
			{
				m_endpoint_parent_clusters[0].push_back(i*2);
				m_endpoint_parent_clusters[0].push_back(i*2+1);
			}
		}

		// Parent indices are stored in uint8_t's below, so the parent codebook must fit.
		BASISU_ASSUME(BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE <= UINT8_MAX);

		// Record each block's parent cluster; 0xFF marks "unassigned" for the sanity check below.
		m_block_parent_endpoint_cluster.resize(0);
		m_block_parent_endpoint_cluster.resize(m_total_blocks);
		vector_set_all(m_block_parent_endpoint_cluster, 0xFF);
		for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_endpoint_parent_clusters.size(); parent_cluster_index++)
		{
			const uint_vec &cluster = m_endpoint_parent_clusters[parent_cluster_index];
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				// Cluster entries are subblock indices; >> 1 recovers the block index.
				const uint32_t block_index = cluster[j] >> 1;
				m_block_parent_endpoint_cluster[block_index] = static_cast<uint8_t>(parent_cluster_index);
			}
		}

		// Every block must have been assigned to some parent cluster.
		for (uint32_t i = 0; i < m_total_blocks; i++)
		{
			BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[i] != 0xFF);
		}

		// Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
		for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
		{
			const uint_vec &cluster = m_endpoint_clusters[cluster_index];

			uint32_t parent_cluster_index = 0;
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				const uint32_t block_index = cluster[j] >> 1;

				BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size());

				if (!j)
				{
					parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
				}
				else
				{
					BASISU_FRONTEND_VERIFY(m_block_parent_endpoint_cluster[block_index] == parent_cluster_index);
				}
			}
		}
	}

	if (m_params.m_debug_stats)
		debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32());
}
945
946
// Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicaste which cluster index each block uses.
947
void basisu_frontend::generate_block_endpoint_clusters()
948
{
949
m_block_endpoint_clusters_indices.resize(m_total_blocks);
950
951
for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
952
{
953
const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
954
955
for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
956
{
957
const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
958
const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
959
960
m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index;
961
962
} // cluster_indices_iter
963
}
964
965
if (m_params.m_validate)
966
{
967
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
968
{
969
uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0];
970
uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1];
971
BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1);
972
}
973
}
974
}
975
976
void basisu_frontend::compute_endpoint_clusters_within_each_parent_cluster()
977
{
978
generate_block_endpoint_clusters();
979
980
m_endpoint_clusters_within_each_parent_cluster.resize(0);
981
m_endpoint_clusters_within_each_parent_cluster.resize(m_endpoint_parent_clusters.size());
982
983
// Note: It's possible that some blocks got moved into the same cluster, but live in different parent clusters.
984
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
985
{
986
const uint32_t cluster_index = m_block_endpoint_clusters_indices[block_index][0];
987
const uint32_t parent_cluster_index = m_block_parent_endpoint_cluster[block_index];
988
989
m_endpoint_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
990
}
991
992
for (uint32_t i = 0; i < m_endpoint_clusters_within_each_parent_cluster.size(); i++)
993
{
994
uint_vec &cluster_indices = m_endpoint_clusters_within_each_parent_cluster[i];
995
996
BASISU_FRONTEND_VERIFY(cluster_indices.size());
997
998
vector_sort(cluster_indices);
999
1000
auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
1001
cluster_indices.erase(last, cluster_indices.end());
1002
}
1003
}
1004
1005
// For every subblock in every endpoint cluster, computes the total ETC1S
// quantization error of that subblock against its cluster's current endpoint
// (base color + intensity table), and appends one record per subblock to
// m_subblock_endpoint_quant_err_vec. The vector is sorted at the end, so the
// worst-fitting subblocks end up at the back (consumed by
// introduce_new_endpoint_clusters()). Runs in parallel on the job pool;
// m_lock guards the shared output vector.
void basisu_frontend::compute_endpoint_subblock_error_vec()
{
	m_subblock_endpoint_quant_err_vec.resize(0);

	// Process clusters in batches of N per job.
	const uint32_t N = 512;
	for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
	{
		const uint32_t first_index = cluster_index_iter;
		const uint32_t last_index = minimum<uint32_t>(m_endpoint_clusters.size_u32(), cluster_index_iter + N);

		m_params.m_pJob_pool->add_job( [this, first_index, last_index] {

			for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
			{
				const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];

				assert(cluster_indices.size());

				for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
				{
					// 8 pixels per ETC1 subblock.
					basisu::vector<color_rgba> cluster_pixels(8);

					// Entries pack (block_index * 2 + subblock_index).
					const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
					const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;

					const bool flipped = true;

					const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();

					// Gather this subblock's 8 source pixels via the ETC1 pixel index table.
					for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
					{
						cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
					}

					const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index];

					assert(etc_params.m_valid);

					// The 4 colors this cluster's endpoint can currently produce.
					color_rgba block_colors[4];
					etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true);

					uint64_t total_err = 0;

					// Sum, over the 8 pixels, the distance to the closest of the 4 block colors.
					for (uint32_t i = 0; i < 8; i++)
					{
						const color_rgba &c = cluster_pixels[i];

						uint64_t best_err = UINT64_MAX;
						//uint32_t best_index = 0;

						for (uint32_t s = 0; s < 4; s++)
						{
							uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
							if (err < best_err)
							{
								best_err = err;
								//best_index = s;
							}
						}

						total_err += best_err;
					}

					subblock_endpoint_quant_err quant_err;
					quant_err.m_total_err = total_err;
					quant_err.m_cluster_index = cluster_index;
					quant_err.m_cluster_subblock_index = cluster_indices_iter;
					quant_err.m_block_index = block_index;
					quant_err.m_subblock_index = subblock_index;

					{
						// Serialize appends to the shared output vector.
						std::lock_guard<std::mutex> lock(m_lock);

						m_subblock_endpoint_quant_err_vec.push_back(quant_err);
					}
				}
			} // cluster_index

		} );

	} // cluster_index_iter

	m_params.m_pJob_pool->wait_for_all();

	// Sort by error so the worst subblocks are at the back of the vector.
	vector_sort(m_subblock_endpoint_quant_err_vec);
}
// Greedily grows the endpoint codebook up to m_params.m_max_endpoint_clusters
// by splitting off the worst-quantized subblocks (per
// compute_endpoint_subblock_error_vec()) into brand-new single-block clusters.
// Both subblocks of the chosen block are relocated together, since ETC1S
// requires one endpoint per block.
void basisu_frontend::introduce_new_endpoint_clusters()
{
	debug_printf("introduce_new_endpoint_clusters\n");

	generate_block_endpoint_clusters();

	int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - m_endpoint_clusters.size_u32();
	if (num_new_endpoint_clusters <= 0)
		return;

	// Sorted ascending by error; we consume from the back (largest errors).
	compute_endpoint_subblock_error_vec();

	const uint32_t num_orig_endpoint_clusters = m_endpoint_clusters.size_u32();

	// Training vector indices (block*2+subblock) already moved to a new cluster.
	std::unordered_set<uint32_t> training_vector_was_relocated;

	uint_vec cluster_sizes(num_orig_endpoint_clusters);
	for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
		cluster_sizes[i] = m_endpoint_clusters[i].size_u32();

	// Clusters we've already split once — don't split them again this pass.
	std::unordered_set<uint32_t> ignore_cluster;

	uint32_t total_new_clusters = 0;

	while (num_new_endpoint_clusters)
	{
		if (m_subblock_endpoint_quant_err_vec.size() == 0)
			break;

		// Take the currently worst-quantized subblock.
		subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back());

		m_subblock_endpoint_quant_err_vec.pop_back();

		if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index))
			continue;

		uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index;

		// Don't drain a cluster below 2 entries (one whole block).
		if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2)
			continue;

		// Skip if this subblock — or its sibling (index ^ 1) — was already moved.
		if (unordered_set_contains(training_vector_was_relocated, training_vector_index))
			continue;

		if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1))
			continue;

#if 0
		const uint32_t block_index = subblock_to_move.m_block_index;
		const etc_block& blk = m_etc1_blocks_etc1s[block_index];
		uint32_t ls, hs;
		blk.get_selector_range(ls, hs);
		if (ls != hs)
			continue;
#endif

		//const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size();

		// Append a new cluster holding this subblock, plus matching (invalid) ETC params.
		enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index);
		enlarge_vector(m_endpoint_cluster_etc_params, 1);

		assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size());

		training_vector_was_relocated.insert(training_vector_index);

		// Move the sibling subblock too, keeping the block intact in one cluster.
		m_endpoint_clusters.back().push_back(training_vector_index ^ 1);
		training_vector_was_relocated.insert(training_vector_index ^ 1);

		BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2);
		cluster_sizes[subblock_to_move.m_cluster_index] -= 2;

		ignore_cluster.insert(subblock_to_move.m_cluster_index);

		total_new_clusters++;

		num_new_endpoint_clusters--;
	}

	debug_printf("Introduced %i new endpoint clusters\n", total_new_clusters);

	// Remove the relocated training vectors from their original clusters.
	for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
	{
		uint_vec &cluster_indices = m_endpoint_clusters[i];

		uint_vec new_cluster_indices;
		for (uint32_t j = 0; j < cluster_indices.size(); j++)
		{
			uint32_t training_vector_index = cluster_indices[j];

			if (!unordered_set_contains(training_vector_was_relocated, training_vector_index))
				new_cluster_indices.push_back(training_vector_index);
		}

		if (cluster_indices.size() != new_cluster_indices.size())
		{
			// A source cluster must never end up empty.
			BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0);
			cluster_indices.swap(new_cluster_indices);
		}
	}

	// Rebuild the per-block cluster table to reflect the relocations.
	generate_block_endpoint_clusters();
}
struct color_rgba_hasher
1196
{
1197
inline std::size_t operator()(const color_rgba& k) const
1198
{
1199
uint32_t v = *(const uint32_t*)&k;
1200
1201
//return bitmix32(v);
1202
1203
//v ^= (v << 10);
1204
//v ^= (v >> 12);
1205
1206
return v;
1207
}
1208
};
1209
1210
// Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them.
1211
// TODO: Don't optimize endpoint clusters which haven't changed.
1212
// If step>=1, we check to ensure the new endpoint values actually decrease quantization error.
1213
// Given each endpoint cluster, gathers all the block pixels in that cluster and
// computes optimized ETC1S endpoints (base color + intensity table) for them,
// writing the results into m_endpoint_cluster_etc_params.
// Two paths: an OpenCL path (step 0 only) that deduplicates/weights pixels and
// dispatches all clusters to opencl_encode_etc1s_pixel_clusters(), and a CPU
// path that runs etc1_optimizer per cluster on the job pool.
// If step >= 1, the CPU path only accepts new endpoints when they actually
// reduce the quantization error versus the cluster's previous endpoints.
void basisu_frontend::generate_endpoint_codebook(uint32_t step)
{
	debug_printf("generate_endpoint_codebook\n");

	interval_timer tm;
	tm.start();

	m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size());

	bool use_cpu = true;
	// TODO: Get this working when step>0
	if (m_params.m_pOpenCL_context && !step)
	{
		const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size();

		basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);

		// Flattened, deduplicated pixel list across all clusters, with a
		// parallel per-pixel weight (occurrence count) array.
		std::vector<color_rgba> input_pixels;
		input_pixels.reserve(m_total_blocks * 16);

		std::vector<uint32_t> pixel_weights;
		pixel_weights.reserve(m_total_blocks * 16);

		uint_vec cluster_sizes(total_clusters);

		//typedef basisu::hash_map<color_rgba, uint32_t, color_rgba_hasher> color_hasher_type;
		//color_hasher_type color_hasher;
		//color_hasher.reserve(2048);

		interval_timer hash_tm;
		hash_tm.start();

		// Scratch buffers for the radix sort (colors2 is the ping-pong buffer).
		basisu::vector<uint32_t> colors, colors2;
		colors.reserve(65536);
		colors2.reserve(65536);

		for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
		{
			const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
			// Entries come in subblock pairs, so the count must be even.
			assert((cluster_indices.size() & 1) == 0);

#if 0
			// Disabled variant: copy every pixel with weight 1 (no dedup).
			uint64_t first_pixel_index = input_pixels.size();
			const uint32_t total_pixels = 16 * (cluster_indices.size() / 2);

			input_pixels.resize(input_pixels.size() + total_pixels);
			pixel_weights.resize(pixel_weights.size() + total_pixels);

			uint64_t dst_ofs = first_pixel_index;

			uint64_t total_r = 0, total_g = 0, total_b = 0;
			for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
			{
				const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
				if (subblock_index)
					continue;

				const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
				const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

				for (uint32_t i = 0; i < 16; i++)
				{
					input_pixels[dst_ofs] = pBlock_pixels[i];
					pixel_weights[dst_ofs] = 1;
					dst_ofs++;

					total_r += pBlock_pixels[i].r;
					total_g += pBlock_pixels[i].g;
					total_b += pBlock_pixels[i].b;
				}
			}

			//printf("%i %f %f %f\n", cluster_index, total_r / (float)total_pixels, total_g / (float)total_pixels, total_b / (float)total_pixels);

			pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
			pixel_clusters[cluster_index].m_total_pixels = total_pixels;
			cluster_sizes[cluster_index] = total_pixels;
#elif 1
			// Active variant: gather each block's 16 pixels once (only for
			// subblock 0 — subblock 1 is the same block), radix-sort them as
			// raw 32-bit values, then run-length collapse equal colors into
			// (unique color, weight) pairs.
			colors.resize(cluster_indices.size() * 8);
			colors2.resize(cluster_indices.size() * 8);
			uint32_t dst_ofs = 0;

			for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
			{
				const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
				if (subblock_index)
					continue;

				const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
				const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

				memcpy(colors.data() + dst_ofs, pBlock_pixels, sizeof(color_rgba) * 16);
				dst_ofs += 16;

			} // cluster_indices_iter

			uint32_t* pSorted = radix_sort((uint32_t)colors.size(), colors.data(), colors2.data(), 0, 3);

			const uint64_t first_pixel_index = input_pixels.size();

			// NOTE: prev_color starts at 0 with cur_weight 0, so a leading run
			// of color 0 is counted correctly (the run only flushes when
			// cur_weight is nonzero).
			uint32_t prev_color = 0, cur_weight = 0;

			for (uint32_t i = 0; i < colors.size(); i++)
			{
				uint32_t cur_color = pSorted[i];
				if (cur_color == prev_color)
				{
					// Saturate the weight instead of wrapping to 0.
					if (++cur_weight == 0)
						cur_weight--;
				}
				else
				{
					if (cur_weight)
					{
						input_pixels.push_back(*(const color_rgba*)&prev_color);
						pixel_weights.push_back(cur_weight);
					}

					prev_color = cur_color;
					cur_weight = 1;
				}
			}

			// Flush the final run.
			if (cur_weight)
			{
				input_pixels.push_back(*(const color_rgba*)&prev_color);
				pixel_weights.push_back(cur_weight);
			}

			uint32_t total_unique_pixels = (uint32_t)(input_pixels.size() - first_pixel_index);

			pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
			pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;

			cluster_sizes[cluster_index] = total_unique_pixels;
#else
			// Disabled variant: hash-map based dedup (needs color_hasher above).
			color_hasher.reset();

			for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
			{
				const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
				if (subblock_index)
					continue;

				const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
				const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

				uint32_t *pPrev_weight = nullptr;
				color_rgba prev_color;

				{
					color_rgba cur_color = pBlock_pixels[0];
					auto res = color_hasher.insert(cur_color, 0);

					uint32_t& weight = (res.first)->second;
					if (weight != UINT32_MAX)
						weight++;

					prev_color = cur_color;
					pPrev_weight = &(res.first)->second;
				}

				for (uint32_t i = 1; i < 16; i++)
				{
					color_rgba cur_color = pBlock_pixels[i];

					if (cur_color == prev_color)
					{
						if (*pPrev_weight != UINT32_MAX)
							*pPrev_weight = *pPrev_weight + 1;
					}
					else
					{
						auto res = color_hasher.insert(cur_color, 0);

						uint32_t& weight = (res.first)->second;
						if (weight != UINT32_MAX)
							weight++;

						prev_color = cur_color;
						pPrev_weight = &(res.first)->second;
					}
				}

			} // cluster_indices_iter

			const uint64_t first_pixel_index = input_pixels.size();
			uint32_t total_unique_pixels = color_hasher.size();

			pixel_clusters[cluster_index].m_first_pixel_index = first_pixel_index;
			pixel_clusters[cluster_index].m_total_pixels = total_unique_pixels;

			input_pixels.resize(first_pixel_index + total_unique_pixels);
			pixel_weights.resize(first_pixel_index + total_unique_pixels);

			uint32_t j = 0;

			for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j)
			{
				input_pixels[first_pixel_index + j] = it->first;
				pixel_weights[first_pixel_index + j] = it->second;
			}

			cluster_sizes[cluster_index] = total_unique_pixels;
#endif

		} // cluster_index

		debug_printf("Total hash time: %3.3f secs\n", hash_tm.get_elapsed_secs());

		// NOTE(review): %llu with a size_t argument — fine on LLP64/LP64 in
		// practice but not strictly portable; verify against debug_printf's contract.
		debug_printf("Total unique colors: %llu\n", input_pixels.size());

		// Order clusters by size (smallest first, per indirect_sort) for the
		// kernel; build both new->old and old->new index maps.
		uint_vec sorted_cluster_indices_new_to_old(total_clusters);
		indirect_sort(total_clusters, sorted_cluster_indices_new_to_old.data(), cluster_sizes.data());
		//for (uint32_t i = 0; i < total_clusters; i++)
		//	sorted_cluster_indices_new_to_old[i] = i;

		uint_vec sorted_cluster_indices_old_to_new(total_clusters);
		for (uint32_t i = 0; i < total_clusters; i++)
			sorted_cluster_indices_old_to_new[sorted_cluster_indices_new_to_old[i]] = i;

		basisu::vector<cl_pixel_cluster> sorted_pixel_clusters(total_clusters);
		for (uint32_t i = 0; i < total_clusters; i++)
			sorted_pixel_clusters[i] = pixel_clusters[sorted_cluster_indices_new_to_old[i]];

		// Number of ETC1S intensity/permutation trials, scaled by compression level.
		uint32_t total_perms = 64;
		if (m_params.m_compression_level <= 1)
			total_perms = 16;
		else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
			total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS;

		basisu::vector<etc_block> output_blocks(total_clusters);

		if (opencl_encode_etc1s_pixel_clusters(
			m_params.m_pOpenCL_context,
			output_blocks.data(),
			total_clusters,
			sorted_pixel_clusters.data(),
			input_pixels.size(),
			input_pixels.data(),
			pixel_weights.data(),
			m_params.m_perceptual, total_perms))
		{
			// Scatter the kernel's results back into original cluster order.
			for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
			{
				const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index];

				const etc_block& blk = output_blocks[new_cluster_index];

				endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index];

				prev_etc_params.m_valid = true;
				etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false);
				prev_etc_params.m_inten_table[0] = blk.get_inten_table(0);
				prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this
			}

			use_cpu = false;
		}
		else
		{
			// OpenCL failed: disable it for the rest of the encode and fall back to the CPU path.
			error_printf("basisu_frontend::generate_endpoint_codebook: opencl_encode_etc1s_pixel_clusters() failed! Using CPU.\n");
			m_params.m_pOpenCL_context = nullptr;
			m_opencl_failed = true;
		}

	} // if (opencl_is_available() && m_params.m_use_opencl)

	if (use_cpu)
	{
		// CPU path: optimize clusters in batches of N per job-pool job.
		const uint32_t N = 128;
		for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
		{
			const uint32_t first_index = cluster_index_iter;
			const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);

			m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {

				for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
				{
					const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];

					BASISU_FRONTEND_VERIFY(cluster_indices.size());

					// 8 pixels per subblock entry.
					const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8;

					basisu::vector<color_rgba> cluster_pixels(total_pixels);

					// Gather all pixels belonging to this cluster's subblocks.
					for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
					{
						const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
						const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;

						const bool flipped = true;

						const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

						for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++)
						{
							const color_rgba& c = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]];
							cluster_pixels[cluster_indices_iter * 8 + pixel_index] = c;
						}
					}

					endpoint_cluster_etc_params new_subblock_params;

					{
						// Run the ETC1 optimizer over the gathered pixels to get
						// candidate endpoints for this cluster.
						etc1_optimizer optimizer;
						etc1_solution_coordinates solutions[2];

						etc1_optimizer::params cluster_optimizer_params;
						cluster_optimizer_params.m_num_src_pixels = total_pixels;
						cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];

						cluster_optimizer_params.m_use_color4 = false;
						cluster_optimizer_params.m_perceptual = m_params.m_perceptual;

						if (m_params.m_compression_level <= 1)
							cluster_optimizer_params.m_quality = cETCQualityMedium;
						else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
							cluster_optimizer_params.m_quality = cETCQualityUber;

						etc1_optimizer::results cluster_optimizer_results;

						basisu::vector<uint8_t> cluster_selectors(total_pixels);
						cluster_optimizer_results.m_n = total_pixels;
						cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];

						optimizer.init(cluster_optimizer_params, cluster_optimizer_results);

						if (!optimizer.compute())
							BASISU_FRONTEND_VERIFY(false);

						new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
						new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
						new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error;
					}

					endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[cluster_index];

					bool use_new_subblock_params = false;
					// Always accept on the first step, or when there are no valid previous endpoints.
					if ((!step) || (!prev_etc_params.m_valid))
						use_new_subblock_params = true;
					else
					{
						assert(prev_etc_params.m_valid);

						// Measure the error of the PREVIOUS endpoints over the
						// cluster's current pixels, to compare against the new ones.
						uint64_t total_prev_err = 0;

						{
							color_rgba block_colors[4];

							etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false);

							uint64_t total_err = 0;

							for (uint32_t i = 0; i < total_pixels; i++)
							{
								const color_rgba& c = cluster_pixels[i];

								uint64_t best_err = UINT64_MAX;
								//uint32_t best_index = 0;

								for (uint32_t s = 0; s < 4; s++)
								{
									uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false);
									if (err < best_err)
									{
										best_err = err;
										//best_index = s;
									}
								}

								total_err += best_err;
							}

							total_prev_err += total_err;
						}

						// See if we should update this cluster's endpoints (if the error has actually fallen)
						if (total_prev_err > new_subblock_params.m_color_error[0])
						{
							use_new_subblock_params = true;
						}
					}

					if (use_new_subblock_params)
					{
						new_subblock_params.m_valid = true;

						prev_etc_params = new_subblock_params;
					}

				} // cluster_index

			});

		} // cluster_index_iter

		m_params.m_pJob_pool->wait_for_all();
	}

	debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
}
bool basisu_frontend::check_etc1s_constraints() const
1619
{
1620
basisu::vector<vec2U> block_clusters(m_total_blocks);
1621
1622
for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
1623
{
1624
const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1625
1626
for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1627
{
1628
const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1629
const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1630
1631
block_clusters[block_index][subblock_index] = cluster_index;
1632
1633
} // cluster_indices_iter
1634
}
1635
1636
for (uint32_t i = 0; i < m_total_blocks; i++)
1637
{
1638
if (block_clusters[i][0] != block_clusters[i][1])
1639
return false;
1640
}
1641
1642
return true;
1643
}
1644
1645
// For each block, determine which ETC1S endpoint cluster can encode that block with lowest error.
1646
// This reassigns blocks to different endpoint clusters.
1647
uint32_t basisu_frontend::refine_endpoint_clusterization()
1648
{
1649
debug_printf("refine_endpoint_clusterization\n");
1650
1651
if (m_use_hierarchical_endpoint_codebooks)
1652
compute_endpoint_clusters_within_each_parent_cluster();
1653
1654
// Note: It's possible that an endpoint cluster may live in more than one parent cluster after the first refinement step.
1655
1656
basisu::vector<vec2U> block_clusters(m_total_blocks);
1657
1658
for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++)
1659
{
1660
const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index];
1661
1662
for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
1663
{
1664
const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1;
1665
const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1;
1666
1667
block_clusters[block_index][subblock_index] = cluster_index;
1668
1669
} // cluster_indices_iter
1670
}
1671
1672
//----------------------------------------------------------
1673
1674
// Create a new endpoint clusterization
1675
1676
interval_timer tm;
1677
tm.start();
1678
1679
uint_vec best_cluster_indices(m_total_blocks);
1680
1681
bool use_cpu = true;
1682
// TODO: Support non-hierarchical endpoint codebooks here
1683
if (m_params.m_pOpenCL_context && m_use_hierarchical_endpoint_codebooks)
1684
{
1685
// For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
1686
// We also prepare an array of block info structs that point into this new parent endpoint cluster array.
1687
const uint32_t total_parent_clusters = (uint32_t)m_endpoint_clusters_within_each_parent_cluster.size();
1688
1689
basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);
1690
1691
// the size of each parent cluster, in total clusters
1692
uint_vec parent_cluster_sizes(total_parent_clusters);
1693
for (uint32_t i = 0; i < total_parent_clusters; i++)
1694
parent_cluster_sizes[i] = (uint32_t)m_endpoint_clusters_within_each_parent_cluster[i].size();
1695
1696
uint_vec first_parent_cluster_ofs(total_parent_clusters);
1697
uint32_t cur_ofs = 0;
1698
for (uint32_t i = 0; i < total_parent_clusters; i++)
1699
{
1700
first_parent_cluster_ofs[i] = cur_ofs;
1701
1702
cur_ofs += parent_cluster_sizes[i];
1703
}
1704
1705
// Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step.
1706
BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size());
1707
const uint32_t total_actual_endpoint_clusters = cur_ofs;
1708
basisu::vector<cl_endpoint_cluster_struct> cl_endpoint_cluster_structs(total_actual_endpoint_clusters);
1709
1710
for (uint32_t i = 0; i < total_parent_clusters; i++)
1711
{
1712
const uint32_t dst_ofs = first_parent_cluster_ofs[i];
1713
1714
const uint32_t parent_cluster_size = parent_cluster_sizes[i];
1715
1716
assert(m_endpoint_clusters_within_each_parent_cluster[i].size() == parent_cluster_size);
1717
1718
for (uint32_t j = 0; j < parent_cluster_size; j++)
1719
{
1720
const uint32_t endpoint_cluster_index = m_endpoint_clusters_within_each_parent_cluster[i][j];
1721
1722
color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_unscaled[0]);
1723
uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[endpoint_cluster_index].m_inten_table[0];
1724
1725
cl_endpoint_cluster_structs[dst_ofs + j].m_unscaled_color = cluster_etc_base_color;
1726
cl_endpoint_cluster_structs[dst_ofs + j].m_etc_inten = (uint8_t)cluster_etc_inten;
1727
cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index;
1728
}
1729
}
1730
1731
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
1732
{
1733
const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index];
1734
1735
cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]);
1736
cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]);
1737
1738
const uint32_t block_cluster_index = block_clusters[block_index][0];
1739
cl_block_info_structs[block_index].m_cur_cluster_index = (uint16_t)block_cluster_index;
1740
cl_block_info_structs[block_index].m_cur_cluster_etc_inten = (uint8_t)m_endpoint_cluster_etc_params[block_cluster_index].m_inten_table[0];
1741
}
1742
1743
uint_vec block_cluster_indices(m_total_blocks);
1744
for (uint32_t i = 0; i < m_total_blocks; i++)
1745
block_cluster_indices[i] = block_clusters[i][0];
1746
1747
uint_vec sorted_block_indices(m_total_blocks);
1748
indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data());
1749
1750
bool status = opencl_refine_endpoint_clusterization(
1751
m_params.m_pOpenCL_context,
1752
cl_block_info_structs.data(),
1753
total_actual_endpoint_clusters,
1754
cl_endpoint_cluster_structs.data(),
1755
sorted_block_indices.data(),
1756
best_cluster_indices.data(),
1757
m_params.m_perceptual);
1758
1759
if (status)
1760
{
1761
use_cpu = false;
1762
}
1763
else
1764
{
1765
error_printf("basisu_frontend::refine_endpoint_clusterization: opencl_refine_endpoint_clusterization() failed! Using CPU.\n");
1766
m_params.m_pOpenCL_context = nullptr;
1767
m_opencl_failed = true;
1768
}
1769
}
1770
1771
if (use_cpu)
1772
{
1773
const uint32_t N = 1024;
1774
for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
1775
{
1776
const uint32_t first_index = block_index_iter;
1777
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
1778
1779
m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {
1780
1781
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
1782
{
1783
const uint32_t cluster_index = block_clusters[block_index][0];
1784
BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]);
1785
1786
const color_rgba* pSubblock_pixels = get_source_pixel_block(block_index).get_ptr();
1787
const uint32_t num_subblock_pixels = 16;
1788
1789
uint64_t best_cluster_err = INT64_MAX;
1790
uint32_t best_cluster_index = 0;
1791
1792
const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0;
1793
const uint_vec* pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr;
1794
1795
const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size();
1796
1797
for (uint32_t i = 0; i < total_clusters; i++)
1798
{
1799
const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? (*pCluster_indices)[i] : i;
1800
1801
color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]);
1802
uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0];
1803
1804
uint64_t total_err = 0;
1805
1806
const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0];
1807
const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0];
1808
color_rgba subblock_colors[4];
1809
// Can't assign it here - may result in too much error when selector quant occurs
1810
if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0])
1811
{
1812
total_err = INT64_MAX;
1813
goto skip_cluster;
1814
}
1815
1816
etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten);
1817
1818
#if 0
1819
for (uint32_t p = 0; p < num_subblock_pixels; p++)
1820
{
1821
uint64_t best_err = UINT64_MAX;
1822
1823
for (uint32_t r = low_selector; r <= high_selector; r++)
1824
{
1825
uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false);
1826
best_err = minimum(best_err, err);
1827
if (!best_err)
1828
break;
1829
}
1830
1831
total_err += best_err;
1832
if (total_err > best_cluster_err)
1833
break;
1834
} // p
1835
#else
1836
if (m_params.m_perceptual)
1837
{
1838
if (!g_cpu_supports_sse41)
1839
{
1840
for (uint32_t p = 0; p < num_subblock_pixels; p++)
1841
{
1842
uint64_t best_err = UINT64_MAX;
1843
1844
for (uint32_t r = low_selector; r <= high_selector; r++)
1845
{
1846
uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false);
1847
best_err = minimum(best_err, err);
1848
if (!best_err)
1849
break;
1850
}
1851
1852
total_err += best_err;
1853
if (total_err > best_cluster_err)
1854
break;
1855
} // p
1856
}
1857
else
1858
{
1859
#if BASISU_SUPPORT_SSE
1860
find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
1861
#endif
1862
}
1863
}
1864
else
1865
{
1866
if (!g_cpu_supports_sse41)
1867
{
1868
for (uint32_t p = 0; p < num_subblock_pixels; p++)
1869
{
1870
uint64_t best_err = UINT64_MAX;
1871
1872
for (uint32_t r = low_selector; r <= high_selector; r++)
1873
{
1874
uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false);
1875
best_err = minimum(best_err, err);
1876
if (!best_err)
1877
break;
1878
}
1879
1880
total_err += best_err;
1881
if (total_err > best_cluster_err)
1882
break;
1883
} // p
1884
}
1885
else
1886
{
1887
#if BASISU_SUPPORT_SSE
1888
find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err);
1889
#endif
1890
}
1891
}
1892
#endif
1893
1894
skip_cluster:
1895
if ((total_err < best_cluster_err) ||
1896
((cluster_iter == cluster_index) && (total_err == best_cluster_err)))
1897
{
1898
best_cluster_err = total_err;
1899
best_cluster_index = cluster_iter;
1900
1901
if (!best_cluster_err)
1902
break;
1903
}
1904
} // j
1905
1906
best_cluster_indices[block_index] = best_cluster_index;
1907
1908
} // block_index
1909
1910
});
1911
1912
} // block_index_iter
1913
1914
m_params.m_pJob_pool->wait_for_all();
1915
1916
} // use_cpu
1917
1918
debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs());
1919
1920
basisu::vector<typename basisu::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size());
1921
uint32_t total_subblocks_reassigned = 0;
1922
1923
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
1924
{
1925
const uint32_t training_vector_index = block_index * 2 + 0;
1926
1927
const uint32_t orig_cluster_index = block_clusters[block_index][0];
1928
const uint32_t best_cluster_index = best_cluster_indices[block_index];
1929
1930
optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index);
1931
optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1);
1932
1933
if (best_cluster_index != orig_cluster_index)
1934
{
1935
total_subblocks_reassigned++;
1936
}
1937
}
1938
1939
debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned);
1940
1941
m_endpoint_clusters = optimized_endpoint_clusters;
1942
1943
return total_subblocks_reassigned;
1944
}
1945
1946
void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters()
1947
{
1948
debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n");
1949
1950
// Step 1: Sort endpoint clusters by the base colors/intens
1951
1952
uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size());
1953
for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
1954
sorted_endpoint_cluster_indices[i] = i;
1955
1956
indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]);
1957
1958
basisu::vector<basisu::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size());
1959
basisu::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size());
1960
1961
for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
1962
{
1963
uint32_t j = sorted_endpoint_cluster_indices[i];
1964
new_endpoint_clusters[i] = m_endpoint_clusters[j];
1965
new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j];
1966
}
1967
1968
new_endpoint_clusters.swap(m_endpoint_clusters);
1969
new_subblock_etc_params.swap(m_endpoint_cluster_etc_params);
1970
1971
// Step 2: Eliminate redundant endpoint clusters, or empty endpoint clusters
1972
1973
new_endpoint_clusters.resize(0);
1974
new_subblock_etc_params.resize(0);
1975
1976
for (int i = 0; i < (int)m_endpoint_clusters.size(); )
1977
{
1978
if (!m_endpoint_clusters[i].size())
1979
{
1980
i++;
1981
continue;
1982
}
1983
1984
int j;
1985
for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++)
1986
{
1987
if (!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j]))
1988
break;
1989
}
1990
1991
new_endpoint_clusters.push_back(m_endpoint_clusters[i]);
1992
new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]);
1993
1994
for (int k = i + 1; k < j; k++)
1995
{
1996
append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]);
1997
}
1998
1999
i = j;
2000
}
2001
2002
if (m_endpoint_clusters.size() != new_endpoint_clusters.size())
2003
{
2004
if (m_params.m_debug_stats)
2005
debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size()));
2006
2007
m_endpoint_clusters.swap(new_endpoint_clusters);
2008
2009
m_endpoint_cluster_etc_params.swap(new_subblock_etc_params);
2010
}
2011
}
2012
2013
void basisu_frontend::create_initial_packed_texture()
2014
{
2015
debug_printf("create_initial_packed_texture\n");
2016
2017
interval_timer tm;
2018
tm.start();
2019
2020
bool use_cpu = true;
2021
2022
if ((m_params.m_pOpenCL_context) && (opencl_is_available()))
2023
{
2024
basisu::vector<color_rgba> block_etc5_color_intens(m_total_blocks);
2025
2026
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2027
{
2028
uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
2029
2030
const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0];
2031
uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0];
2032
2033
block_etc5_color_intens[block_index].set(color_unscaled.r, color_unscaled.g, color_unscaled.b, inten);
2034
}
2035
2036
bool status = opencl_determine_selectors(m_params.m_pOpenCL_context, block_etc5_color_intens.data(),
2037
m_encoded_blocks.data(),
2038
m_params.m_perceptual);
2039
if (!status)
2040
{
2041
error_printf("basisu_frontend::create_initial_packed_texture: opencl_determine_selectors() failed! Using CPU.\n");
2042
m_params.m_pOpenCL_context = nullptr;
2043
m_opencl_failed = true;
2044
}
2045
else
2046
{
2047
use_cpu = false;
2048
}
2049
}
2050
2051
if (use_cpu)
2052
{
2053
const uint32_t N = 4096;
2054
for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
2055
{
2056
const uint32_t first_index = block_index_iter;
2057
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
2058
2059
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
2060
2061
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
2062
{
2063
uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0];
2064
uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1];
2065
BASISU_FRONTEND_VERIFY(cluster0 == cluster1);
2066
2067
const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr();
2068
2069
etc_block& blk = m_encoded_blocks[block_index];
2070
2071
color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] };
2072
uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] };
2073
2074
blk.set_block_color5(unscaled[0], unscaled[1]);
2075
blk.set_flip_bit(true);
2076
2077
blk.set_inten_table(0, inten[0]);
2078
blk.set_inten_table(1, inten[1]);
2079
2080
blk.determine_selectors(pSource_pixels, m_params.m_perceptual);
2081
2082
} // block_index
2083
2084
});
2085
2086
} // block_index_iter
2087
2088
m_params.m_pJob_pool->wait_for_all();
2089
2090
} // use_cpu
2091
2092
m_orig_encoded_blocks = m_encoded_blocks;
2093
2094
debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2095
}
2096
2097
void basisu_frontend::compute_selector_clusters_within_each_parent_cluster()
2098
{
2099
uint_vec block_selector_cluster_indices(m_total_blocks);
2100
2101
for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_block_indices.size()); cluster_index++)
2102
{
2103
const basisu::vector<uint32_t>& cluster_indices = m_selector_cluster_block_indices[cluster_index];
2104
2105
for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++)
2106
{
2107
const uint32_t block_index = cluster_indices[cluster_indices_iter];
2108
2109
block_selector_cluster_indices[block_index] = cluster_index;
2110
2111
} // cluster_indices_iter
2112
2113
} // cluster_index
2114
2115
m_selector_clusters_within_each_parent_cluster.resize(0);
2116
m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size());
2117
2118
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2119
{
2120
const uint32_t cluster_index = block_selector_cluster_indices[block_index];
2121
const uint32_t parent_cluster_index = m_block_parent_selector_cluster[block_index];
2122
2123
m_selector_clusters_within_each_parent_cluster[parent_cluster_index].push_back(cluster_index);
2124
}
2125
2126
for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
2127
{
2128
uint_vec &cluster_indices = m_selector_clusters_within_each_parent_cluster[i];
2129
2130
BASISU_FRONTEND_VERIFY(cluster_indices.size());
2131
2132
vector_sort(cluster_indices);
2133
2134
auto last = std::unique(cluster_indices.begin(), cluster_indices.end());
2135
cluster_indices.erase(last, cluster_indices.end());
2136
}
2137
}
2138
2139
// Quantizes the per-block 4x4 selector patterns into a selector codebook.
// Each block's selectors form a 16-dim float training vector, weighted by the
// contrast of its dominant subblock; a (possibly hierarchical) tree vector
// quantizer then produces m_selector_cluster_block_indices and, optionally,
// m_selector_parent_cluster_block_indices plus the per-block parent map.
void basisu_frontend::generate_selector_clusters()
{
	debug_printf("generate_selector_clusters\n");

	typedef tree_vector_quant<vec16F> vec16F_clusterizer;

	// One weighted 16-dim training vector per block.
	vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks);

	// Fill the training vectors in parallel, N blocks per job.
	const uint32_t N = 4096;
	for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
	{
		const uint32_t first_index = block_index_iter;
		const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

		m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {

			for (uint32_t block_index = first_index; block_index < last_index; block_index++)
			{
				const etc_block &blk = m_encoded_blocks[block_index];

				// The training vector is the raw 4x4 selector grid, row-major, as floats.
				vec16F v;
				for (uint32_t y = 0; y < 4; y++)
					for (uint32_t x = 0; x < 4; x++)
						v[x + y * 4] = static_cast<float>(blk.get_selector(x, y));

				// Pick the subblock with the larger intensity table index to judge contrast.
				const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1;

				color_rgba block_colors[2];
				blk.get_block_low_high_colors(block_colors, subblock_index);

				// Weight by the low/high color distance: high-contrast blocks'
				// selector patterns matter more perceptually.
				const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[1], false);

				const uint32_t cColorDistToWeight = 300;
				const uint32_t cMaxWeight = 4096;
				uint32_t weight = clamp<uint32_t>(dist / cColorDistToWeight, 1, cMaxWeight);

				training_vecs[block_index].first = v;
				training_vecs[block_index].second = weight;

			} // block_index

		} );

	} // block_index_iter

	m_params.m_pJob_pool->wait_for_all();

	// Feed all training vectors into the clusterizer.
	vec16F_clusterizer selector_clusterizer;
	for (uint32_t i = 0; i < m_total_blocks; i++)
		selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second);

	// Parent (coarse) codebook size depends on compression level, and is only
	// used at all when the requested selector codebook is reasonably large.
	const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT;
	const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0;
	debug_printf("Using selector parent codebook size %u\n", parent_codebook_size);

	// Cap codebook-creation threads by hardware concurrency, the library limit,
	// and the job pool's actual thread count.
	uint32_t max_threads = 0;
	max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0;
	if (m_params.m_pJob_pool)
		max_threads = minimum<int>((int)m_params.m_pJob_pool->get_total_threads(), max_threads);

	bool status = generate_hierarchical_codebook_threaded(selector_clusterizer,
		m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? parent_codebook_size : 0,
		m_selector_cluster_block_indices,
		m_selector_parent_cluster_block_indices,
		max_threads, m_params.m_pJob_pool, false);
	BASISU_FRONTEND_VERIFY(status);

	if (m_use_hierarchical_selector_codebooks)
	{
		// Degenerate case: no parent clusters came back, so place every block
		// into a single parent cluster.
		if (!m_selector_parent_cluster_block_indices.size())
		{
			m_selector_parent_cluster_block_indices.resize(0);
			m_selector_parent_cluster_block_indices.resize(1);
			for (uint32_t i = 0; i < m_total_blocks; i++)
				m_selector_parent_cluster_block_indices[0].push_back(i);
		}

		// Parent indices are stored in uint8_t below, so the codebook sizes must fit.
		BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX);
		BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX);

		// Build the block -> parent selector cluster map; 0xFF marks "unassigned"
		// so we can verify full coverage afterwards.
		m_block_parent_selector_cluster.resize(0);
		m_block_parent_selector_cluster.resize(m_total_blocks);
		vector_set_all(m_block_parent_selector_cluster, 0xFF);

		for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++)
		{
			const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index];
			for (uint32_t j = 0; j < cluster.size(); j++)
				m_block_parent_selector_cluster[cluster[j]] = static_cast<uint8_t>(parent_cluster_index);
		}
		// Every block must have been assigned to some parent cluster.
		for (uint32_t i = 0; i < m_total_blocks; i++)
		{
			BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[i] != 0xFF);
		}

		// Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong.
		for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++)
		{
			const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index];

			uint32_t parent_cluster_index = 0;
			for (uint32_t j = 0; j < cluster.size(); j++)
			{
				const uint32_t block_index = cluster[j];
				if (!j)
				{
					// First block establishes the expected parent for the whole cluster.
					parent_cluster_index = m_block_parent_selector_cluster[block_index];
				}
				else
				{
					BASISU_FRONTEND_VERIFY(m_block_parent_selector_cluster[block_index] == parent_cluster_index);
				}
			}
		}
	}

	debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size());
}
2257
2258
// Builds m_optimized_cluster_selectors: for every selector cluster, and for each
// of its 16 pixel positions, choose the selector value (0-3) minimizing the total
// error over all blocks assigned to that cluster (given each block's current
// quantized endpoints). Optionally dumps a codebook visualization image.
// iter: refinement iteration number, used only to name the debug image file.
void basisu_frontend::create_optimized_selector_codebook(uint32_t iter)
{
	debug_printf("create_optimized_selector_codebook\n");

	interval_timer tm;
	tm.start();

	const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();

	debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size());

	m_optimized_cluster_selectors.resize(total_selector_clusters);

	// For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector.
	// Clusters are processed in parallel, N clusters per job.
	const uint32_t N = 256;
	for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N)
	{
		const uint32_t first_index = cluster_index_iter;
		const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);

		m_params.m_pJob_pool->add_job([this, first_index, last_index] {

			for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
			{
				const basisu::vector<uint32_t>& cluster_block_indices = m_selector_cluster_block_indices[cluster_index];

				// Empty clusters have no votes; leave their codebook entry as-is.
				if (!cluster_block_indices.size())
					continue;

				// Accumulated but otherwise unused; the (void) cast silences warnings.
				uint64_t overall_best_err = 0;
				(void)overall_best_err;

				// total_err[y][x][s] = summed error at pixel (x,y) if selector s is chosen,
				// across every block in this cluster.
				uint64_t total_err[4][4][4];
				clear_obj(total_err);

				for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++)
				{
					const uint32_t block_index = cluster_block_indices[cluster_block_index];

					const etc_block& blk = m_encoded_blocks[block_index];

					// The four colors this block can produce (from its quantized endpoints).
					color_rgba blk_colors[4];
					blk.get_block_colors(blk_colors, 0);

					for (uint32_t y = 0; y < 4; y++)
					{
						for (uint32_t x = 0; x < 4; x++)
						{
							const color_rgba& orig_color = get_source_pixel_block(block_index)(x, y);

							// Accumulate the error each candidate selector would incur at this pixel.
							if (m_params.m_perceptual)
							{
								for (uint32_t s = 0; s < 4; s++)
									total_err[y][x][s] += color_distance(true, blk_colors[s], orig_color, false);
							}
							else
							{
								for (uint32_t s = 0; s < 4; s++)
									total_err[y][x][s] += color_distance(false, blk_colors[s], orig_color, false);
							}
						} // x
					} // y

				} // cluster_block_index

				// Pick, per pixel position, the selector with the lowest accumulated error.
				for (uint32_t y = 0; y < 4; y++)
				{
					for (uint32_t x = 0; x < 4; x++)
					{
						uint64_t best_err = total_err[y][x][0];
						uint8_t best_sel = 0;

						for (uint32_t s = 1; s < 4; s++)
						{
							if (total_err[y][x][s] < best_err)
							{
								best_err = total_err[y][x][s];
								best_sel = (uint8_t)s;
							}
						}

						m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_sel);

						overall_best_err += best_err;
					} // x
				} // y

			} // cluster_index

		});

	} // cluster_index_iter

	m_params.m_pJob_pool->wait_for_all();

	debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());

	// Optional debug visualization: one row of 5x5 cells per selector cluster -
	// the optimized pattern on the left, then each member block's original selectors.
	if (m_params.m_debug_images)
	{
		uint32_t max_selector_cluster_size = 0;

		for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
			max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size());

		// Skip the image entirely if it would be too wide.
		if ((max_selector_cluster_size * 5) < 32768)
		{
			const uint32_t x_spacer_len = 16;
			image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5);

			for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
			{
				const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index];

				// Draw the optimized codebook pattern inside the spacer column
				// (selector value 0-3 scaled to grayscale 0-255).
				for (uint32_t y = 0; y < 4; y++)
					for (uint32_t x = 0; x < 4; x++)
						selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3));

				// Then draw each member block's original (pre-optimization) selectors.
				for (uint32_t i = 0; i < cluster_block_indices.size(); i++)
				{
					uint32_t block_index = cluster_block_indices[i];

					const etc_block &blk = m_orig_encoded_blocks[block_index];

					for (uint32_t y = 0; y < 4; y++)
						for (uint32_t x = 0; x < 4; x++)
							selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3));
				}
			}

			char buf[256];
			snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter);
			save_png(buf, selector_cluster_vis);
		}
	}
}
2393
2394
// For each block: Determine which quantized selectors best encode that block, given its quantized endpoints.
2395
// Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end.
2396
void basisu_frontend::find_optimal_selector_clusters_for_each_block()
2397
{
2398
debug_printf("find_optimal_selector_clusters_for_each_block\n");
2399
2400
interval_timer tm;
2401
tm.start();
2402
2403
if (m_params.m_validate)
2404
{
2405
// Sanity checks
2406
BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size());
2407
for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++)
2408
{
2409
for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++)
2410
{
2411
BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size());
2412
}
2413
}
2414
}
2415
2416
m_block_selector_cluster_index.resize(m_total_blocks);
2417
2418
if (m_params.m_compression_level == 0)
2419
{
2420
// Just leave the blocks in their original selector clusters.
2421
for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++)
2422
{
2423
for (uint32_t j = 0; j < m_selector_cluster_block_indices[selector_cluster_index].size(); j++)
2424
{
2425
const uint32_t block_index = m_selector_cluster_block_indices[selector_cluster_index][j];
2426
2427
m_block_selector_cluster_index[block_index] = selector_cluster_index;
2428
2429
etc_block& blk = m_encoded_blocks[block_index];
2430
blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_cluster_index].get_raw_selector_bits());
2431
}
2432
}
2433
2434
debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2435
2436
return;
2437
}
2438
2439
bool use_cpu = true;
2440
2441
if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
2442
{
2443
const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size_u32();
2444
2445
basisu::vector<fosc_selector_struct> selector_structs;
2446
selector_structs.reserve(m_optimized_cluster_selectors.size());
2447
2448
uint_vec parent_selector_cluster_offsets(num_parent_clusters);
2449
2450
uint_vec selector_cluster_indices;
2451
selector_cluster_indices.reserve(m_optimized_cluster_selectors.size());
2452
2453
uint32_t cur_ofs = 0;
2454
for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++)
2455
{
2456
parent_selector_cluster_offsets[parent_index] = cur_ofs;
2457
2458
for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++)
2459
{
2460
const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j];
2461
2462
uint32_t sel_bits = 0;
2463
for (uint32_t p = 0; p < 16; p++)
2464
sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2));
2465
2466
selector_structs.enlarge(1)->m_packed_selectors = sel_bits;
2467
2468
selector_cluster_indices.push_back(selector_cluster_index);
2469
}
2470
2471
cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size_u32();
2472
}
2473
2474
const uint32_t total_input_selectors = cur_ofs;
2475
2476
basisu::vector<fosc_block_struct> block_structs(m_total_blocks);
2477
for (uint32_t i = 0; i < m_total_blocks; i++)
2478
{
2479
const uint32_t parent_selector_cluster = m_block_parent_selector_cluster[i];
2480
2481
const etc_block& blk = m_encoded_blocks[i];
2482
blk.unpack_color5(block_structs[i].m_etc_color5_inten, blk.get_base5_color(), false);
2483
2484
block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
2485
block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
2486
block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size_u32();
2487
}
2488
2489
uint_vec output_selector_cluster_indices(m_total_blocks);
2490
2491
bool status = opencl_find_optimal_selector_clusters_for_each_block(
2492
m_params.m_pOpenCL_context,
2493
block_structs.data(),
2494
total_input_selectors,
2495
selector_structs.data(),
2496
selector_cluster_indices.data(),
2497
output_selector_cluster_indices.data(),
2498
m_params.m_perceptual);
2499
2500
if (!status)
2501
{
2502
error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! Using CPU.\n");
2503
m_params.m_pOpenCL_context = nullptr;
2504
m_opencl_failed = true;
2505
}
2506
else
2507
{
2508
for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
2509
{
2510
m_selector_cluster_block_indices[i].resize(0);
2511
m_selector_cluster_block_indices[i].reserve(128);
2512
}
2513
2514
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2515
{
2516
etc_block& blk = m_encoded_blocks[block_index];
2517
2518
uint32_t best_cluster_index = output_selector_cluster_indices[block_index];
2519
2520
blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
2521
2522
m_block_selector_cluster_index[block_index] = best_cluster_index;
2523
2524
vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
2525
m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
2526
}
2527
2528
use_cpu = false;
2529
}
2530
}
2531
2532
if (use_cpu)
2533
{
2534
basisu::vector<uint8_t> unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size());
2535
for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++)
2536
{
2537
for (uint32_t y = 0; y < 4; y++)
2538
{
2539
for (uint32_t x = 0; x < 4; x++)
2540
{
2541
unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y);
2542
}
2543
}
2544
}
2545
2546
const uint32_t N = 2048;
2547
for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N)
2548
{
2549
const uint32_t first_index = block_index_iter;
2550
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
2551
2552
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {
2553
2554
int prev_best_cluster_index = 0;
2555
2556
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
2557
{
2558
const pixel_block& block = get_source_pixel_block(block_index);
2559
2560
etc_block& blk = m_encoded_blocks[block_index];
2561
2562
if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1)))
2563
{
2564
blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits());
2565
2566
m_block_selector_cluster_index[block_index] = prev_best_cluster_index;
2567
2568
continue;
2569
}
2570
2571
const color_rgba* pBlock_pixels = block.get_ptr();
2572
2573
color_rgba trial_block_colors[4];
2574
blk.get_block_colors_etc1s(trial_block_colors);
2575
2576
// precompute errors for the i-th block pixel and selector sel: [sel][i]
2577
uint32_t trial_errors[4][16];
2578
2579
if (m_params.m_perceptual)
2580
{
2581
for (uint32_t sel = 0; sel < 4; ++sel)
2582
for (uint32_t i = 0; i < 16; ++i)
2583
trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false);
2584
}
2585
else
2586
{
2587
for (uint32_t sel = 0; sel < 4; ++sel)
2588
for (uint32_t i = 0; i < 16; ++i)
2589
trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false);
2590
}
2591
2592
// Compute the minimum possible errors (given any selectors) for pixels 0-15
2593
uint64_t min_possible_error_0_15 = 0;
2594
for (uint32_t i = 0; i < 16; i++)
2595
min_possible_error_0_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2596
2597
// Compute the minimum possible errors (given any selectors) for pixels 4-15
2598
uint64_t min_possible_error_4_15 = 0;
2599
for (uint32_t i = 4; i < 16; i++)
2600
min_possible_error_4_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2601
2602
// Compute the minimum possible errors (given any selectors) for pixels 8-15
2603
uint64_t min_possible_error_8_15 = 0;
2604
for (uint32_t i = 8; i < 16; i++)
2605
min_possible_error_8_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2606
2607
// Compute the minimum possible errors (given any selectors) for pixels 12-15
2608
uint64_t min_possible_error_12_15 = 0;
2609
for (uint32_t i = 12; i < 16; i++)
2610
min_possible_error_12_15 += basisu::minimum(trial_errors[0][i], trial_errors[1][i], trial_errors[2][i], trial_errors[3][i]);
2611
2612
uint64_t best_cluster_err = INT64_MAX;
2613
uint32_t best_cluster_index = 0;
2614
2615
const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0;
2616
const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr;
2617
2618
const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size();
2619
2620
#if 0
2621
for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
2622
{
2623
const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
2624
2625
const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index];
2626
2627
uint64_t trial_err = 0;
2628
for (int y = 0; y < 4; y++)
2629
{
2630
for (int x = 0; x < 4; x++)
2631
{
2632
const uint32_t sel = cluster_blk.get_selector(x, y);
2633
2634
trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false);
2635
if (trial_err > best_cluster_err)
2636
goto early_out;
2637
}
2638
}
2639
2640
if (trial_err < best_cluster_err)
2641
{
2642
best_cluster_err = trial_err;
2643
best_cluster_index = cluster_index;
2644
if (!best_cluster_err)
2645
break;
2646
}
2647
2648
early_out:
2649
;
2650
}
2651
#else
2652
for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++)
2653
{
2654
const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter;
2655
2656
const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16];
2657
2658
uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3];
2659
if ((trial_err + min_possible_error_4_15) >= best_cluster_err)
2660
continue;
2661
2662
trial_err += (uint64_t)trial_errors[pSels[4]][4] + trial_errors[pSels[5]][5] + trial_errors[pSels[6]][6] + trial_errors[pSels[7]][7];
2663
if ((trial_err + min_possible_error_8_15) >= best_cluster_err)
2664
continue;
2665
2666
trial_err += (uint64_t)trial_errors[pSels[8]][8] + trial_errors[pSels[9]][9] + trial_errors[pSels[10]][10] + trial_errors[pSels[11]][11];
2667
if ((trial_err + min_possible_error_12_15) >= best_cluster_err)
2668
continue;
2669
2670
trial_err += (uint64_t)trial_errors[pSels[12]][12] + trial_errors[pSels[13]][13] + trial_errors[pSels[14]][14] + trial_errors[pSels[15]][15];
2671
2672
if (trial_err < best_cluster_err)
2673
{
2674
best_cluster_err = trial_err;
2675
best_cluster_index = cluster_index;
2676
if (best_cluster_err == min_possible_error_0_15)
2677
break;
2678
}
2679
2680
} // cluster_iter
2681
#endif
2682
2683
blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits());
2684
2685
m_block_selector_cluster_index[block_index] = best_cluster_index;
2686
2687
prev_best_cluster_index = best_cluster_index;
2688
2689
} // block_index
2690
2691
} );
2692
2693
} // block_index_iter
2694
2695
m_params.m_pJob_pool->wait_for_all();
2696
2697
for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
2698
{
2699
m_selector_cluster_block_indices[i].resize(0);
2700
m_selector_cluster_block_indices[i].reserve(128);
2701
}
2702
2703
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2704
{
2705
const uint32_t best_cluster_index = m_block_selector_cluster_index[block_index];
2706
2707
vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index);
2708
m_selector_cluster_block_indices[best_cluster_index].push_back(block_index);
2709
}
2710
2711
} // if (use_cpu)
2712
2713
debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
2714
}
2715
2716
// TODO: Remove old ETC1 specific stuff, and thread this.
// Re-optimizes each endpoint cluster's ETC1S endpoint (base color + intensity table)
// while holding the already-quantized selectors fixed. Returns the number of
// subblocks whose endpoints were actually improved (error strictly decreased).
uint32_t basisu_frontend::refine_block_endpoints_given_selectors()
{
	debug_printf("refine_block_endpoints_given_selectors\n");

	// Record which subblocks (block_index * 2 + subblock_index) belong to each endpoint cluster.
	for (int block_index = 0; block_index < static_cast<int>(m_total_blocks); block_index++)
	{
		//uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y);
		vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index];

		m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2);

		m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1);
	}

	uint32_t total_subblocks_refined = 0;
	uint32_t total_subblocks_examined = 0;

	for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++)
	{
		endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index];

		const uint_vec &subblocks = subblock_params.m_subblocks;
		//uint32_t total_pixels = subblock.m_subblocks.size() * 8;

		// Source pixels/selectors are gathered separately per encoding mode,
		// indexed by [use_individual_mode] (0 = differential, 1 = individual).
		basisu::vector<color_rgba> subblock_colors[2]; // [use_individual_mode]
		uint8_vec subblock_selectors[2];

		uint64_t cur_subblock_err[2] = { 0, 0 };

		// Gather every member subblock's source pixels, current (forced) selectors,
		// and the current total error we need to beat.
		for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
		{
			uint32_t training_vector_index = subblocks[subblock_iter];

			// Training vectors encode (block_index << 1) | subblock_index.
			uint32_t block_index = training_vector_index >> 1;
			uint32_t subblock_index = training_vector_index & 1;
			const bool is_flipped = true;

			const etc_block &blk = m_encoded_blocks[block_index];

			// A clear diff bit means the block uses individual (color4) mode.
			const bool use_individual_mode = !blk.get_diff_bit();

			const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr();

			color_rgba unpacked_block_pixels[16];
			unpack_etc1(blk, unpacked_block_pixels);

			// 8 pixels per ETC1 subblock.
			for (uint32_t i = 0; i < 8; i++)
			{
				const uint32_t pixel_index = g_etc1_pixel_indices[is_flipped][subblock_index][i];
				const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i];

				subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]);

				cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false);

				subblock_selectors[use_individual_mode].push_back(static_cast<uint8_t>(blk.get_selector(coords.m_x, coords.m_y)));
			}
		} // subblock_iter

		etc1_optimizer::results cluster_optimizer_results[2];
		bool results_valid[2] = { false, false };

		clear_obj(cluster_optimizer_results);

		basisu::vector<uint8_t> cluster_selectors[2];

		// Run the ETC1 optimizer once per mode with the selectors forced to their
		// current quantized values; only mark a result valid if it beats the
		// cluster's current error.
		for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
		{
			const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size();

			if (!total_pixels)
				continue;

			total_subblocks_examined += total_pixels / 8;

			etc1_optimizer optimizer;
			etc1_solution_coordinates solutions[2];

			etc1_optimizer::params cluster_optimizer_params;
			cluster_optimizer_params.m_num_src_pixels = total_pixels;
			cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0];

			cluster_optimizer_params.m_use_color4 = use_individual_mode != 0;
			cluster_optimizer_params.m_perceptual = m_params.m_perceptual;

			cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0];
			cluster_optimizer_params.m_quality = cETCQualityUber;

			cluster_selectors[use_individual_mode].resize(total_pixels);

			cluster_optimizer_results[use_individual_mode].m_n = total_pixels;
			cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0];

			optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]);

			if (!optimizer.compute())
				continue;

			if (cluster_optimizer_results[use_individual_mode].m_error < cur_subblock_err[use_individual_mode])
				results_valid[use_individual_mode] = true;

		} // use_individual_mode

		// Commit the improved endpoints back into every affected block.
		for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++)
		{
			if (!results_valid[use_individual_mode])
				continue;

			// Differential mode needs two passes: pass 0 verifies that each block's
			// new color pair is still representable as base5+delta3, pass 1 commits.
			// Individual mode can commit in a single pass.
			uint32_t num_passes = use_individual_mode ? 1 : 2;

			bool all_passed5 = true;

			for (uint32_t pass = 0; pass < num_passes; pass++)
			{
				for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++)
				{
					const uint32_t training_vector_index = subblocks[subblock_iter];

					const uint32_t block_index = training_vector_index >> 1;
					const uint32_t subblock_index = training_vector_index & 1;
					//const bool is_flipped = true;

					etc_block &blk = m_encoded_blocks[block_index];

					// Only touch blocks that were encoded in this mode.
					if (!blk.get_diff_bit() != static_cast<bool>(use_individual_mode != 0))
						continue;

					if (use_individual_mode)
					{
						blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false));
						blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table);

						subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error;
						subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table;
						subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled;

						total_subblocks_refined++;
					}
					else
					{
						const uint16_t base_color5 = blk.get_base5_color();
						const uint16_t delta_color3 = blk.get_delta3_color();

						// Unpack both subblock colors, substitute the refined color for
						// this subblock, then check the pair still fits in delta3 range.
						uint32_t r[2], g[2], b[2];
						etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false);
						bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false);
						assert(success);
						BASISU_NOTE_UNUSED(success);

						r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r;
						g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g;
						b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b;

						color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) };

						if (!etc_block::try_pack_color5_delta3(colors))
						{
							// At least one block can't represent the new pair; abandon
							// the differential-mode commit entirely.
							all_passed5 = false;
							break;
						}

						if ((pass == 1) && (all_passed5))
						{
							blk.set_block_color5(colors[0], colors[1]);
							blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table);

							subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error;
							subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table;
							subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled;

							total_subblocks_refined++;
						}
					}

				} // subblock_iter

			} // pass

		} // use_individual_mode

	} // endpoint_cluster_index

	if (m_params.m_debug_stats)
		debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined);

	return total_subblocks_refined;
}
// Writes a PNG debug visualization of the endpoint clusterization: one 3-pixel-tall
// strip per cluster, with the cluster's 4 ETC1S block colors on the left, followed by
// each member subblock (either its endpoint colors or its source pixels, depending on
// vis_endpoint_colors).
void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors)
{
	debug_printf("dump_endpoint_clusterization_visualization\n");

	uint32_t max_endpoint_cluster_size = 0;

	basisu::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size());
	basisu::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size());
	for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
	{
		max_endpoint_cluster_size = maximum<uint32_t>(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size());
		cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
	}

	// Nothing to visualize.
	if (!max_endpoint_cluster_size)
		return;

	// Identity ordering; sorting clusters by size is currently disabled (below).
	for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++)
		sorted_cluster_indices[i] = i;

	//indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr());

	// Width is capped at 2048 subblocks so a pathological cluster can't blow up the image.
	image endpoint_cluster_vis(12 + minimum<uint32_t>(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3);

	for (uint32_t unsorted_cluster_iter = 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++)
	{
		const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter];

		// Build a reference ETC1S block from the cluster's optimized endpoint so we
		// can recover its 4 decoded block colors.
		etc_block blk;
		blk.clear();
		blk.set_flip_bit(false);
		blk.set_diff_bit(true);
		blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]);
		blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false));

		color_rgba blk_colors[4];
		blk.get_block_colors(blk_colors, 0);
		// Draw the 4 block colors as 2x2 swatches at the left edge of this cluster's strip.
		for (uint32_t i = 0; i < 4; i++)
			endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]);

		for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++)
		{
			// Training vectors encode (block_index << 1) | subblock_index.
			uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter];

			const uint32_t block_index = training_vector_index >> 1;
			const uint32_t subblock_index = training_vector_index & 1;

			const etc_block& blk2 = m_etc1_blocks_etc1s[block_index];

			const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();

			color_rgba subblock_pixels[8];

			if (vis_endpoint_colors)
			{
				// Show the subblock's endpoint color as a flat patch.
				color_rgba colors[2];
				blk2.get_block_low_high_colors(colors, subblock_index);
				for (uint32_t i = 0; i < 8; i++)
					subblock_pixels[i] = colors[subblock_index];
			}
			else
			{
				// Show the subblock's actual source pixels.
				for (uint32_t i = 0; i < 8; i++)
					subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]];
			}

			endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2);
		}
	}

	save_png(pFilename, endpoint_cluster_vis);
	debug_printf("Wrote debug visualization file %s\n", pFilename);
}
void basisu_frontend::finalize()
2980
{
2981
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
2982
{
2983
for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++)
2984
{
2985
const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, subblock_index);
2986
2987
m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[0] = true;
2988
}
2989
}
2990
}
2991
2992
// The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook.
2993
// This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up.
2994
// This is basically a bottom up clusterization stage, where some leaves can be combined.
2995
void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices)
2996
{
2997
debug_printf("reoptimize_remapped_endpoints\n");
2998
2999
basisu::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size());
3000
for (uint32_t i = 0; i < new_block_endpoints.size(); i++)
3001
new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i);
3002
3003
basisu::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size());
3004
basisu::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size());
3005
3006
const uint32_t N = 256;
3007
for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N)
3008
{
3009
const uint32_t first_index = cluster_index_iter;
3010
const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);
3011
3012
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {
3013
3014
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
3015
{
3016
const basisu::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index];
3017
3018
if (!cluster_block_indices.size())
3019
continue;
3020
3021
const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16;
3022
3023
basisu::vector<color_rgba> cluster_pixels(total_pixels);
3024
uint8_vec force_selectors(total_pixels);
3025
3026
etc_block blk;
3027
blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false));
3028
blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false));
3029
blk.set_flip_bit(true);
3030
3031
uint64_t cur_err = 0;
3032
3033
for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++)
3034
{
3035
const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter];
3036
3037
const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
3038
3039
memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba));
3040
3041
const uint32_t selector_cluster_index = pBlock_selector_indices ? (*pBlock_selector_indices)[block_index] : get_block_selector_cluster_index(block_index);
3042
3043
const etc_block &blk_selectors = get_selector_cluster_selector_bits(selector_cluster_index);
3044
3045
blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits());
3046
3047
cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual);
3048
3049
for (uint32_t y = 0; y < 4; y++)
3050
for (uint32_t x = 0; x < 4; x++)
3051
force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast<uint8_t>(blk_selectors.get_selector(x, y));
3052
}
3053
3054
endpoint_cluster_etc_params new_endpoint_cluster_etc_params;
3055
3056
{
3057
etc1_optimizer optimizer;
3058
etc1_solution_coordinates solutions[2];
3059
3060
etc1_optimizer::params cluster_optimizer_params;
3061
cluster_optimizer_params.m_num_src_pixels = total_pixels;
3062
cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0];
3063
3064
cluster_optimizer_params.m_use_color4 = false;
3065
cluster_optimizer_params.m_perceptual = m_params.m_perceptual;
3066
cluster_optimizer_params.m_pForce_selectors = &force_selectors[0];
3067
3068
if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL)
3069
cluster_optimizer_params.m_quality = cETCQualityUber;
3070
else
3071
cluster_optimizer_params.m_quality = cETCQualitySlow;
3072
3073
etc1_optimizer::results cluster_optimizer_results;
3074
3075
basisu::vector<uint8_t> cluster_selectors(total_pixels);
3076
cluster_optimizer_results.m_n = total_pixels;
3077
cluster_optimizer_results.m_pSelectors = &cluster_selectors[0];
3078
3079
optimizer.init(cluster_optimizer_params, cluster_optimizer_results);
3080
3081
if (!optimizer.compute())
3082
BASISU_FRONTEND_VERIFY(false);
3083
3084
new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled;
3085
new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table;
3086
new_endpoint_cluster_etc_params.m_color_error[0] = cluster_optimizer_results.m_error;
3087
new_endpoint_cluster_etc_params.m_color_used[0] = true;
3088
new_endpoint_cluster_etc_params.m_valid = true;
3089
}
3090
3091
if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err)
3092
{
3093
m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params;
3094
3095
cluster_improved[cluster_index] = true;
3096
}
3097
3098
cluster_valid[cluster_index] = true;
3099
3100
} // cluster_index
3101
3102
} );
3103
3104
} // cluster_index_iter
3105
3106
m_params.m_pJob_pool->wait_for_all();
3107
3108
uint32_t total_unused_clusters = 0;
3109
uint32_t total_improved_clusters = 0;
3110
3111
old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size());
3112
vector_set_all(old_to_new_endpoint_cluster_indices, -1);
3113
3114
int total_new_endpoint_clusters = 0;
3115
3116
for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
3117
{
3118
if (!cluster_valid[old_cluster_index])
3119
total_unused_clusters++;
3120
else
3121
old_to_new_endpoint_cluster_indices[old_cluster_index] = total_new_endpoint_clusters++;
3122
3123
if (cluster_improved[old_cluster_index])
3124
total_improved_clusters++;
3125
}
3126
3127
debug_printf("Total unused clusters: %u\n", total_unused_clusters);
3128
debug_printf("Total improved_clusters: %u\n", total_improved_clusters);
3129
debug_printf("Total endpoint clusters: %u\n", total_new_endpoint_clusters);
3130
3131
if (optimize_final_codebook)
3132
{
3133
cluster_subblock_etc_params_vec new_endpoint_cluster_etc_params(total_new_endpoint_clusters);
3134
3135
for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++)
3136
{
3137
if (old_to_new_endpoint_cluster_indices[old_cluster_index] >= 0)
3138
new_endpoint_cluster_etc_params[old_to_new_endpoint_cluster_indices[old_cluster_index]] = m_endpoint_cluster_etc_params[old_cluster_index];
3139
}
3140
3141
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n");
3142
3143
basisu::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters);
3144
3145
for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++)
3146
{
3147
const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index];
3148
3149
const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index];
3150
BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0);
3151
3152
BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_clusters.size());
3153
3154
new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 0);
3155
new_endpoint_clusters[new_endpoint_cluster_index].push_back(block_index * 2 + 1);
3156
3157
BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index < (int)new_endpoint_cluster_etc_params.size());
3158
3159
new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0);
3160
new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1);
3161
3162
m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index;
3163
m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index;
3164
}
3165
3166
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n");
3167
3168
m_endpoint_clusters = new_endpoint_clusters;
3169
m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params;
3170
3171
eliminate_redundant_or_empty_endpoint_clusters();
3172
3173
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 3\n");
3174
3175
for (uint32_t new_cluster_index = 0; new_cluster_index < m_endpoint_clusters.size(); new_cluster_index++)
3176
{
3177
for (uint32_t cluster_block_iter = 0; cluster_block_iter < m_endpoint_clusters[new_cluster_index].size(); cluster_block_iter++)
3178
{
3179
const uint32_t subblock_index = m_endpoint_clusters[new_cluster_index][cluster_block_iter];
3180
const uint32_t block_index = subblock_index >> 1;
3181
3182
m_block_endpoint_clusters_indices[block_index][0] = new_cluster_index;
3183
m_block_endpoint_clusters_indices[block_index][1] = new_cluster_index;
3184
3185
const uint32_t old_cluster_index = new_block_endpoints[block_index];
3186
3187
old_to_new_endpoint_cluster_indices[old_cluster_index] = new_cluster_index;
3188
}
3189
}
3190
3191
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 4\n");
3192
3193
for (uint32_t block_index = 0; block_index < m_encoded_blocks.size(); block_index++)
3194
{
3195
const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0);
3196
3197
m_encoded_blocks[block_index].set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false));
3198
m_encoded_blocks[block_index].set_inten_tables_etc1s(get_endpoint_cluster_inten_table(endpoint_cluster_index, false));
3199
}
3200
3201
debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size());
3202
}
3203
3204
//debug_printf("validate_output: %u\n", validate_output());
3205
}
3206
3207
// Endpoint clusterization hierarchy integrity checker.
3208
// Note this doesn't check for empty clusters.
3209
bool basisu_frontend::validate_endpoint_cluster_hierarchy(bool ensure_clusters_have_same_parents) const
3210
{
3211
if (!m_endpoint_parent_clusters.size())
3212
return true;
3213
3214
int_vec subblock_parent_indices(m_total_blocks * 2);
3215
subblock_parent_indices.set_all(-1);
3216
3217
int_vec subblock_cluster_indices(m_total_blocks * 2);
3218
subblock_cluster_indices.set_all(-1);
3219
3220
for (uint32_t parent_index = 0; parent_index < m_endpoint_parent_clusters.size(); parent_index++)
3221
{
3222
for (uint32_t i = 0; i < m_endpoint_parent_clusters[parent_index].size(); i++)
3223
{
3224
uint32_t subblock_index = m_endpoint_parent_clusters[parent_index][i];
3225
if (subblock_index >= m_total_blocks * 2)
3226
return false;
3227
3228
// If the endpoint cluster lives in more than one parent node, that's wrong.
3229
if (subblock_parent_indices[subblock_index] != -1)
3230
return false;
3231
3232
subblock_parent_indices[subblock_index] = parent_index;
3233
}
3234
}
3235
3236
// Make sure all endpoint clusters are present in the parent cluster.
3237
for (uint32_t i = 0; i < subblock_parent_indices.size(); i++)
3238
{
3239
if (subblock_parent_indices[i] == -1)
3240
return false;
3241
}
3242
3243
for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++)
3244
{
3245
int parent_index = 0;
3246
3247
for (uint32_t i = 0; i < m_endpoint_clusters[cluster_index].size(); i++)
3248
{
3249
uint32_t subblock_index = m_endpoint_clusters[cluster_index][i];
3250
if (subblock_index >= m_total_blocks * 2)
3251
return false;
3252
3253
if (subblock_cluster_indices[subblock_index] != -1)
3254
return false;
3255
3256
subblock_cluster_indices[subblock_index] = cluster_index;
3257
3258
// There are transformations on the endpoint clusters that can break the strict tree requirement
3259
if (ensure_clusters_have_same_parents)
3260
{
3261
// Make sure all the subblocks are in the same parent cluster
3262
if (!i)
3263
parent_index = subblock_parent_indices[subblock_index];
3264
else if (subblock_parent_indices[subblock_index] != parent_index)
3265
return false;
3266
}
3267
}
3268
}
3269
3270
// Make sure all endpoint clusters are present in the parent cluster.
3271
for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++)
3272
{
3273
if (subblock_cluster_indices[i] == -1)
3274
return false;
3275
}
3276
3277
return true;
3278
}
3279
3280
// This is very slow and only intended for debugging/development. It's enabled using the "-validate_etc1s" command line option.
// Rebuilds each block from the quantized cluster data and verifies it matches the
// backend's output block bit-for-bit. Returns false (or aborts via the CHECK macro's
// verify handler) on any mismatch.
bool basisu_frontend::validate_output() const
{
	debug_printf("validate_output\n");

	if (!check_etc1s_constraints())
		return false;

	for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
	{
//#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0)
#define CHECK(x) BASISU_FRONTEND_VERIFY(x);

		// All ETC1S output blocks must be flipped differential blocks.
		CHECK(get_output_block(block_index).get_flip_bit() == true);

		const bool diff_flag = get_diff_flag(block_index);
		CHECK(diff_flag == true);

		// Reconstruct the block from the cluster codebooks.
		etc_block blk;
		memset(&blk, 0, sizeof(blk));
		blk.set_flip_bit(true);
		blk.set_diff_bit(true);

		const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
		const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);

		// basisu only supports ETC1S, so these must be equal.
		CHECK(endpoint_cluster0_index == endpoint_cluster1_index);

		CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)));

		CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false));

		blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false));
		blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false));

		// The block must actually be listed in its selector cluster's membership.
		const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
		CHECK(selector_cluster_index < get_total_selector_clusters());

		CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1);

		blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());

		// Compare every field of the reconstructed block against the RDO output block.
		const etc_block &rdo_output_block = get_output_block(block_index);

		CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit());
		CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit());
		CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0));
		CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1));
		CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color());
		CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color());
		CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits());

#undef CHECK
	}

	return true;
}
void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks)
3340
{
3341
gpu_image g;
3342
g.init(texture_format::cETC1, num_blocks_x * 4, num_blocks_y * 4);
3343
3344
for (uint32_t y = 0; y < num_blocks_y; y++)
3345
{
3346
for (uint32_t x = 0; x < num_blocks_x; x++)
3347
{
3348
const uint32_t block_index = first_block + x + y * num_blocks_x;
3349
3350
etc_block &blk = *(etc_block *)g.get_block_ptr(x, y);
3351
3352
if (output_blocks)
3353
blk = get_output_block(block_index);
3354
else
3355
{
3356
const bool diff_flag = get_diff_flag(block_index);
3357
3358
blk.set_diff_bit(diff_flag);
3359
blk.set_flip_bit(true);
3360
3361
const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
3362
const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
3363
3364
if (diff_flag)
3365
blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false));
3366
else
3367
blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
3368
3369
blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
3370
blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
3371
3372
const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
3373
blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
3374
}
3375
}
3376
}
3377
3378
image img;
3379
g.unpack(img);
3380
3381
save_png(pFilename, img);
3382
}
3383
3384
} // namespace basisu
3385
3386
3387