Path: blob/master/thirdparty/basis_universal/encoder/basisu_comp.cpp
9902 views
// basisu_comp.cpp1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14#include "basisu_comp.h"15#include "basisu_enc.h"16#include <unordered_set>17#include <atomic>18#include <map>1920//#define UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS2122// basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here.23#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES24#include "basisu_miniz.h"2526#include "basisu_opencl.h"2728#include "../transcoder/basisu_astc_hdr_core.h"2930#if !BASISD_SUPPORT_KTX231#error BASISD_SUPPORT_KTX2 must be enabled (set to 1).32#endif3334#if BASISD_SUPPORT_KTX2_ZSTD35#include <zstd.h>36#endif3738// Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all)39#define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0)4041// Set to 1 to disable writing all KTX2 key values, triggering an early validator bug.42#define BASISU_DISABLE_KTX2_KEY_VALUES (0)4344using namespace buminiz;4546#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 047#define DEBUG_CROP_TEXTURE_TO_64x64 (0)48#define DEBUG_RESIZE_TEXTURE (0)4950namespace basisu51{52basis_compressor::basis_compressor() :53m_pOpenCL_context(nullptr),54m_fmt_mode(basist::basis_tex_format::cETC1S),55m_basis_file_size(0),56m_basis_bits_per_texel(0.0f),57m_total_blocks(0),58m_hdr_image_scale(1.0f),59m_ldr_to_hdr_upconversion_nit_multiplier(1.0f),60m_upconverted_any_ldr_images(false),61m_any_source_image_has_alpha(false),62m_opencl_failed(false)63{64debug_printf("basis_compressor::basis_compressor\n");6566assert(g_library_initialized);67}6869basis_compressor::~basis_compressor()70{71if (m_pOpenCL_context)72{73opencl_destroy_context(m_pOpenCL_context);74m_pOpenCL_context = nullptr;75}76}7778void basis_compressor::check_for_hdr_inputs()79{80if ((!m_params.m_source_filenames.size()) && (!m_params.m_source_images.size()))81{82if (m_params.m_source_images_hdr.size())83{84// Assume they want UASTC HDR if they've specified any HDR source images.85m_params.m_hdr = true;86}87}8889if (!m_params.m_hdr)90{91// See if any files are .EXR or .HDR, if so switch the compressor to UASTC HDR mode.92for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)93{94std::string filename;95string_get_filename(m_params.m_source_filenames[i].c_str(), filename);9697std::string ext(string_get_extension(filename));98string_tolower(ext);99100if ((ext == "exr") || (ext == "hdr"))101{102m_params.m_hdr = true;103break;104}105}106}107108if (m_params.m_hdr)109{110if (m_params.m_source_alpha_filenames.size())111{112debug_printf("Warning: Alpha channel image filenames are not yet supported in UASTC HDR/ASTC HDR modes.\n");113m_params.m_source_alpha_filenames.clear();114}115}116117if (m_params.m_hdr)118m_params.m_uastc = true;119}120121bool basis_compressor::sanity_check_input_params()122{123// Check for no source filenames specified.124if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size()))125{126assert(0);127return false;128}129130// See if they've specified any source filenames, but didn't tell us to read them.131if ((!m_params.m_read_source_images) && (m_params.m_source_filenames.size()))132{133assert(0);134return false;135}136137// Sanity check the input image parameters.138if (m_params.m_read_source_images)139{140// Caller can't specify their own images if they want us to read source images from files.141if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size())142{143assert(0);144return false;145}146147if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size())148{149assert(0);150return false;151}152}153else154{155// They didn't tell us to read any source files, so check for no LDR/HDR source images.156if (!m_params.m_source_images.size() && !m_params.m_source_images_hdr.size())157{158assert(0);159return false;160}161162// Now we know we've been supplied LDR and/or HDR source images, check for LDR vs. HDR conflicts.163164if (m_params.m_source_images.size())165{166// They've supplied LDR images, so make sure they also haven't specified HDR input images.167if (m_params.m_source_images_hdr.size() || m_params.m_source_mipmap_images_hdr.size())168{169assert(0);170return false;171}172}173else174{175// No LDR images, so make sure they haven't specified any LDR mipmaps.176if (m_params.m_source_mipmap_images.size())177{178assert(0);179return false;180}181182// No LDR images, so ensure they've supplied some HDR images to process.183if (!m_params.m_source_images_hdr.size())184{185assert(0);186return false;187}188}189}190191return true;192}193194bool basis_compressor::init(const basis_compressor_params ¶ms)195{196debug_printf("basis_compressor::init\n");197198if (!g_library_initialized)199{200error_printf("basis_compressor::init: basisu_encoder_init() MUST be called before using any encoder functionality!\n");201return false;202}203204if (!params.m_pJob_pool)205{206error_printf("basis_compressor::init: A non-null job_pool pointer must be specified\n");207return false;208}209210m_params = params;211212if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data))213m_params.m_validate_output_data = true;214215m_hdr_image_scale = 1.0f;216m_ldr_to_hdr_upconversion_nit_multiplier = 1.0f;217m_upconverted_any_ldr_images = false;218219check_for_hdr_inputs();220221if (m_params.m_debug)222{223debug_printf("basis_compressor::init:\n");224225#define PRINT_BOOL_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast<bool>(m_params.v), m_params.v.was_changed());226#define PRINT_INT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed());227#define PRINT_UINT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed());228#define PRINT_FLOAT_VALUE(v) fmt_debug_printf("{}: {} {}\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed());229230fmt_debug_printf("Source LDR images: {}, HDR images: {}, filenames: {}, alpha filenames: {}, LDR mipmap images: {}, HDR mipmap images: {}\n",231(uint64_t)m_params.m_source_images.size(), (uint64_t)m_params.m_source_images_hdr.size(),232(uint64_t)m_params.m_source_filenames.size(), (uint64_t)m_params.m_source_alpha_filenames.size(),233(uint64_t)m_params.m_source_mipmap_images.size(), (uint64_t)m_params.m_source_mipmap_images_hdr.size());234235if (m_params.m_source_mipmap_images.size())236{237debug_printf("m_source_mipmap_images array sizes:\n");238for (uint32_t i = 0; i < m_params.m_source_mipmap_images.size(); i++)239debug_printf("%u ", m_params.m_source_mipmap_images[i].size());240debug_printf("\n");241}242243if (m_params.m_source_mipmap_images_hdr.size())244{245debug_printf("m_source_mipmap_images_hdr array sizes:\n");246for (uint32_t i = 0; i < m_params.m_source_mipmap_images_hdr.size(); i++)247debug_printf("%u ", m_params.m_source_mipmap_images_hdr[i].size());248debug_printf("\n");249}250251PRINT_BOOL_VALUE(m_hdr);252253switch (m_params.m_hdr_mode)254{255case hdr_modes::cUASTC_HDR_4X4:256{257fmt_debug_printf("m_hdr_mode: cUASTC_HDR_4X4\n");258break;259}260case hdr_modes::cASTC_HDR_6X6:261{262fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6\n");263break;264}265case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE:266{267fmt_debug_printf("m_hdr_mode: cASTC_HDR_6X6_INTERMEDIATE\n");268break;269}270default:271assert(false);272return false;273}274275PRINT_BOOL_VALUE(m_uastc);276PRINT_BOOL_VALUE(m_use_opencl);277PRINT_BOOL_VALUE(m_y_flip);278PRINT_BOOL_VALUE(m_debug);279PRINT_BOOL_VALUE(m_validate_etc1s);280PRINT_BOOL_VALUE(m_debug_images);281PRINT_INT_VALUE(m_compression_level);282PRINT_BOOL_VALUE(m_perceptual);283PRINT_BOOL_VALUE(m_no_endpoint_rdo);284PRINT_BOOL_VALUE(m_no_selector_rdo);285PRINT_BOOL_VALUE(m_read_source_images);286PRINT_BOOL_VALUE(m_write_output_basis_or_ktx2_files);287PRINT_BOOL_VALUE(m_compute_stats);288PRINT_BOOL_VALUE(m_check_for_alpha);289PRINT_BOOL_VALUE(m_force_alpha);290debug_printf("swizzle: %d,%d,%d,%d\n",291m_params.m_swizzle[0],292m_params.m_swizzle[1],293m_params.m_swizzle[2],294m_params.m_swizzle[3]);295PRINT_BOOL_VALUE(m_renormalize);296PRINT_BOOL_VALUE(m_multithreading);297PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks);298299PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh);300PRINT_FLOAT_VALUE(m_selector_rdo_thresh);301302PRINT_BOOL_VALUE(m_mip_gen);303PRINT_BOOL_VALUE(m_mip_renormalize);304PRINT_BOOL_VALUE(m_mip_wrapping);305PRINT_BOOL_VALUE(m_mip_fast);306PRINT_BOOL_VALUE(m_mip_srgb);307PRINT_FLOAT_VALUE(m_mip_premultiplied);308PRINT_FLOAT_VALUE(m_mip_scale);309PRINT_INT_VALUE(m_mip_smallest_dimension);310debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str());311312debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_etc1s_max_endpoint_clusters);313debug_printf("m_max_selector_clusters: %u\n", m_params.m_etc1s_max_selector_clusters);314debug_printf("m_etc1s_quality_level: %i\n", m_params.m_etc1s_quality_level);315debug_printf("UASTC HDR 4x4 quality level: %u\n", m_params.m_uastc_hdr_4x4_options.m_level);316317debug_printf("m_tex_type: %u\n", m_params.m_tex_type);318debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1);319debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0);320debug_printf("m_pack_uastc_ldr_4x4_flags: 0x%X\n", m_params.m_pack_uastc_ldr_4x4_flags);321322PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4);323PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_quality_scalar);324PRINT_INT_VALUE(m_rdo_uastc_ldr_4x4_dict_size);325PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio);326PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_skip_block_rms_thresh);327PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale);328PRINT_FLOAT_VALUE(m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev);329PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode)330PRINT_BOOL_VALUE(m_rdo_uastc_ldr_4x4_multithreading);331332PRINT_INT_VALUE(m_resample_width);333PRINT_INT_VALUE(m_resample_height);334PRINT_FLOAT_VALUE(m_resample_factor);335336debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 1 : 0);337if (m_params.m_pGlobal_codebooks)338{339debug_printf("Global codebook endpoints: %u selectors: %u\n", m_params.m_pGlobal_codebooks->get_endpoints().size(), m_params.m_pGlobal_codebooks->get_selectors().size());340}341342PRINT_BOOL_VALUE(m_create_ktx2_file);343344debug_printf("KTX2 UASTC supercompression: %u\n", m_params.m_ktx2_uastc_supercompression);345debug_printf("KTX2 Zstd supercompression level: %i\n", (int)m_params.m_ktx2_zstd_supercompression_level);346debug_printf("KTX2 sRGB transfer func: %u\n", (int)m_params.m_ktx2_srgb_transfer_func);347debug_printf("Total KTX2 key values: %u\n", m_params.m_ktx2_key_values.size());348for (uint32_t i = 0; i < m_params.m_ktx2_key_values.size(); i++)349{350debug_printf("Key: \"%s\"\n", m_params.m_ktx2_key_values[i].m_key.data());351debug_printf("Value size: %u\n", m_params.m_ktx2_key_values[i].m_value.size());352}353354PRINT_BOOL_VALUE(m_validate_output_data);355PRINT_BOOL_VALUE(m_ldr_hdr_upconversion_srgb_to_linear);356PRINT_FLOAT_VALUE(m_ldr_hdr_upconversion_nit_multiplier);357debug_printf("Allow UASTC HDR 4x4 uber mode: %u\n", m_params.m_uastc_hdr_4x4_options.m_allow_uber_mode);358debug_printf("UASTC HDR 4x4 ultra quant: %u\n", m_params.m_uastc_hdr_4x4_options.m_ultra_quant);359PRINT_BOOL_VALUE(m_hdr_favor_astc);360361#undef PRINT_BOOL_VALUE362#undef PRINT_INT_VALUE363#undef PRINT_UINT_VALUE364#undef PRINT_FLOAT_VALUE365}366367if (!sanity_check_input_params())368return false;369370if ((m_params.m_use_opencl) && opencl_is_available() && !m_pOpenCL_context && !m_opencl_failed)371{372m_pOpenCL_context = opencl_create_context();373if (!m_pOpenCL_context)374m_opencl_failed = true;375}376377return true;378}379380void basis_compressor::pick_format_mode()381{382// Unfortunately due to the legacy of this code and backwards compat this is more complex than I would like.383m_fmt_mode = basist::basis_tex_format::cETC1S;384385if (m_params.m_hdr)386{387assert(m_params.m_uastc);388389switch (m_params.m_hdr_mode)390{391case hdr_modes::cUASTC_HDR_4X4:392m_fmt_mode = basist::basis_tex_format::cUASTC_HDR_4x4;393break;394case hdr_modes::cASTC_HDR_6X6:395m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6;396break;397case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE:398m_fmt_mode = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE;399break;400default:401assert(0);402break;403}404}405else if (m_params.m_uastc)406{407m_fmt_mode = basist::basis_tex_format::cUASTC4x4;408}409410if (m_params.m_debug)411{412switch (m_fmt_mode)413{414case basist::basis_tex_format::cETC1S:415fmt_debug_printf("Format Mode: cETC1S\n");416break;417case basist::basis_tex_format::cUASTC4x4:418fmt_debug_printf("Format Mode: cUASTC4x4\n");419break;420case basist::basis_tex_format::cUASTC_HDR_4x4:421fmt_debug_printf("Format Mode: cUASTC_HDR_4x4\n");422break;423case basist::basis_tex_format::cASTC_HDR_6x6:424fmt_debug_printf("Format Mode: cASTC_HDR_6x6\n");425break;426case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE:427fmt_debug_printf("Format Mode: cASTC_HDR_6x6_INTERMEDIATE\n");428break;429default:430assert(0);431break;432}433}434}435436basis_compressor::error_code basis_compressor::process()437{438debug_printf("basis_compressor::process\n");439440if (!read_dds_source_images())441return cECFailedReadingSourceImages;442443// Note: After here m_params.m_hdr, m_params.m_uastc and m_fmt_mode cannot be changed.444pick_format_mode();445446if (!read_source_images())447return cECFailedReadingSourceImages;448449if (!validate_texture_type_constraints())450return cECFailedValidating;451452if (m_params.m_create_ktx2_file)453{454if (!validate_ktx2_constraints())455{456error_printf("Inputs do not satisfy .KTX2 texture constraints: all source images must be the same resolution and have the same number of mipmap levels.\n");457return cECFailedValidating;458}459}460461if (!extract_source_blocks())462return cECFailedFrontEnd;463464if (m_params.m_hdr)465{466if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4)467{468// UASTC 4x4 HDR469if (m_params.m_status_output)470printf("Mode: UASTC 4x4 HDR Level %u\n", m_params.m_uastc_hdr_4x4_options.m_level);471472error_code ec = encode_slices_to_uastc_4x4_hdr();473if (ec != cECSuccess)474return ec;475}476else477{478assert((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE));479480// ASTC 6x6 HDR481if (m_params.m_status_output)482{483fmt_printf("Mode: ASTC 6x6 HDR {}, Base Level: {}, Highest Level: {}, Lambda: {}, REC 2020: {}\n",484(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE) ? "Intermediate" : "",485m_params.m_astc_hdr_6x6_options.m_master_comp_level, m_params.m_astc_hdr_6x6_options.m_highest_comp_level,486m_params.m_astc_hdr_6x6_options.m_lambda, m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut);487}488489error_code ec = encode_slices_to_astc_6x6_hdr();490if (ec != cECSuccess)491return ec;492}493}494else if (m_params.m_uastc)495{496// UASTC 4x4 LDR497if (m_params.m_status_output)498printf("Mode: UASTC LDR 4x4 Level %u\n", m_params.m_pack_uastc_ldr_4x4_flags & cPackUASTCLevelMask);499500error_code ec = encode_slices_to_uastc_4x4_ldr();501if (ec != cECSuccess)502return ec;503}504else505{506// ETC1S507if (m_params.m_status_output)508printf("Mode: ETC1S Quality %i, Level %i\n", m_params.m_etc1s_quality_level, (int)m_params.m_compression_level);509510if (!process_frontend())511return cECFailedFrontEnd;512513if (!extract_frontend_texture_data())514return cECFailedFontendExtract;515516if (!process_backend())517return cECFailedBackend;518}519520if (!create_basis_file_and_transcode())521return cECFailedCreateBasisFile;522523if (m_params.m_create_ktx2_file)524{525if (!create_ktx2_file())526return cECFailedCreateKTX2File;527}528529if (!write_output_files_and_compute_stats())530return cECFailedWritingOutput;531532return cECSuccess;533}534535basis_compressor::error_code basis_compressor::encode_slices_to_astc_6x6_hdr()536{537debug_printf("basis_compressor::encode_slices_to_astc_6x6_hdr\n");538539interval_timer tm;540tm.start();541542m_uastc_slice_textures.resize(m_slice_descs.size());543for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)544m_uastc_slice_textures[slice_index].init(texture_format::cASTC_HDR_6x6, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height);545546if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6)547m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6;548else if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE)549m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE;550else551{552assert(0);553return cECFailedEncodeUASTC;554}555556m_uastc_backend_output.m_etc1s = false;557m_uastc_backend_output.m_srgb = false;558m_uastc_backend_output.m_slice_desc = m_slice_descs;559m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size());560m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size());561562astc_6x6_hdr::astc_hdr_6x6_global_config global_cfg(m_params.m_astc_hdr_6x6_options);563564global_cfg.m_image_stats = m_params.m_compute_stats;565global_cfg.m_debug_images = m_params.m_debug_images;566global_cfg.m_output_images = m_params.m_debug_images;567global_cfg.m_debug_output = m_params.m_debug;568global_cfg.m_status_output = m_params.m_status_output || m_params.m_debug;569570for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)571{572gpu_image& dst_tex = m_uastc_slice_textures[slice_index];573uint8_vec &dst_buf = m_uastc_backend_output.m_slice_image_data[slice_index];574575basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];576(void)slice_desc;577578const imagef& source_image = m_slice_images_hdr[slice_index];579assert(source_image.get_width() && source_image.get_height());580581uint8_vec intermediate_tex_data, astc_tex_data;582583global_cfg.m_debug_image_prefix = m_params.m_astc_hdr_6x6_options.m_debug_image_prefix;584global_cfg.m_debug_image_prefix += fmt_string("slice_{}_", slice_index);585586global_cfg.m_output_image_prefix = m_params.m_astc_hdr_6x6_options.m_output_image_prefix;587global_cfg.m_output_image_prefix += fmt_string("slice_{}_", slice_index);588589if (m_params.m_debug)590fmt_debug_printf("----------------------------------------------------------------------------\n");591592astc_6x6_hdr::result_metrics metrics;593bool status = astc_6x6_hdr::compress_photo(source_image, global_cfg, m_params.m_pJob_pool, intermediate_tex_data, astc_tex_data, metrics);594if (!status)595return cECFailedEncodeUASTC;596597if (m_params.m_debug)598fmt_debug_printf("----------------------------------------------------------------------------\n");599600// Currently it always gives us both intermediate and RDO601assert(intermediate_tex_data.size());602assert(astc_tex_data.size());603assert((astc_tex_data.size() & 15) == 0);604assert(dst_tex.get_size_in_bytes() == astc_tex_data.size_in_bytes());605606memcpy(dst_tex.get_ptr(), astc_tex_data.data(), astc_tex_data.size_in_bytes());607608if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6)609{610dst_buf.resize(dst_tex.get_size_in_bytes());611memcpy(&dst_buf[0], dst_tex.get_ptr(), dst_tex.get_size_in_bytes());612}613else614{615assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE);616617dst_buf.resize(intermediate_tex_data.size_in_bytes());618memcpy(&dst_buf[0], intermediate_tex_data.get_ptr(), intermediate_tex_data.size_in_bytes());619}620621m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(dst_buf.get_ptr(), dst_buf.size_in_bytes(), 0);622}623624return cECSuccess;625}626627basis_compressor::error_code basis_compressor::encode_slices_to_uastc_4x4_hdr()628{629debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr\n");630631interval_timer tm;632tm.start();633634m_uastc_slice_textures.resize(m_slice_descs.size());635for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)636m_uastc_slice_textures[slice_index].init(texture_format::cUASTC_HDR_4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height);637638m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC_HDR_4x4;639m_uastc_backend_output.m_etc1s = false;640m_uastc_backend_output.m_srgb = false;641m_uastc_backend_output.m_slice_desc = m_slice_descs;642m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size());643m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size());644645if (!m_params.m_perceptual)646{647m_params.m_uastc_hdr_4x4_options.m_r_err_scale = 1.0f;648m_params.m_uastc_hdr_4x4_options.m_g_err_scale = 1.0f;649}650651const float DEFAULT_BC6H_ERROR_WEIGHT = .65f;// .85f;652const float LOWEST_BC6H_ERROR_WEIGHT = .1f;653m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight = m_params.m_hdr_favor_astc ? LOWEST_BC6H_ERROR_WEIGHT : DEFAULT_BC6H_ERROR_WEIGHT;654655std::atomic<bool> any_failures;656any_failures.store(false);657658astc_hdr_4x4_block_stats enc_stats;659660struct uastc_blk_desc661{662uint32_t m_solid_flag;663uint32_t m_num_partitions;664uint32_t m_cem_index;665uint32_t m_weight_ise_range;666uint32_t m_endpoint_ise_range;667668bool operator< (const uastc_blk_desc& desc) const669{670if (this == &desc)671return false;672673#define COMP(XX) if (XX < desc.XX) return true; else if (XX != desc.XX) return false;674COMP(m_solid_flag)675COMP(m_num_partitions)676COMP(m_cem_index)677COMP(m_weight_ise_range)678COMP(m_endpoint_ise_range)679#undef COMP680681return false;682}683684bool operator== (const uastc_blk_desc& desc) const685{686if (this == &desc)687return true;688if ((*this < desc) || (desc < *this))689return false;690return true;691}692693bool operator!= (const uastc_blk_desc& desc) const694{695return !(*this == desc);696}697};698699struct uastc_blk_desc_stats700{701uastc_blk_desc_stats() : m_count(0) { }702uint32_t m_count;703#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS704basisu::vector<basist::astc_blk> m_blks;705#endif706};707708std::map<uastc_blk_desc, uastc_blk_desc_stats> unique_block_descs;709std::mutex unique_block_desc_mutex;710711for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)712{713gpu_image& tex = m_uastc_slice_textures[slice_index];714basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];715(void)slice_desc;716717const uint32_t num_blocks_x = tex.get_blocks_x();718const uint32_t num_blocks_y = tex.get_blocks_y();719const uint32_t total_blocks = tex.get_total_blocks();720const imagef& source_image = m_slice_images_hdr[slice_index];721722std::atomic<uint32_t> total_blocks_processed;723total_blocks_processed.store(0);724725const uint32_t N = 256;726for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N)727{728const uint32_t first_index = block_index_iter;729const uint32_t last_index = minimum<uint32_t>(total_blocks, block_index_iter + N);730731m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image,732&tex, &total_blocks_processed, &any_failures, &enc_stats, &unique_block_descs, &unique_block_desc_mutex]733{734BASISU_NOTE_UNUSED(num_blocks_y);735736basisu::vector<astc_hdr_4x4_pack_results> all_results;737all_results.reserve(256);738739for (uint32_t block_index = first_index; block_index < last_index; block_index++)740{741const uint32_t block_x = block_index % num_blocks_x;742const uint32_t block_y = block_index / num_blocks_x;743744//if ((block_x == 176) && (block_y == 128))745// printf("!");746747vec4F block_pixels[16];748749source_image.extract_block_clamped(&block_pixels[0], block_x * 4, block_y * 4, 4, 4);750751basist::astc_blk& dest_block = *(basist::astc_blk*)tex.get_block_ptr(block_x, block_y);752753float rgb_pixels[16 * 3];754basist::half_float rgb_pixels_half[16 * 3];755for (uint32_t i = 0; i < 16; i++)756{757rgb_pixels[i * 3 + 0] = block_pixels[i][0];758rgb_pixels_half[i * 3 + 0] = float_to_half_non_neg_no_nan_inf(block_pixels[i][0]);759760rgb_pixels[i * 3 + 1] = block_pixels[i][1];761rgb_pixels_half[i * 3 + 1] = float_to_half_non_neg_no_nan_inf(block_pixels[i][1]);762763rgb_pixels[i * 3 + 2] = block_pixels[i][2];764rgb_pixels_half[i * 3 + 2] = float_to_half_non_neg_no_nan_inf(block_pixels[i][2]);765}766767bool status = astc_hdr_4x4_enc_block(&rgb_pixels[0], rgb_pixels_half, m_params.m_uastc_hdr_4x4_options, all_results);768if (!status)769{770any_failures.store(true);771continue;772}773774double best_err = 1e+30f;775int best_result_index = -1;776777const double bc6h_err_weight = m_params.m_uastc_hdr_4x4_options.m_bc6h_err_weight;778const double astc_err_weight = (1.0f - bc6h_err_weight);779780for (uint32_t i = 0; i < all_results.size(); i++)781{782basist::half_float unpacked_bc6h_block[4 * 4 * 3];783unpack_bc6h(&all_results[i].m_bc6h_block, unpacked_bc6h_block, false);784785all_results[i].m_bc6h_block_error = compute_block_error(16, rgb_pixels_half, unpacked_bc6h_block, m_params.m_uastc_hdr_4x4_options);786787double overall_err = (all_results[i].m_bc6h_block_error * bc6h_err_weight) + (all_results[i].m_best_block_error * astc_err_weight);788789if ((!i) || (overall_err < best_err))790{791best_err = overall_err;792best_result_index = i;793}794}795796const astc_hdr_4x4_pack_results& best_results = all_results[best_result_index];797798astc_hdr_4x4_pack_results_to_block(dest_block, best_results);799800// Verify that this block is valid UASTC HDR and we can successfully transcode it to BC6H.801// (Well, except in fastest mode.)802if (m_params.m_uastc_hdr_4x4_options.m_level > 0)803{804basist::bc6h_block transcoded_bc6h_blk;805bool transcode_results = astc_hdr_transcode_to_bc6h(dest_block, transcoded_bc6h_blk);806assert(transcode_results);807if ((!transcode_results) && (!any_failures))808{809error_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: UASTC HDR block transcode check failed!\n");810811any_failures.store(true);812continue;813}814}815816if (m_params.m_debug)817{818// enc_stats has its own mutex819enc_stats.update(best_results);820821uastc_blk_desc blk_desc;822clear_obj(blk_desc);823824blk_desc.m_solid_flag = best_results.m_is_solid;825if (!blk_desc.m_solid_flag)826{827blk_desc.m_num_partitions = best_results.m_best_blk.m_num_partitions;828blk_desc.m_cem_index = best_results.m_best_blk.m_color_endpoint_modes[0];829blk_desc.m_weight_ise_range = best_results.m_best_blk.m_weight_ise_range;830blk_desc.m_endpoint_ise_range = best_results.m_best_blk.m_endpoint_ise_range;831}832833{834std::lock_guard<std::mutex> lck(unique_block_desc_mutex);835836auto res = unique_block_descs.insert(std::make_pair(blk_desc, uastc_blk_desc_stats()));837838(res.first)->second.m_count++;839#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS840(res.first)->second.m_blks.push_back(dest_block);841#endif842}843}844845total_blocks_processed++;846847uint32_t val = total_blocks_processed;848if (((val & 1023) == 1023) && m_params.m_status_output)849{850debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks);851}852}853854});855856} // block_index_iter857858m_params.m_pJob_pool->wait_for_all();859860if (any_failures)861return cECFailedEncodeUASTC;862863m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes());864memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes());865866m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0);867868} // slice_index869870debug_printf("basis_compressor::encode_slices_to_uastc_4x4_hdr: Total time: %3.3f secs\n", tm.get_elapsed_secs());871872if (m_params.m_debug)873{874debug_printf("\n----- Total unique UASTC block descs: %u\n", (uint32_t)unique_block_descs.size());875876uint32_t c = 0;877for (auto it = unique_block_descs.begin(); it != unique_block_descs.end(); ++it)878{879debug_printf("%u. Total uses: %u %3.2f%%, solid color: %u\n", c, it->second.m_count,880((float)it->second.m_count * 100.0f) / enc_stats.m_total_blocks, it->first.m_solid_flag);881882if (!it->first.m_solid_flag)883{884debug_printf(" Num partitions: %u\n", it->first.m_num_partitions);885debug_printf(" CEM index: %u\n", it->first.m_cem_index);886debug_printf(" Weight ISE range: %u (%u levels)\n", it->first.m_weight_ise_range, astc_helpers::get_ise_levels(it->first.m_weight_ise_range));887debug_printf(" Endpoint ISE range: %u (%u levels)\n", it->first.m_endpoint_ise_range, astc_helpers::get_ise_levels(it->first.m_endpoint_ise_range));888}889890#ifdef UASTC_HDR_DEBUG_SAVE_CATEGORIZED_BLOCKS891debug_printf(" -- UASTC HDR block bytes:\n");892for (uint32_t j = 0; j < minimum<uint32_t>(4, it->second.m_blks.size()); j++)893{894basist::astc_blk& blk = it->second.m_blks[j];895896debug_printf(" - UASTC HDR: { ");897for (uint32_t k = 0; k < 16; k++)898debug_printf("%u%s", ((const uint8_t*)&blk)[k], (k != 15) ? ", " : "");899debug_printf(" }\n");900901basist::bc6h_block bc6h_blk;902bool res = astc_hdr_transcode_to_bc6h(blk, bc6h_blk);903assert(res);904if (!res)905{906error_printf("astc_hdr_transcode_to_bc6h() failed!\n");907return cECFailedEncodeUASTC;908}909910debug_printf(" - BC6H: { ");911for (uint32_t k = 0; k < 16; k++)912debug_printf("%u%s", ((const uint8_t*)&bc6h_blk)[k], (k != 15) ? ", " : "");913debug_printf(" }\n");914}915#endif916917c++;918}919printf("\n");920921enc_stats.print();922}923924return cECSuccess;925}926927basis_compressor::error_code basis_compressor::encode_slices_to_uastc_4x4_ldr()928{929debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr\n");930931m_uastc_slice_textures.resize(m_slice_descs.size());932for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)933m_uastc_slice_textures[slice_index].init(texture_format::cUASTC4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height);934935m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC4x4;936m_uastc_backend_output.m_etc1s = false;937m_uastc_backend_output.m_slice_desc = m_slice_descs;938m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size());939m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size());940941for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)942{943gpu_image& tex = m_uastc_slice_textures[slice_index];944basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];945(void)slice_desc;946947const uint32_t num_blocks_x = tex.get_blocks_x();948const uint32_t num_blocks_y = tex.get_blocks_y();949const uint32_t total_blocks = tex.get_total_blocks();950const image& source_image = m_slice_images[slice_index];951952std::atomic<uint32_t> total_blocks_processed;953total_blocks_processed.store(0);954955const uint32_t N = 256;956for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N)957{958const uint32_t first_index = block_index_iter;959const uint32_t last_index = minimum<uint32_t>(total_blocks, block_index_iter + N);960961m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, &total_blocks_processed]962{963BASISU_NOTE_UNUSED(num_blocks_y);964965uint32_t uastc_flags = m_params.m_pack_uastc_ldr_4x4_flags;966if ((m_params.m_rdo_uastc_ldr_4x4) && (m_params.m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode))967uastc_flags |= cPackUASTCFavorSimplerModes;968969for (uint32_t block_index = first_index; block_index < last_index; block_index++)970{971const uint32_t block_x = block_index % num_blocks_x;972const uint32_t block_y = block_index / num_blocks_x;973974color_rgba block_pixels[4][4];975976source_image.extract_block_clamped((color_rgba*)block_pixels, block_x * 4, block_y * 4, 4, 4);977978basist::uastc_block& dest_block = *(basist::uastc_block*)tex.get_block_ptr(block_x, block_y);979980encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags);981982total_blocks_processed++;983984uint32_t val = total_blocks_processed;985if (((val & 16383) == 16383) && m_params.m_status_output)986{987debug_printf("basis_compressor::encode_slices_to_uastc_4x4_ldr: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks);988}989990}991992});993994} // block_index_iter995996m_params.m_pJob_pool->wait_for_all();997998if (m_params.m_rdo_uastc_ldr_4x4)999{1000uastc_rdo_params rdo_params;1001rdo_params.m_lambda = m_params.m_rdo_uastc_ldr_4x4_quality_scalar;1002rdo_params.m_max_allowed_rms_increase_ratio = m_params.m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio;1003rdo_params.m_skip_block_rms_thresh = m_params.m_rdo_uastc_ldr_4x4_skip_block_rms_thresh;1004rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_ldr_4x4_dict_size;1005rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale;1006rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev;10071008bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(),1009(const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_ldr_4x4_flags, m_params.m_rdo_uastc_ldr_4x4_multithreading ? m_params.m_pJob_pool : nullptr,1010(m_params.m_rdo_uastc_ldr_4x4_multithreading && m_params.m_pJob_pool) ? basisu::minimum<uint32_t>(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0);1011if (!status)1012{1013return cECFailedUASTCRDOPostProcess;1014}1015}10161017m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes());1018memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes());10191020m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0);10211022} // slice_index10231024return cECSuccess;1025}10261027bool basis_compressor::generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha)1028{1029debug_printf("basis_compressor::generate_mipmaps\n");10301031interval_timer tm;1032tm.start();10331034uint32_t total_levels = 1;1035uint32_t w = img.get_width(), h = img.get_height();1036while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension)1037{1038w = maximum(w >> 1U, 1U);1039h = maximum(h >> 1U, 1U);1040total_levels++;1041}10421043for (uint32_t level = 1; level < total_levels; level++)1044{1045const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);1046const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);10471048imagef& level_img = *enlarge_vector(mips, 1);1049level_img.resize(level_width, level_height);10501051const imagef* pSource_image = &img;10521053if (m_params.m_mip_fast)1054{1055if (level > 1)1056pSource_image = &mips[level - 1];1057}10581059bool status = image_resample(*pSource_image, level_img,1060//m_params.m_mip_filter.c_str(),1061"box", // TODO: negative lobes in the filter are causing negative colors, try Mitchell1062m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3);1063if (!status)1064{1065error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n");1066return false;1067}10681069clean_hdr_image(level_img);1070}10711072if (m_params.m_debug)1073debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs());10741075return true;1076}10771078bool basis_compressor::generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha)1079{1080debug_printf("basis_compressor::generate_mipmaps\n");10811082interval_timer tm;1083tm.start();10841085uint32_t total_levels = 1;1086uint32_t w = img.get_width(), h = img.get_height();1087while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension)1088{1089w = maximum(w >> 1U, 1U);1090h = maximum(h >> 1U, 1U);1091total_levels++;1092}10931094#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN1095// Requires stb_image_resize1096stbir_filter filter = STBIR_FILTER_DEFAULT;1097if (m_params.m_mip_filter == "box")1098filter = STBIR_FILTER_BOX;1099else if (m_params.m_mip_filter == "triangle")1100filter = STBIR_FILTER_TRIANGLE;1101else if (m_params.m_mip_filter == "cubic")1102filter = STBIR_FILTER_CUBICBSPLINE;1103else if (m_params.m_mip_filter == "catmull")1104filter = STBIR_FILTER_CATMULLROM;1105else if (m_params.m_mip_filter == "mitchell")1106filter = STBIR_FILTER_MITCHELL;11071108for (uint32_t level = 1; level < total_levels; level++)1109{1110const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);1111const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);11121113image &level_img = *enlarge_vector(mips, 1);1114level_img.resize(level_width, level_height);11151116int result = stbir_resize_uint8_generic(1117(const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba),1118(uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba),1119has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0,1120m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR,1121nullptr);11221123if (result == 0)1124{1125error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n");1126return false;1127}11281129if (m_params.m_mip_renormalize)1130level_img.renormalize_normal_map();1131}1132#else1133for (uint32_t level = 1; level < total_levels; level++)1134{1135const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);1136const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);11371138image& level_img = *enlarge_vector(mips, 1);1139level_img.resize(level_width, level_height);11401141const image* pSource_image = &img;11421143if (m_params.m_mip_fast)1144{1145if (level > 1)1146pSource_image = &mips[level - 1];1147}11481149bool status = image_resample(*pSource_image, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3);1150if (!status)1151{1152error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n");1153return false;1154}11551156if (m_params.m_mip_renormalize)1157level_img.renormalize_normal_map();1158}1159#endif11601161if (m_params.m_debug)1162debug_printf("Total mipmap generation time: %3.3f secs\n", tm.get_elapsed_secs());11631164return true;1165}11661167void basis_compressor::clean_hdr_image(imagef& src_img)1168{1169const uint32_t width = src_img.get_width();1170const uint32_t height = src_img.get_height();11711172float max_used_val = 0.0f;1173for (uint32_t y = 0; y < height; y++)1174{1175for (uint32_t x = 0; x < width; x++)1176{1177vec4F& c = src_img(x, y);1178for (uint32_t i = 0; i < 3; i++)1179max_used_val = maximum(max_used_val, c[i]);1180}1181}11821183double hdr_image_scale = 1.0f;1184if (max_used_val > basist::ASTC_HDR_MAX_VAL)1185{1186hdr_image_scale = max_used_val / basist::ASTC_HDR_MAX_VAL;11871188const double inv_hdr_image_scale = basist::ASTC_HDR_MAX_VAL / max_used_val;11891190for (uint32_t y = 0; y < src_img.get_height(); y++)1191{1192for (uint32_t x = 0; x < src_img.get_width(); x++)1193{1194vec4F& c = src_img(x, y);11951196for (uint32_t i = 0; i < 3; i++)1197c[i] = (float)minimum<double>(c[i] * inv_hdr_image_scale, basist::ASTC_HDR_MAX_VAL);1198}1199}12001201printf("Warning: The input HDR image's maximum used float value was %f, which is too high to encode as ASTC HDR. The image's components have been linearly scaled so the maximum used value is %f, by multiplying by %f.\n",1202max_used_val, basist::ASTC_HDR_MAX_VAL, inv_hdr_image_scale);12031204printf("The decoded ASTC HDR texture will have to be scaled up by %f.\n", hdr_image_scale);1205}12061207// TODO: Determine a constant scale factor, apply if > MAX_HALF_FLOAT1208if (!src_img.clean_astc_hdr_pixels(basist::ASTC_HDR_MAX_VAL))1209printf("Warning: clean_astc_hdr_pixels() had to modify the input image to encode to ASTC HDR - see previous warning(s).\n");12101211m_hdr_image_scale = (float)hdr_image_scale;12121213float lowest_nonzero_val = 1e+30f;1214float lowest_val = 1e+30f;1215float highest_val = -1e+30f;12161217for (uint32_t y = 0; y < src_img.get_height(); y++)1218{1219for (uint32_t x = 0; x < src_img.get_width(); x++)1220{1221const vec4F& c = src_img(x, y);12221223for (uint32_t i = 0; i < 3; i++)1224{1225lowest_val = basisu::minimum(lowest_val, c[i]);12261227if (c[i] != 0.0f)1228lowest_nonzero_val = basisu::minimum(lowest_nonzero_val, c[i]);12291230highest_val = basisu::maximum(highest_val, c[i]);1231}1232}1233}12341235debug_printf("Lowest image value: %e, lowest non-zero value: %e, highest value: %e, dynamic range: %e\n", lowest_val, lowest_nonzero_val, highest_val, highest_val / lowest_nonzero_val);1236}12371238bool basis_compressor::read_dds_source_images()1239{1240debug_printf("basis_compressor::read_dds_source_images\n");12411242// Nothing to do if the caller doesn't want us reading source images.1243if ((!m_params.m_read_source_images) || (!m_params.m_source_filenames.size()))1244return true;12451246// Just bail of the caller has specified their own source images.1247if (m_params.m_source_images.size() || m_params.m_source_images_hdr.size())1248return true;12491250if (m_params.m_source_mipmap_images.size() || m_params.m_source_mipmap_images_hdr.size())1251return true;12521253// See if any input filenames are .DDS1254bool any_dds = false, all_dds = true;1255for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)1256{1257std::string ext(string_get_extension(m_params.m_source_filenames[i]));1258if (strcasecmp(ext.c_str(), "dds") == 0)1259any_dds = true;1260else1261all_dds = false;1262}12631264// Bail if no .DDS files specified.1265if (!any_dds)1266return true;12671268// If any input is .DDS they all must be .DDS, for simplicity.1269if (!all_dds)1270{1271error_printf("If any filename is DDS, all filenames must be DDS.\n");1272return false;1273}12741275// Can't jam in alpha channel images if any .DDS files specified.1276if (m_params.m_source_alpha_filenames.size())1277{1278error_printf("Source alpha filenames are not supported in DDS mode.\n");1279return false;1280}12811282bool any_mipmaps = false;12831284// Read each .DDS texture file1285for (uint32_t i = 0; i < m_params.m_source_filenames.size(); i++)1286{1287basisu::vector<image> ldr_mips;1288basisu::vector<imagef> hdr_mips;1289bool status = read_uncompressed_dds_file(m_params.m_source_filenames[i].c_str(), ldr_mips, hdr_mips);1290if (!status)1291return false;12921293assert(ldr_mips.size() || hdr_mips.size());12941295if (m_params.m_status_output)1296{1297printf("Read DDS file \"%s\", %s, %ux%u, %zu mipmap levels\n",1298m_params.m_source_filenames[i].c_str(),1299ldr_mips.size() ? "LDR" : "HDR",1300ldr_mips.size() ? ldr_mips[0].get_width() : hdr_mips[0].get_width(),1301ldr_mips.size() ? ldr_mips[0].get_height() : hdr_mips[0].get_height(),1302ldr_mips.size() ? ldr_mips.size() : hdr_mips.size());1303}13041305if (ldr_mips.size())1306{1307if (m_params.m_source_images_hdr.size())1308{1309error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n");1310return false;1311}13121313m_params.m_source_images.push_back(ldr_mips[0]);1314m_params.m_source_mipmap_images.resize(m_params.m_source_mipmap_images.size() + 1);13151316if (ldr_mips.size() > 1)1317{1318ldr_mips.erase_index(0U);13191320m_params.m_source_mipmap_images.back().swap(ldr_mips);13211322any_mipmaps = true;1323}1324}1325else1326{1327if (m_params.m_source_images.size())1328{1329error_printf("All DDS files must be of the same type (all LDR, or all HDR)\n");1330return false;1331}13321333m_params.m_source_images_hdr.push_back(hdr_mips[0]);1334m_params.m_source_mipmap_images_hdr.resize(m_params.m_source_mipmap_images_hdr.size() + 1);13351336if (hdr_mips.size() > 1)1337{1338hdr_mips.erase_index(0U);13391340m_params.m_source_mipmap_images_hdr.back().swap(hdr_mips);13411342any_mipmaps = true;1343}13441345m_params.m_hdr = true;1346m_params.m_uastc = true;1347}1348}13491350m_params.m_read_source_images = false;1351m_params.m_source_filenames.clear();1352m_params.m_source_alpha_filenames.clear();13531354if (!any_mipmaps)1355{1356m_params.m_source_mipmap_images.clear();1357m_params.m_source_mipmap_images_hdr.clear();1358}13591360if ((m_params.m_hdr) && (!m_params.m_source_images_hdr.size()))1361{1362error_printf("HDR mode enabled, but only LDR .DDS files were loaded. HDR mode requires half or float (HDR) .DDS inputs.\n");1363return false;1364}13651366return true;1367}13681369bool basis_compressor::read_source_images()1370{1371debug_printf("basis_compressor::read_source_images\n");13721373const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() :1374(m_params.m_hdr ? (uint32_t)m_params.m_source_images_hdr.size() : (uint32_t)m_params.m_source_images.size());13751376if (!total_source_files)1377{1378debug_printf("basis_compressor::read_source_images: No source images to process\n");13791380return false;1381}13821383m_stats.resize(0);1384m_slice_descs.resize(0);1385m_slice_images.resize(0);1386m_slice_images_hdr.resize(0);13871388m_total_blocks = 0;1389uint32_t total_macroblocks = 0;13901391m_any_source_image_has_alpha = false;13921393basisu::vector<image> source_images;1394basisu::vector<imagef> source_images_hdr;13951396basisu::vector<std::string> source_filenames;13971398// TODO: Note HDR images don't support alpha here, currently.13991400// First load all source images, and determine if any have an alpha channel.1401for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)1402{1403const char* pSource_filename = "";14041405image file_image;1406imagef file_image_hdr;14071408if (m_params.m_read_source_images)1409{1410pSource_filename = m_params.m_source_filenames[source_file_index].c_str();14111412// Load the source image1413if (m_params.m_hdr)1414{1415float upconversion_nit_multiplier = m_params.m_ldr_hdr_upconversion_nit_multiplier;1416if (upconversion_nit_multiplier == 0.0f)1417{1418// Note: We used to use a normalized nit multiplier of 1.0 for UASTC HDR 4x4. We're now writing upconverted output files in absolute luminance (100 nits).1419upconversion_nit_multiplier = LDR_TO_HDR_NITS;1420}14211422m_ldr_to_hdr_upconversion_nit_multiplier = upconversion_nit_multiplier;1423if (!is_image_filename_hdr(pSource_filename))1424m_upconverted_any_ldr_images = true;14251426if (!load_image_hdr(pSource_filename, file_image_hdr, m_params.m_ldr_hdr_upconversion_srgb_to_linear, upconversion_nit_multiplier, m_params.m_ldr_hdr_upconversion_black_bias))1427{1428error_printf("Failed reading source image: %s\n", pSource_filename);1429return false;1430}14311432// TODO: For now, just slam alpha to 1.0f. None of our HDR encoders support alpha yet.1433for (uint32_t y = 0; y < file_image_hdr.get_height(); y++)1434for (uint32_t x = 0; x < file_image_hdr.get_width(); x++)1435file_image_hdr(x, y)[3] = 1.0f;1436}1437else1438{1439if (!load_image(pSource_filename, file_image))1440{1441error_printf("Failed reading source image: %s\n", pSource_filename);1442return false;1443}1444}14451446const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();1447const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();14481449if (m_params.m_status_output)1450{1451printf("Read source image \"%s\", %ux%u\n", pSource_filename, width, height);1452}14531454if (m_params.m_hdr)1455{1456clean_hdr_image(file_image_hdr);1457}1458else1459{1460// Optionally load another image and put a grayscale version of it into the alpha channel.1461if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))1462{1463const char* pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str();14641465image alpha_data;14661467if (!load_image(pSource_alpha_image, alpha_data))1468{1469error_printf("Failed reading source image: %s\n", pSource_alpha_image);1470return false;1471}14721473if (m_params.m_status_output)1474printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height());14751476alpha_data.crop(width, height);14771478for (uint32_t y = 0; y < height; y++)1479for (uint32_t x = 0; x < width; x++)1480file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma();1481}1482}1483}1484else1485{1486if (m_params.m_hdr)1487{1488file_image_hdr = m_params.m_source_images_hdr[source_file_index];1489clean_hdr_image(file_image_hdr);1490}1491else1492{1493file_image = m_params.m_source_images[source_file_index];1494}1495}14961497if (!m_params.m_hdr)1498{1499if (m_params.m_renormalize)1500file_image.renormalize_normal_map();1501}15021503bool alpha_swizzled = false;15041505if (m_params.m_swizzle[0] != 0 ||1506m_params.m_swizzle[1] != 1 ||1507m_params.m_swizzle[2] != 2 ||1508m_params.m_swizzle[3] != 3)1509{1510if (!m_params.m_hdr)1511{1512// Used for XY normal maps in RG - puts X in color, Y in alpha1513for (uint32_t y = 0; y < file_image.get_height(); y++)1514{1515for (uint32_t x = 0; x < file_image.get_width(); x++)1516{1517const color_rgba& c = file_image(x, y);1518file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]);1519}1520}15211522alpha_swizzled = (m_params.m_swizzle[3] != 3);1523}1524else1525{1526// Used for XY normal maps in RG - puts X in color, Y in alpha1527for (uint32_t y = 0; y < file_image_hdr.get_height(); y++)1528{1529for (uint32_t x = 0; x < file_image_hdr.get_width(); x++)1530{1531const vec4F& c = file_image_hdr(x, y);15321533// For now, alpha is always 1.0f in UASTC HDR.1534file_image_hdr(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]);1535}1536}1537}1538}15391540bool has_alpha = false;15411542if (!m_params.m_hdr)1543{1544if (m_params.m_force_alpha || alpha_swizzled)1545has_alpha = true;1546else if (!m_params.m_check_for_alpha)1547file_image.set_alpha(255);1548else if (file_image.has_alpha())1549has_alpha = true;15501551if (has_alpha)1552m_any_source_image_has_alpha = true;1553}15541555{1556const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();1557const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();15581559debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, width, height, has_alpha);1560}15611562if (m_params.m_y_flip)1563{1564if (m_params.m_hdr)1565file_image_hdr.flip_y();1566else1567file_image.flip_y();1568}15691570#if DEBUG_CROP_TEXTURE_TO_64x641571if (m_params.m_hdr)1572file_image_hdr.resize(64, 64);1573else1574file_image.resize(64, 64);1575#endif15761577if ((m_params.m_resample_width > 0) && (m_params.m_resample_height > 0))1578{1579int new_width = basisu::minimum<int>(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);1580int new_height = basisu::minimum<int>(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);15811582debug_printf("Resampling to %ix%i\n", new_width, new_height);15831584// TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.1585if (m_params.m_hdr)1586{1587imagef temp_img(new_width, new_height);1588image_resample(file_image_hdr, temp_img, "box"); // "kaiser");1589clean_hdr_image(temp_img);1590temp_img.swap(file_image_hdr);1591}1592else1593{1594image temp_img(new_width, new_height);1595image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");1596temp_img.swap(file_image);1597}1598}1599else if (m_params.m_resample_factor > 0.0f)1600{1601// TODO: A box filter - kaiser looks too sharp on video. Let the caller control this.1602if (m_params.m_hdr)1603{1604int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);1605int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image_hdr.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);16061607debug_printf("Resampling to %ix%i\n", new_width, new_height);16081609imagef temp_img(new_width, new_height);1610image_resample(file_image_hdr, temp_img, "box"); // "kaiser");1611clean_hdr_image(temp_img);1612temp_img.swap(file_image_hdr);1613}1614else1615{1616int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);1617int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION);16181619debug_printf("Resampling to %ix%i\n", new_width, new_height);16201621image temp_img(new_width, new_height);1622image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser");1623temp_img.swap(file_image);1624}1625}16261627const uint32_t width = m_params.m_hdr ? file_image_hdr.get_width() : file_image.get_width();1628const uint32_t height = m_params.m_hdr ? file_image_hdr.get_height() : file_image.get_height();16291630if ((!width) || (!height))1631{1632error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n");1633return false;1634}16351636if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION))1637{1638error_printf("basis_compressor::read_source_images: Source image \"%s\" is too large!\n", pSource_filename);1639return false;1640}16411642if (!m_params.m_hdr)1643source_images.enlarge(1)->swap(file_image);1644else1645source_images_hdr.enlarge(1)->swap(file_image_hdr);16461647source_filenames.push_back(pSource_filename);1648}16491650// Check if the caller has generated their own mipmaps.1651if (m_params.m_hdr)1652{1653if (m_params.m_source_mipmap_images_hdr.size())1654{1655// Make sure they've passed us enough mipmap chains.1656if ((m_params.m_source_images_hdr.size() != m_params.m_source_mipmap_images_hdr.size()) || (total_source_files != m_params.m_source_images_hdr.size()))1657{1658error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images_hdr.size() must equal m_params.m_source_images_hdr.size()!\n");1659return false;1660}1661}1662}1663else1664{1665if (m_params.m_source_mipmap_images.size())1666{1667// Make sure they've passed us enough mipmap chains.1668if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size()))1669{1670error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n");1671return false;1672}16731674// Check if any of the user-supplied mipmap levels has alpha.1675if (!m_any_source_image_has_alpha)1676{1677for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)1678{1679for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++)1680{1681const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index];16821683// Be sure to take into account any swizzling which will be applied.1684if (mip_img.has_alpha(m_params.m_swizzle[3]))1685{1686m_any_source_image_has_alpha = true;1687break;1688}1689}16901691if (m_any_source_image_has_alpha)1692break;1693}1694}1695}1696}16971698debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha);16991700// Now, for each source image, create the slices corresponding to that image.1701for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)1702{1703const std::string &source_filename = source_filenames[source_file_index];17041705basisu::vector<image> slices;1706basisu::vector<imagef> slices_hdr;17071708slices.reserve(32);1709slices_hdr.reserve(32);17101711// The first (largest) mipmap level.1712image *pFile_image = source_images.size() ? &source_images[source_file_index] : nullptr;1713imagef *pFile_image_hdr = source_images_hdr.size() ? &source_images_hdr[source_file_index] : nullptr;17141715// Reserve a slot for mip0.1716if (m_params.m_hdr)1717slices_hdr.resize(1);1718else1719slices.resize(1);17201721if ((!m_params.m_hdr) && (m_params.m_source_mipmap_images.size()))1722{1723// User-provided mipmaps for each layer or image in the texture array.1724for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++)1725{1726image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index];17271728if ((m_params.m_swizzle[0] != 0) ||1729(m_params.m_swizzle[1] != 1) ||1730(m_params.m_swizzle[2] != 2) ||1731(m_params.m_swizzle[3] != 3))1732{1733// Used for XY normal maps in RG - puts X in color, Y in alpha1734for (uint32_t y = 0; y < mip_img.get_height(); y++)1735{1736for (uint32_t x = 0; x < mip_img.get_width(); x++)1737{1738const color_rgba& c = mip_img(x, y);1739mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]);1740}1741}1742}17431744slices.push_back(mip_img);1745}1746}1747else if ((m_params.m_hdr) && (m_params.m_source_mipmap_images_hdr.size()))1748{1749// User-provided mipmaps for each layer or image in the texture array.1750for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images_hdr[source_file_index].size(); mip_index++)1751{1752imagef& mip_img = m_params.m_source_mipmap_images_hdr[source_file_index][mip_index];17531754if ((m_params.m_swizzle[0] != 0) ||1755(m_params.m_swizzle[1] != 1) ||1756(m_params.m_swizzle[2] != 2) ||1757(m_params.m_swizzle[3] != 3))1758{1759// Used for XY normal maps in RG - puts X in color, Y in alpha1760for (uint32_t y = 0; y < mip_img.get_height(); y++)1761{1762for (uint32_t x = 0; x < mip_img.get_width(); x++)1763{1764const vec4F& c = mip_img(x, y);17651766// For now, HDR alpha is always 1.0f.1767mip_img(x, y).set(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], 1.0f); // c[m_params.m_swizzle[3]]);1768}1769}1770}17711772clean_hdr_image(mip_img);17731774slices_hdr.push_back(mip_img);1775}1776}1777else if (m_params.m_mip_gen)1778{1779// Automatically generate mipmaps.1780if (m_params.m_hdr)1781{1782if (!generate_mipmaps(*pFile_image_hdr, slices_hdr, m_any_source_image_has_alpha))1783return false;1784}1785else1786{1787if (!generate_mipmaps(*pFile_image, slices, m_any_source_image_has_alpha))1788return false;1789}1790}17911792// Swap in the largest mipmap level here to avoid copying it, because generate_mips() will change the array.1793// NOTE: file_image is now blank.1794if (m_params.m_hdr)1795slices_hdr[0].swap(*pFile_image_hdr);1796else1797slices[0].swap(*pFile_image);17981799uint_vec mip_indices(m_params.m_hdr ? slices_hdr.size() : slices.size());1800for (uint32_t i = 0; i < (m_params.m_hdr ? slices_hdr.size() : slices.size()); i++)1801mip_indices[i] = i;18021803if ((!m_params.m_hdr) && (m_any_source_image_has_alpha) && (!m_params.m_uastc))1804{1805// For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB.1806basisu::vector<image> alpha_slices;1807uint_vec new_mip_indices;18081809alpha_slices.reserve(slices.size() * 2);18101811for (uint32_t i = 0; i < slices.size(); i++)1812{1813image lvl_rgb(slices[i]);1814image lvl_a(lvl_rgb);18151816for (uint32_t y = 0; y < lvl_a.get_height(); y++)1817{1818for (uint32_t x = 0; x < lvl_a.get_width(); x++)1819{1820uint8_t a = lvl_a(x, y).a;1821lvl_a(x, y).set_noclamp_rgba(a, a, a, 255);1822}1823}18241825lvl_rgb.set_alpha(255);18261827alpha_slices.push_back(lvl_rgb);1828new_mip_indices.push_back(i);18291830alpha_slices.push_back(lvl_a);1831new_mip_indices.push_back(i);1832}18331834slices.swap(alpha_slices);1835mip_indices.swap(new_mip_indices);1836}18371838if (m_params.m_hdr)1839{1840assert(slices_hdr.size() == mip_indices.size());1841}1842else1843{1844assert(slices.size() == mip_indices.size());1845}18461847for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? slices_hdr.size() : slices.size()); slice_index++)1848{1849image *pSlice_image = m_params.m_hdr ? nullptr : &slices[slice_index];1850imagef *pSlice_image_hdr = m_params.m_hdr ? &slices_hdr[slice_index] : nullptr;18511852const uint32_t orig_width = m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width();1853const uint32_t orig_height = m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height();18541855bool is_alpha_slice = false;1856if ((!m_params.m_hdr) && (m_any_source_image_has_alpha))1857{1858if (m_params.m_uastc)1859{1860is_alpha_slice = pSlice_image->has_alpha();1861}1862else1863{1864is_alpha_slice = (slice_index & 1) != 0;1865}1866}18671868// Enlarge the source image to block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks.1869if (m_params.m_hdr)1870{1871// Don't pad in 6x6 mode, the lower level compressor handles it.1872if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4)1873{1874pSlice_image_hdr->crop_dup_borders(pSlice_image_hdr->get_block_width(get_block_width()) * get_block_width(), pSlice_image_hdr->get_block_height(get_block_height()) * get_block_height());1875}1876}1877else1878{1879pSlice_image->crop_dup_borders(pSlice_image->get_block_width(get_block_width()) * get_block_width(), pSlice_image->get_block_height(get_block_height()) * get_block_height());1880}18811882if (m_params.m_debug_images)1883{1884if (m_params.m_hdr)1885write_exr(string_format("basis_debug_source_image_%u_slice_%u.exr", source_file_index, slice_index).c_str(), *pSlice_image_hdr, 3, 0);1886else1887save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), *pSlice_image);1888}18891890const size_t dest_image_index = (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size());18911892enlarge_vector(m_stats, 1);18931894if (m_params.m_hdr)1895enlarge_vector(m_slice_images_hdr, 1);1896else1897enlarge_vector(m_slice_images, 1);18981899enlarge_vector(m_slice_descs, 1);19001901m_stats[dest_image_index].m_filename = source_filename.c_str();1902m_stats[dest_image_index].m_width = orig_width;1903m_stats[dest_image_index].m_height = orig_height;19041905debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n",1906m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(),1907orig_width, orig_height,1908m_params.m_hdr ? pSlice_image_hdr->get_width() : pSlice_image->get_width(),1909m_params.m_hdr ? pSlice_image_hdr->get_height() : pSlice_image->get_height());19101911basisu_backend_slice_desc& slice_desc = m_slice_descs[dest_image_index];19121913slice_desc.m_first_block_index = m_total_blocks;19141915slice_desc.m_orig_width = orig_width;1916slice_desc.m_orig_height = orig_height;19171918if (m_params.m_hdr)1919{1920slice_desc.m_width = pSlice_image_hdr->get_width();1921slice_desc.m_height = pSlice_image_hdr->get_height();19221923slice_desc.m_num_blocks_x = pSlice_image_hdr->get_block_width(get_block_width());1924slice_desc.m_num_blocks_y = pSlice_image_hdr->get_block_height(get_block_height());1925}1926else1927{1928slice_desc.m_width = pSlice_image->get_width();1929slice_desc.m_height = pSlice_image->get_height();19301931slice_desc.m_num_blocks_x = pSlice_image->get_block_width(get_block_width());1932slice_desc.m_num_blocks_y = pSlice_image->get_block_height(get_block_height());1933}19341935slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1;1936slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1;19371938slice_desc.m_source_file_index = source_file_index;19391940slice_desc.m_mip_index = mip_indices[slice_index];19411942slice_desc.m_alpha = is_alpha_slice;1943slice_desc.m_iframe = false;1944if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)1945{1946slice_desc.m_iframe = (source_file_index == 0);1947}19481949m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;1950total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y;19511952// Finally, swap in the slice's image to avoid copying it.1953// NOTE: slice_image is now blank.1954if (m_params.m_hdr)1955m_slice_images_hdr[dest_image_index].swap(*pSlice_image_hdr);1956else1957m_slice_images[dest_image_index].swap(*pSlice_image);19581959} // slice_index19601961} // source_file_index19621963debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks);19641965// Make sure we don't have too many slices1966if (m_slice_descs.size() > BASISU_MAX_SLICES)1967{1968error_printf("Too many slices!\n");1969return false;1970}19711972// Basic sanity check on the slices1973for (uint32_t i = 1; i < m_slice_descs.size(); i++)1974{1975const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1];1976const basisu_backend_slice_desc &slice_desc = m_slice_descs[i];19771978// Make sure images are in order1979int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index;1980if (image_delta > 1)1981return false;19821983// Make sure mipmap levels are in order1984if (!image_delta)1985{1986int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index;1987if (level_delta > 1)1988return false;1989}1990}19911992if (m_params.m_status_output)1993{1994printf("Total slices: %u\n", (uint32_t)m_slice_descs.size());1995}19961997for (uint32_t i = 0; i < m_slice_descs.size(); i++)1998{1999const basisu_backend_slice_desc &slice_desc = m_slice_descs[i];20002001if (m_params.m_status_output)2002{2003printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n",2004i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height,2005slice_desc.m_width, slice_desc.m_height,2006slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe);2007}20082009if (m_any_source_image_has_alpha)2010{2011// HDR doesn't support alpha yet2012if (m_params.m_hdr)2013return false;20142015if (!m_params.m_uastc)2016{2017// For ETC1S, alpha slices must be at odd slice indices.2018if (slice_desc.m_alpha)2019{2020if ((i & 1) == 0)2021return false;20222023const basisu_backend_slice_desc& prev_slice_desc = m_slice_descs[i - 1];20242025// Make sure previous slice has this image's color data2026if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index)2027return false;2028if (prev_slice_desc.m_alpha)2029return false;2030if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index)2031return false;2032if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x)2033return false;2034if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y)2035return false;2036}2037else if (i & 1)2038return false;2039}2040}2041else if (slice_desc.m_alpha)2042{2043return false;2044}20452046if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height))2047return false;20482049if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames))2050{2051if (!slice_desc.m_iframe)2052return false;2053}2054}20552056return true;2057}20582059// Do some basic validation for 2D arrays, cubemaps, video, and volumes.2060bool basis_compressor::validate_texture_type_constraints()2061{2062debug_printf("basis_compressor::validate_texture_type_constraints\n");20632064// In 2D mode anything goes (each image may have a different resolution and # of mipmap levels).2065if (m_params.m_tex_type == basist::cBASISTexType2D)2066return true;20672068uint32_t total_basis_images = 0;20692070for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)2071{2072const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];20732074total_basis_images = maximum<uint32_t>(total_basis_images, slice_desc.m_source_file_index + 1);2075}20762077if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)2078{2079// For cubemaps, validate that the total # of Basis images is a multiple of 6.2080if ((total_basis_images % 6) != 0)2081{2082error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n");2083return false;2084}2085}20862087// Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels.2088uint_vec image_mipmap_levels(total_basis_images);20892090int width = -1, height = -1;2091for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)2092{2093const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];20942095image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1);20962097if (slice_desc.m_mip_index != 0)2098continue;20992100if (width < 0)2101{2102width = slice_desc.m_orig_width;2103height = slice_desc.m_orig_height;2104}2105else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height))2106{2107error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n");2108return false;2109}2110}21112112for (size_t i = 1; i < image_mipmap_levels.size(); i++)2113{2114if (image_mipmap_levels[0] != image_mipmap_levels[i])2115{2116error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n");2117return false;2118}2119}21202121return true;2122}21232124bool basis_compressor::extract_source_blocks()2125{2126debug_printf("basis_compressor::extract_source_blocks\n");21272128// No need to extract blocks in 6x6 mode, but the 4x4 compressors want 4x4 blocks.2129if ((m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6) || (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE))2130return true;21312132if (m_params.m_hdr)2133m_source_blocks_hdr.resize(m_total_blocks);2134else2135m_source_blocks.resize(m_total_blocks);21362137for (uint32_t slice_index = 0; slice_index < (m_params.m_hdr ? m_slice_images_hdr.size() : m_slice_images.size()); slice_index++)2138{2139const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];21402141const uint32_t num_blocks_x = slice_desc.m_num_blocks_x;2142const uint32_t num_blocks_y = slice_desc.m_num_blocks_y;21432144const image *pSource_image = m_params.m_hdr ? nullptr : &m_slice_images[slice_index];2145const imagef *pSource_image_hdr = m_params.m_hdr ? &m_slice_images_hdr[slice_index] : nullptr;21462147for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)2148{2149for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)2150{2151if (m_params.m_hdr)2152{2153vec4F* pBlock = m_source_blocks_hdr[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr();21542155pSource_image_hdr->extract_block_clamped(pBlock, block_x * 4, block_y * 4, 4, 4);21562157// Additional (technically optional) early sanity checking of the block texels.2158for (uint32_t i = 0; i < 16; i++)2159{2160for (uint32_t c = 0; c < 3; c++)2161{2162float v = pBlock[i][c];21632164if (std::isnan(v) || std::isinf(v) || (v < 0.0f) || (v > basist::MAX_HALF_FLOAT))2165{2166error_printf("basis_compressor::extract_source_blocks: invalid float component\n");2167return false;2168}2169}2170}2171}2172else2173{2174pSource_image->extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4);2175}2176}2177}2178}21792180return true;2181}21822183bool basis_compressor::process_frontend()2184{2185debug_printf("basis_compressor::process_frontend\n");21862187#if 02188// TODO2189basis_etc1_pack_params pack_params;2190pack_params.m_quality = cETCQualityMedium;2191pack_params.m_perceptual = m_params.m_perceptual;2192pack_params.m_use_color4 = false;21932194pack_etc1_block_context pack_context;21952196std::unordered_set<uint64_t> endpoint_hash;2197std::unordered_set<uint32_t> selector_hash;21982199for (uint32_t i = 0; i < m_source_blocks.size(); i++)2200{2201etc_block blk;2202pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context);22032204const color_rgba c0(blk.get_block_color(0, false));2205endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16));22062207const color_rgba c1(blk.get_block_color(1, false));2208endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16));22092210selector_hash.insert(blk.get_raw_selector_bits());2211}22122213const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size();2214const uint32_t total_unique_selectors = (uint32_t)selector_hash.size();22152216if (m_params.m_debug)2217{2218debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors);2219}2220#endif22212222const double total_texels = m_total_blocks * 16.0f;22232224int endpoint_clusters = m_params.m_etc1s_max_endpoint_clusters;2225int selector_clusters = m_params.m_etc1s_max_selector_clusters;22262227if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters)2228{2229error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters);2230return false;2231}2232if (selector_clusters > basisu_frontend::cMaxSelectorClusters)2233{2234error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters);2235return false;2236}22372238if (m_params.m_etc1s_quality_level != -1)2239{2240const float quality = saturate(m_params.m_etc1s_quality_level / 255.0f);22412242const float bits_per_endpoint_cluster = 14.0f;2243const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f2244int max_endpoints = static_cast<int>((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster);22452246const float mid = 128.0f / 255.0f;22472248float color_endpoint_quality = quality;22492250const float endpoint_split_point = 0.5f;22512252// In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough.2253const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800;2254const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192;22552256if (color_endpoint_quality <= mid)2257{2258color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f));22592260max_endpoints = clamp<int>(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE);2261max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks);22622263if (max_endpoints < 64)2264max_endpoints = 64;2265endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(32, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters);2266}2267else2268{2269color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f);22702271max_endpoints = clamp<int>(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE);2272max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks);22732274if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE)2275max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE;2276endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters);2277}22782279float bits_per_selector_cluster = 14.0f;22802281const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f2282int max_selectors = static_cast<int>((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster);2283max_selectors = clamp<int>(max_selectors, 256, basisu_frontend::cMaxSelectorClusters);2284max_selectors = minimum<uint32_t>(max_selectors, m_total_blocks);22852286float color_selector_quality = quality;2287//color_selector_quality = powf(color_selector_quality, 1.65f);2288color_selector_quality = powf(color_selector_quality, 2.62f);22892290if (max_selectors < 96)2291max_selectors = 96;2292selector_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(96, static_cast<float>(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters);22932294debug_printf("Max endpoints: %u, max selectors: %u\n", endpoint_clusters, selector_clusters);22952296if (m_params.m_etc1s_quality_level >= 223)2297{2298if (!m_params.m_selector_rdo_thresh.was_changed())2299{2300if (!m_params.m_endpoint_rdo_thresh.was_changed())2301m_params.m_endpoint_rdo_thresh *= .25f;23022303if (!m_params.m_selector_rdo_thresh.was_changed())2304m_params.m_selector_rdo_thresh *= .25f;2305}2306}2307else if (m_params.m_etc1s_quality_level >= 192)2308{2309if (!m_params.m_endpoint_rdo_thresh.was_changed())2310m_params.m_endpoint_rdo_thresh *= .5f;23112312if (!m_params.m_selector_rdo_thresh.was_changed())2313m_params.m_selector_rdo_thresh *= .5f;2314}2315else if (m_params.m_etc1s_quality_level >= 160)2316{2317if (!m_params.m_endpoint_rdo_thresh.was_changed())2318m_params.m_endpoint_rdo_thresh *= .75f;23192320if (!m_params.m_selector_rdo_thresh.was_changed())2321m_params.m_selector_rdo_thresh *= .75f;2322}2323else if (m_params.m_etc1s_quality_level >= 129)2324{2325float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f);23262327if (!m_params.m_endpoint_rdo_thresh.was_changed())2328m_params.m_endpoint_rdo_thresh *= lerp<float>(1.0f, .75f, l);23292330if (!m_params.m_selector_rdo_thresh.was_changed())2331m_params.m_selector_rdo_thresh *= lerp<float>(1.0f, .75f, l);2332}2333}23342335basisu_frontend::params p;2336p.m_num_source_blocks = m_total_blocks;2337p.m_pSource_blocks = &m_source_blocks[0];2338p.m_max_endpoint_clusters = endpoint_clusters;2339p.m_max_selector_clusters = selector_clusters;2340p.m_perceptual = m_params.m_perceptual;2341p.m_debug_stats = m_params.m_debug;2342p.m_debug_images = m_params.m_debug_images;2343p.m_compression_level = m_params.m_compression_level;2344p.m_tex_type = m_params.m_tex_type;2345p.m_multithreaded = m_params.m_multithreading;2346p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks;2347p.m_validate = m_params.m_validate_etc1s;2348p.m_pJob_pool = m_params.m_pJob_pool;2349p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks;23502351// Don't keep trying to use OpenCL if it ever fails.2352p.m_pOpenCL_context = !m_opencl_failed ? m_pOpenCL_context : nullptr;23532354if (!m_frontend.init(p))2355{2356error_printf("basisu_frontend::init() failed!\n");2357return false;2358}23592360m_frontend.compress();23612362if (m_frontend.get_opencl_failed())2363m_opencl_failed = true;23642365if (m_params.m_debug_images)2366{2367for (uint32_t i = 0; i < m_slice_descs.size(); i++)2368{2369char filename[1024];2370#ifdef _WIN322371sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i);2372#else2373snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i);2374#endif2375m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true);23762377#ifdef _WIN322378sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i);2379#else2380snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i);2381#endif2382m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false);2383}2384}23852386return true;2387}23882389bool basis_compressor::extract_frontend_texture_data()2390{2391if (!m_params.m_compute_stats)2392return true;23932394debug_printf("basis_compressor::extract_frontend_texture_data\n");23952396m_frontend_output_textures.resize(m_slice_descs.size());2397m_best_etc1s_images.resize(m_slice_descs.size());2398m_best_etc1s_images_unpacked.resize(m_slice_descs.size());23992400for (uint32_t i = 0; i < m_slice_descs.size(); i++)2401{2402const basisu_backend_slice_desc &slice_desc = m_slice_descs[i];24032404const uint32_t num_blocks_x = slice_desc.m_num_blocks_x;2405const uint32_t num_blocks_y = slice_desc.m_num_blocks_y;24062407const uint32_t width = num_blocks_x * 4;2408const uint32_t height = num_blocks_y * 4;24092410m_frontend_output_textures[i].init(texture_format::cETC1, width, height);24112412for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)2413for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)2414memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block));24152416#if 02417if (m_params.m_debug_images)2418{2419char filename[1024];2420sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i);2421write_etc1_vis_images(m_frontend_output_textures[i], filename);2422}2423#endif24242425m_best_etc1s_images[i].init(texture_format::cETC1, width, height);2426for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)2427for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)2428memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block));24292430m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]);2431}24322433return true;2434}24352436bool basis_compressor::process_backend()2437{2438debug_printf("basis_compressor::process_backend\n");24392440basisu_backend_params backend_params;2441backend_params.m_debug = m_params.m_debug;2442backend_params.m_debug_images = m_params.m_debug_images;2443backend_params.m_etc1s = true;2444backend_params.m_compression_level = m_params.m_compression_level;24452446if (!m_params.m_no_endpoint_rdo)2447backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh;24482449if (!m_params.m_no_selector_rdo)2450backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh;24512452backend_params.m_used_global_codebooks = m_frontend.get_params().m_pGlobal_codebooks != nullptr;2453backend_params.m_validate = m_params.m_validate_output_data;24542455m_backend.init(&m_frontend, backend_params, m_slice_descs);2456uint32_t total_packed_bytes = m_backend.encode();24572458if (!total_packed_bytes)2459{2460error_printf("basis_compressor::encode() failed!\n");2461return false;2462}24632464debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes);24652466return true;2467}24682469bool basis_compressor::create_basis_file_and_transcode()2470{2471debug_printf("basis_compressor::create_basis_file_and_transcode\n");24722473const basisu_backend_output& encoded_output = m_params.m_uastc ? m_uastc_backend_output : m_backend.get_output();24742475if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame))2476{2477error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n");2478return false;2479}24802481const uint8_vec& comp_data = m_basis_file.get_compressed_data();24822483m_output_basis_file = comp_data;24842485uint32_t total_orig_pixels = 0;24862487for (uint32_t i = 0; i < m_slice_descs.size(); i++)2488{2489const basisu_backend_slice_desc& slice_desc = m_slice_descs[i];24902491total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height;2492}24932494m_basis_file_size = (uint32_t)comp_data.size();2495m_basis_bits_per_texel = total_orig_pixels ? (comp_data.size() * 8.0f) / total_orig_pixels : 0;24962497debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels);24982499// HDR 6x6 TODO2500// HACK HACK2501const bool is_hdr_6x6 = m_params.m_hdr && (m_params.m_hdr_mode != hdr_modes::cUASTC_HDR_4X4);25022503if (m_params.m_validate_output_data)2504{2505interval_timer tm;2506tm.start();25072508basist::basisu_transcoder_init();25092510debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms());25112512// Verify the compressed data by transcoding it to ASTC (or ETC1)/BC7 and validating the CRC's.2513basist::basisu_transcoder decoder;2514if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true))2515{2516error_printf("decoder.validate_file_checksums() failed!\n");2517return false;2518}25192520m_decoded_output_textures.resize(m_slice_descs.size());25212522if (m_params.m_hdr)2523{2524m_decoded_output_textures_bc6h_hdr_unpacked.resize(m_slice_descs.size());25252526m_decoded_output_textures_astc_hdr.resize(m_slice_descs.size());2527m_decoded_output_textures_astc_hdr_unpacked.resize(m_slice_descs.size());2528}2529else2530{2531m_decoded_output_textures_unpacked.resize(m_slice_descs.size());25322533m_decoded_output_textures_bc7.resize(m_slice_descs.size());2534m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size());2535}25362537tm.start();25382539if (m_params.m_pGlobal_codebooks)2540{2541decoder.set_global_codebooks(m_params.m_pGlobal_codebooks);2542}25432544if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size()))2545{2546error_printf("decoder.start_transcoding() failed!\n");2547return false;2548}25492550double start_transcoding_time = tm.get_elapsed_secs();25512552debug_printf("basisu_compressor::start_transcoding() took %3.3fms\n", start_transcoding_time * 1000.0f);25532554double total_time_etc1s_or_astc = 0;25552556for (uint32_t slice_iter = 0; slice_iter < m_slice_descs.size(); slice_iter++)2557{2558// Select either BC6H, UASTC LDR 4x4, or ETC12559basisu::texture_format tex_format = m_params.m_hdr ? texture_format::cBC6HUnsigned : (m_params.m_uastc ? texture_format::cUASTC4x4 : texture_format::cETC1);2560basist::block_format blk_format = m_params.m_hdr ? basist::block_format::cBC6H : (m_params.m_uastc ? basist::block_format::cUASTC_4x4 : basist::block_format::cETC1);25612562gpu_image decoded_texture;2563decoded_texture.init(2564tex_format,2565m_slice_descs[slice_iter].m_width, m_slice_descs[slice_iter].m_height);25662567tm.start();25682569const uint32_t block_size_x = basisu::get_block_width(tex_format);2570const uint32_t block_size_y = basisu::get_block_height(tex_format);2571const uint32_t num_dst_blocks_x = (m_slice_descs[slice_iter].m_orig_width + block_size_x - 1) / block_size_x;2572const uint32_t num_dst_blocks_y = (m_slice_descs[slice_iter].m_orig_height + block_size_y - 1) / block_size_y;2573const uint32_t total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y;25742575uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8;25762577if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), slice_iter,2578reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), total_dst_blocks, blk_format, bytes_per_block))2579{2580error_printf("Transcoding failed on slice %u!\n", slice_iter);2581return false;2582}25832584total_time_etc1s_or_astc += tm.get_elapsed_secs();25852586if (encoded_output.m_tex_format == basist::basis_tex_format::cETC1S)2587{2588uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0);2589if (image_crc16 != encoded_output.m_slice_image_crcs[slice_iter])2590{2591error_printf("Decoded image data CRC check failed on slice %u!\n", slice_iter);2592return false;2593}2594debug_printf("Decoded image data CRC check succeeded on slice %i\n", slice_iter);2595}25962597m_decoded_output_textures[slice_iter] = decoded_texture;2598}25992600double total_alt_transcode_time = 0;2601tm.start();26022603if (m_params.m_hdr)2604{2605if (is_hdr_6x6)2606{2607assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6));2608assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_6x6_RGBA, basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE));26092610for (uint32_t i = 0; i < m_slice_descs.size(); i++)2611{2612gpu_image decoded_texture;2613decoded_texture.init(texture_format::cASTC_HDR_6x6, m_slice_descs[i].m_width, m_slice_descs[i].m_height);26142615if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,2616reinterpret_cast<basist::astc_blk*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_6x6, 16))2617{2618error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i);2619return false;2620}26212622m_decoded_output_textures_astc_hdr[i] = decoded_texture;2623}2624}2625else2626{2627assert(basist::basis_is_format_supported(basist::transcoder_texture_format::cTFASTC_HDR_4x4_RGBA, basist::basis_tex_format::cUASTC_HDR_4x4));26282629for (uint32_t i = 0; i < m_slice_descs.size(); i++)2630{2631gpu_image decoded_texture;2632decoded_texture.init(texture_format::cASTC_HDR_4x4, m_slice_descs[i].m_width, m_slice_descs[i].m_height);26332634if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,2635reinterpret_cast<basist::astc_blk*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cASTC_HDR_4x4, 16))2636{2637error_printf("Transcoding failed to ASTC HDR on slice %u!\n", i);2638return false;2639}26402641m_decoded_output_textures_astc_hdr[i] = decoded_texture;2642}2643}2644}2645else2646{2647if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) &&2648basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S))2649{2650for (uint32_t i = 0; i < m_slice_descs.size(); i++)2651{2652gpu_image decoded_texture;2653decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height);26542655if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,2656reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16))2657{2658error_printf("Transcoding failed to BC7 on slice %u!\n", i);2659return false;2660}26612662m_decoded_output_textures_bc7[i] = decoded_texture;2663}2664}2665}26662667total_alt_transcode_time = tm.get_elapsed_secs();26682669for (uint32_t i = 0; i < m_slice_descs.size(); i++)2670{2671if (m_params.m_hdr)2672{2673// BC6H2674bool status = m_decoded_output_textures[i].unpack_hdr(m_decoded_output_textures_bc6h_hdr_unpacked[i]);2675assert(status);2676BASISU_NOTE_UNUSED(status);26772678// ASTC HDR2679status = m_decoded_output_textures_astc_hdr[i].unpack_hdr(m_decoded_output_textures_astc_hdr_unpacked[i]);2680assert(status);2681}2682else2683{2684bool status = m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]);2685assert(status);2686BASISU_NOTE_UNUSED(status);26872688if (m_decoded_output_textures_bc7[i].get_pixel_width())2689{2690status = m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]);2691assert(status);2692}2693}2694}26952696debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n",2697m_params.m_hdr ? "BC6H" : (m_params.m_uastc ? "ASTC" : "ETC1"),2698total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc);26992700if (total_alt_transcode_time != 0)2701debug_printf("Alternate transcode in %3.3fms, %f texels/sec\n", total_alt_transcode_time * 1000.0f, total_orig_pixels / total_alt_transcode_time);27022703if (!is_hdr_6x6)2704{2705for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)2706{2707const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];27082709const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;2710BASISU_NOTE_UNUSED(total_blocks);27112712assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks);2713}2714}27152716} // if (m_params.m_validate_output_data)27172718return true;2719}27202721bool basis_compressor::write_hdr_debug_images(const char* pBasename, const imagef& orig_hdr_img, uint32_t width, uint32_t height)2722{2723// Copy image to account for 4x4 block expansion2724imagef hdr_img(orig_hdr_img);2725hdr_img.resize(width, height);27262727image srgb_img(width, height);27282729const float inv_upconversion_scale = (m_ldr_to_hdr_upconversion_nit_multiplier > 0.0f) ? (1.0f / m_ldr_to_hdr_upconversion_nit_multiplier) : 1.0f;27302731for (uint32_t y = 0; y < height; y++)2732{2733for (uint32_t x = 0; x < width; x++)2734{2735vec4F p(hdr_img(x, y));27362737p[0] = clamp(p[0] * inv_upconversion_scale, 0.0f, 1.0f);2738p[1] = clamp(p[1] * inv_upconversion_scale, 0.0f, 1.0f);2739p[2] = clamp(p[2] * inv_upconversion_scale, 0.0f, 1.0f);27402741int rc = (int)std::round(linear_to_srgb(p[0]) * 255.0f);2742int gc = (int)std::round(linear_to_srgb(p[1]) * 255.0f);2743int bc = (int)std::round(linear_to_srgb(p[2]) * 255.0f);27442745srgb_img.set_clipped(x, y, color_rgba(rc, gc, bc, 255));2746}2747}27482749{2750const std::string filename(string_format("%s_linear_clamped_to_srgb.png", pBasename));2751save_png(filename.c_str(), srgb_img);2752printf("Wrote .PNG file %s\n", filename.c_str());2753}27542755{2756const std::string filename(string_format("%s_compressive_tonemapped.png", pBasename));2757image compressive_tonemapped_img;27582759bool status = tonemap_image_compressive(compressive_tonemapped_img, hdr_img);2760if (!status)2761{2762error_printf("basis_compressor::write_hdr_debug_images: tonemap_image_compressive() failed (invalid half-float input)\n");2763}2764else2765{2766save_png(filename.c_str(), compressive_tonemapped_img);2767printf("Wrote .PNG file %s\n", filename.c_str());2768}2769}27702771image tonemapped_img;27722773for (int e = -5; e <= 5; e++)2774{2775const float scale = powf(2.0f, (float)e);27762777tonemap_image_reinhard(tonemapped_img, hdr_img, scale);27782779std::string filename(string_format("%s_reinhard_tonemapped_scale_%f.png", pBasename, scale));2780save_png(filename.c_str(), tonemapped_img, cImageSaveIgnoreAlpha);2781printf("Wrote .PNG file %s\n", filename.c_str());2782}27832784return true;2785}27862787bool basis_compressor::write_output_files_and_compute_stats()2788{2789debug_printf("basis_compressor::write_output_files_and_compute_stats\n");27902791const uint8_vec& comp_data = m_params.m_create_ktx2_file ? m_output_ktx2_file : m_basis_file.get_compressed_data();2792if (m_params.m_write_output_basis_or_ktx2_files)2793{2794const std::string& output_filename = m_params.m_out_filename;27952796if (!write_vec_to_file(output_filename.c_str(), comp_data))2797{2798error_printf("Failed writing output data to file \"%s\"\n", output_filename.c_str());2799return false;2800}28012802if (m_params.m_status_output)2803{2804printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());2805}2806}28072808size_t comp_size = 0;2809if ((m_params.m_compute_stats) && (m_params.m_uastc) && (comp_data.size()))2810{2811void* pComp_data = tdefl_compress_mem_to_heap(&comp_data[0], comp_data.size(), &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES);2812size_t decomp_size = 0;2813void* pDecomp_data = tinfl_decompress_mem_to_heap(pComp_data, comp_size, &decomp_size, 0);2814if ((decomp_size != comp_data.size()) || (memcmp(pDecomp_data, &comp_data[0], decomp_size) != 0))2815{2816printf("basis_compressor::create_basis_file_and_transcode:: miniz compression or decompression failed!\n");2817return false;2818}28192820mz_free(pComp_data);2821mz_free(pDecomp_data);28222823uint32_t total_texels = 0;2824for (uint32_t i = 0; i < m_slice_descs.size(); i++)2825total_texels += (m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height);28262827m_basis_bits_per_texel = ((float)comp_size * 8.0f) / total_texels;28282829fmt_debug_printf("Output file size: {}, {3.2} bits/texel, LZ compressed file size: {}, {3.2} bits/texel\n",2830(uint64_t)comp_data.size(), ((float)comp_data.size() * 8.0f) / total_texels,2831(uint64_t)comp_size, m_basis_bits_per_texel);2832}28332834m_stats.resize(m_slice_descs.size());28352836if (m_params.m_validate_output_data)2837{2838if (m_params.m_hdr)2839{2840if (m_params.m_print_stats)2841{2842printf("ASTC/BC6H half float space error metrics (a piecewise linear approximation of log2 error):\n");2843}28442845for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)2846{2847const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];28482849if (m_params.m_compute_stats)2850{2851image_stats& s = m_stats[slice_index];28522853if (m_params.m_print_stats)2854{2855printf("Slice: %u\n", slice_index);2856}28572858image_metrics im;28592860if (m_params.m_print_stats)2861{2862printf("\nASTC channels:\n");2863for (uint32_t i = 0; i < 3; i++)2864{2865im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], i, 1, true);28662867printf("%c: ", "RGB"[i]);2868im.print_hp();2869}28702871printf("BC6H channels:\n");2872for (uint32_t i = 0; i < 3; i++)2873{2874im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], i, 1, true);28752876printf("%c: ", "RGB"[i]);2877im.print_hp();2878}2879}28802881im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true);2882s.m_basis_rgb_avg_psnr = (float)im.m_psnr;28832884if (m_params.m_print_stats)2885{2886printf("\nASTC RGB: ");2887im.print_hp();2888#if 02889// Validation2890im.calc_half2(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true);2891printf("\nASTC RGB (Alt): ");2892im.print_hp();2893#endif2894}28952896im.calc_half(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true);2897s.m_basis_rgb_avg_bc6h_psnr = (float)im.m_psnr;28982899if (m_params.m_print_stats)2900{2901printf("BC6H RGB: ");2902im.print_hp();2903//printf("\n");2904}29052906im.calc(m_slice_images_hdr[slice_index], m_decoded_output_textures_astc_hdr_unpacked[slice_index], 0, 3, true, true);2907s.m_basis_rgb_avg_log2_psnr = (float)im.m_psnr;29082909if (m_params.m_print_stats)2910{2911printf("\nASTC Log2 RGB: ");2912im.print_hp();2913}29142915im.calc(m_slice_images_hdr[slice_index], m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], 0, 3, true, true);2916s.m_basis_rgb_avg_bc6h_log2_psnr = (float)im.m_psnr;29172918if (m_params.m_print_stats)2919{2920printf("BC6H Log2 RGB: ");2921im.print_hp();29222923printf("\n");2924}2925}29262927if (m_params.m_debug_images)2928{2929std::string out_basename;2930if (m_params.m_out_filename.size())2931string_get_filename(m_params.m_out_filename.c_str(), out_basename);2932else if (m_params.m_source_filenames.size())2933string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);29342935string_remove_extension(out_basename);2936out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);29372938// Write BC6H .DDS file.2939{2940gpu_image bc6h_tex(m_decoded_output_textures[slice_index]);2941bc6h_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);29422943std::string filename(out_basename + "_bc6h.dds");2944write_compressed_texture_file(filename.c_str(), bc6h_tex, true);2945printf("Wrote .DDS file %s\n", filename.c_str());2946}29472948// Write ASTC .KTX/.astc files. ("astcenc -dh input.astc output.exr" to decode the astc file.)2949{2950gpu_image astc_tex(m_decoded_output_textures_astc_hdr[slice_index]);2951astc_tex.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);29522953std::string filename1(out_basename + "_astc.astc");29542955uint32_t block_width = 4, block_height = 4;2956if ((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE))2957{2958block_width = 6;2959block_height = 6;2960}29612962write_astc_file(filename1.c_str(), astc_tex.get_ptr(), block_width, block_height, slice_desc.m_orig_width, slice_desc.m_orig_height);2963printf("Wrote .ASTC file %s\n", filename1.c_str());29642965std::string filename2(out_basename + "_astc.ktx");2966write_compressed_texture_file(filename2.c_str(), astc_tex, true);2967printf("Wrote .KTX file %s\n", filename2.c_str());2968}29692970// Write unpacked ASTC image to .EXR2971{2972imagef astc_img(m_decoded_output_textures_astc_hdr_unpacked[slice_index]);2973astc_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height);29742975std::string filename(out_basename + "_unpacked_astc.exr");2976write_exr(filename.c_str(), astc_img, 3, 0);2977printf("Wrote .EXR file %s\n", filename.c_str());2978}29792980// Write unpacked BC6H image to .EXR2981{2982imagef bc6h_img(m_decoded_output_textures_bc6h_hdr_unpacked[slice_index]);2983bc6h_img.resize(slice_desc.m_orig_width, slice_desc.m_orig_height);29842985std::string filename(out_basename + "_unpacked_bc6h.exr");2986write_exr(filename.c_str(), bc6h_img, 3, 0);2987printf("Wrote .EXR file %s\n", filename.c_str());2988}29892990// Write tonemapped/srgb images2991write_hdr_debug_images((out_basename + "_source").c_str(), m_slice_images_hdr[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);2992write_hdr_debug_images((out_basename + "_unpacked_astc").c_str(), m_decoded_output_textures_astc_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);2993write_hdr_debug_images((out_basename + "_unpacked_bc6h").c_str(), m_decoded_output_textures_bc6h_hdr_unpacked[slice_index], slice_desc.m_orig_width, slice_desc.m_orig_height);2994}2995}2996}2997else2998{2999for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)3000{3001const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];30023003if (m_params.m_compute_stats)3004{3005if (m_params.m_print_stats)3006printf("Slice: %u\n", slice_index);30073008image_stats& s = m_stats[slice_index];30093010image_metrics em;30113012// ---- .basis stats3013em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3);3014if (m_params.m_print_stats)3015em.print(".basis RGB Avg: ");3016s.m_basis_rgb_avg_psnr = (float)em.m_psnr;30173018em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4);3019if (m_params.m_print_stats)3020em.print(".basis RGBA Avg: ");3021s.m_basis_rgba_avg_psnr = (float)em.m_psnr;30223023em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1);3024if (m_params.m_print_stats)3025em.print(".basis R Avg: ");30263027em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1);3028if (m_params.m_print_stats)3029em.print(".basis G Avg: ");30303031em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1);3032if (m_params.m_print_stats)3033em.print(".basis B Avg: ");30343035if (m_params.m_uastc)3036{3037em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1);3038if (m_params.m_print_stats)3039em.print(".basis A Avg: ");30403041s.m_basis_a_avg_psnr = (float)em.m_psnr;3042}30433044em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);3045if (m_params.m_print_stats)3046em.print(".basis 709 Luma: ");3047s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr);3048s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim);30493050em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true);3051if (m_params.m_print_stats)3052em.print(".basis 601 Luma: ");3053s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr);30543055if (m_slice_descs.size() == 1)3056{3057const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size();3058if (m_params.m_print_stats)3059{3060debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));3061debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));3062}3063}30643065if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width())3066{3067// ---- BC7 stats3068em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3);3069//if (m_params.m_print_stats)3070// em.print("BC7 RGB Avg: ");3071s.m_bc7_rgb_avg_psnr = (float)em.m_psnr;30723073em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4);3074//if (m_params.m_print_stats)3075// em.print("BC7 RGBA Avg: ");3076s.m_bc7_rgba_avg_psnr = (float)em.m_psnr;30773078em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1);3079//if (m_params.m_print_stats)3080// em.print("BC7 R Avg: ");30813082em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1);3083//if (m_params.m_print_stats)3084// em.print("BC7 G Avg: ");30853086em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1);3087//if (m_params.m_print_stats)3088// em.print("BC7 B Avg: ");30893090if (m_params.m_uastc)3091{3092em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1);3093//if (m_params.m_print_stats)3094// em.print("BC7 A Avg: ");30953096s.m_bc7_a_avg_psnr = (float)em.m_psnr;3097}30983099em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0);3100//if (m_params.m_print_stats)3101// em.print("BC7 709 Luma: ");3102s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr);3103s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim);31043105em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true);3106//if (m_params.m_print_stats)3107// em.print("BC7 601 Luma: ");3108s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr);3109}31103111if (!m_params.m_uastc)3112{3113// ---- Nearly best possible ETC1S stats3114em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);3115//if (m_params.m_print_stats)3116// em.print("Unquantized ETC1S RGB Avg: ");3117s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr);31183119em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);3120//if (m_params.m_print_stats)3121// em.print("Unquantized ETC1S 709 Luma: ");3122s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr);3123s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim);31243125em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true);3126//if (m_params.m_print_stats)3127// em.print("Unquantized ETC1S 601 Luma: ");3128s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr);3129}3130}31313132std::string out_basename;3133if (m_params.m_out_filename.size())3134string_get_filename(m_params.m_out_filename.c_str(), out_basename);3135else if (m_params.m_source_filenames.size())3136string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);31373138string_remove_extension(out_basename);3139out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);31403141if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images))3142{3143// Write "best" ETC1S debug images3144if (!m_params.m_uastc)3145{3146gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]);3147best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);3148write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image, true);31493150image best_etc1s_unpacked;3151best_etc1s_gpu_image.unpack(best_etc1s_unpacked);3152save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked);3153}3154}31553156if (m_params.m_debug_images)3157{3158// Write decoded ETC1S/ASTC debug images3159{3160gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]);3161decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);3162write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc, true);31633164image temp(m_decoded_output_textures_unpacked[slice_index]);3165temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);3166save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp);3167}31683169// Write decoded BC7 debug images3170if (m_decoded_output_textures_bc7[slice_index].get_pixel_width())3171{3172gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]);3173decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);3174write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7, true);31753176image temp(m_decoded_output_textures_unpacked_bc7[slice_index]);3177temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);3178save_png(out_basename + "_transcoded_bc7.png", temp);3179}3180}3181}3182} // if (m_params.m_hdr)31833184} // if (m_params.m_validate_output_data)31853186return true;3187}31883189// Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file.3190bool basis_compressor::validate_ktx2_constraints()3191{3192uint32_t base_width = 0, base_height = 0;3193uint32_t total_layers = 0;3194for (uint32_t i = 0; i < m_slice_descs.size(); i++)3195{3196if (m_slice_descs[i].m_mip_index == 0)3197{3198if (!base_width)3199{3200base_width = m_slice_descs[i].m_orig_width;3201base_height = m_slice_descs[i].m_orig_height;3202}3203else3204{3205if ((m_slice_descs[i].m_orig_width != base_width) || (m_slice_descs[i].m_orig_height != base_height))3206{3207return false;3208}3209}32103211total_layers = maximum<uint32_t>(total_layers, m_slice_descs[i].m_source_file_index + 1);3212}3213}32143215basisu::vector<uint32_t> total_mips(total_layers);3216for (uint32_t i = 0; i < m_slice_descs.size(); i++)3217total_mips[m_slice_descs[i].m_source_file_index] = maximum<uint32_t>(total_mips[m_slice_descs[i].m_source_file_index], m_slice_descs[i].m_mip_index + 1);32183219for (uint32_t i = 1; i < total_layers; i++)3220{3221if (total_mips[0] != total_mips[i])3222{3223return false;3224}3225}32263227return true;3228}32293230// colorModel=KTX2_KDF_DF_MODEL_ETC1S (0xA3)3231// LDR ETC1S texture data in a custom format, with global codebooks3232static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };3233static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };32343235// colorModel=KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 (0xA6)3236// LDR UASTC 4x4 texture data in a custom block format3237static uint8_t g_ktx2_uastc_ldr_4x4_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };3238static uint8_t g_ktx2_uastc_ldr_4x4_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF };32393240// colorModel=KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 (0xA7)3241// Standard ASTC HDR 4x4 texture data but constrained for easy transcoding to BC6H, either highest quality or RDO optimized.3242static uint8_t g_ktx2_uastc_hdr_4x4_nonalpha_dfd[44] =3243{32440x2C,0x0,0x0,0x0, // 0 totalSize32450x0,0x0,0x0,0x0, // 1 descriptorType/vendorId32460x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber32470xA7,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_UASTC_HDR_4X4)32480x3,0x3,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension332490x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane332500x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane732510x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.)32520x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition332530x0,0x0,0x0,0x0, // 9 sampleLower (0.0)32540x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0)3255};32563257// colorModel=KTX2_KDF_DF_MODEL_ASTC (0xA2)3258// Standard ASTC HDR 6x6 texture data, either highest quality or RDO optimized.3259static uint8_t g_ktx2_astc_hdr_6x6_nonalpha_dfd[44] =3260{32610x2C,0x0,0x0,0x0, // 0 totalSize32620x0,0x0,0x0,0x0, // 1 descriptorType/vendorId32630x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber32640xA2,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (0xA2/162, standard ASTC, KTX2_KDF_DF_MODEL_ASTC)32650x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension332660x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane332670x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane732680x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.)32690x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition332700x0,0x0,0x0,0x0, // 9 sampleLower (0.0)32710x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0)3272};32733274// colorModel=KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE (0xA8)3275// Our custom intermediate format that when decoded directly outputs ASTC HDR 6x63276static uint8_t g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd[44] =3277{32780x2C,0x0,0x0,0x0, // 0 totalSize32790x0,0x0,0x0,0x0, // 1 descriptorType/vendorId32800x2,0x0,0x28,0x0, // 2 descriptorBlockSize/versionNumber32810xA8,0x1,0x1,0x0, // 3 flags, transferFunction, colorPrimaries, colorModel (KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE)32820x5,0x5,0x0,0x0, // 4 texelBlockDimension0-texelBlockDimension332830x10,0x0,0x0,0x0, // 5 bytesPlane0-bytesPlane332840x0,0x0,0x0,0x0, // 6 bytesPlane4-bytesPlane732850x0,0x0,0x7F,0x80, // 7 bitLength/bitOffset/channelType and Qualifer flags (KHR_DF_SAMPLE_DATATYPE_FLOAT etc.)32860x0,0x0,0x0,0x0, // 8 samplePosition0-samplePosition332870x0,0x0,0x0,0x0, // 9 sampleLower (0.0)32880x00, 0x00, 0x80, 0x3F // 10 sampleHigher (1.0)3289};32903291bool basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header)3292{3293const uint8_t* pDFD;3294uint32_t dfd_len;32953296if (m_params.m_uastc)3297{3298if (m_params.m_hdr)3299{3300switch (m_params.m_hdr_mode)3301{3302case hdr_modes::cUASTC_HDR_4X4:3303{3304pDFD = g_ktx2_uastc_hdr_4x4_nonalpha_dfd;3305dfd_len = sizeof(g_ktx2_uastc_hdr_4x4_nonalpha_dfd);3306break;3307}3308case hdr_modes::cASTC_HDR_6X6:3309{3310pDFD = g_ktx2_astc_hdr_6x6_nonalpha_dfd;3311dfd_len = sizeof(g_ktx2_astc_hdr_6x6_nonalpha_dfd);3312break;3313}3314case hdr_modes::cASTC_HDR_6X6_INTERMEDIATE:3315{3316pDFD = g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd;3317dfd_len = sizeof(g_ktx2_astc_hdr_6x6_intermediate_nonalpha_dfd);3318break;3319}3320default:3321{3322assert(0);3323return false;3324}3325}3326}3327// Must be LDR UASTC 4x43328else if (m_any_source_image_has_alpha)3329{3330pDFD = g_ktx2_uastc_ldr_4x4_alpha_dfd;3331dfd_len = sizeof(g_ktx2_uastc_ldr_4x4_alpha_dfd);3332}3333else3334{3335pDFD = g_ktx2_uastc_ldr_4x4_nonalpha_dfd;3336dfd_len = sizeof(g_ktx2_uastc_ldr_4x4_nonalpha_dfd);3337}3338}3339else3340{3341// Must be ETC1S.3342assert(!m_params.m_hdr);33433344if (m_any_source_image_has_alpha)3345{3346pDFD = g_ktx2_etc1s_alpha_dfd;3347dfd_len = sizeof(g_ktx2_etc1s_alpha_dfd);3348}3349else3350{3351pDFD = g_ktx2_etc1s_nonalpha_dfd;3352dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd);3353}3354}33553356assert(dfd_len >= 44);33573358dfd.resize(dfd_len);3359memcpy(dfd.data(), pDFD, dfd_len);33603361uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t));33623363// Color primaries3364if ((m_params.m_hdr) && (m_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut))3365{3366dfd_bits &= ~(0xFF << 8);3367dfd_bits |= (basist::KTX2_DF_PRIMARIES_BT2020 << 8);3368}33693370// Transfer function3371dfd_bits &= ~(0xFF << 16);33723373if (m_params.m_hdr)3374{3375// TODO: In HDR mode, always write linear for now.3376dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16);3377}3378else3379{3380if (m_params.m_ktx2_srgb_transfer_func)3381dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16);3382else3383dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16);3384}33853386basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits);33873388if (header.m_supercompression_scheme != basist::KTX2_SS_NONE)3389{3390uint32_t plane_bits = basisu::read_le_dword(dfd.data() + 5 * sizeof(uint32_t));33913392plane_bits &= ~0xFF;33933394basisu::write_le_dword(dfd.data() + 5 * sizeof(uint32_t), plane_bits);3395}33963397// Fix up the DFD channel(s)3398uint32_t dfd_chan0 = basisu::read_le_dword(dfd.data() + 7 * sizeof(uint32_t));33993400if (m_params.m_uastc)3401{3402dfd_chan0 &= ~(0xF << 24);34033404// TODO: Allow the caller to override this3405if (m_any_source_image_has_alpha)3406dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24);3407else3408dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGB << 24);3409}34103411basisu::write_le_dword(dfd.data() + 7 * sizeof(uint32_t), dfd_chan0);34123413return true;3414}34153416bool basis_compressor::create_ktx2_file()3417{3418//bool needs_global_data = false;3419bool can_use_zstd = false;34203421switch (m_fmt_mode)3422{3423case basist::basis_tex_format::cETC1S:3424{3425//needs_global_data = true;3426break;3427}3428case basist::basis_tex_format::cUASTC4x4:3429{3430can_use_zstd = true;3431break;3432}3433case basist::basis_tex_format::cUASTC_HDR_4x4:3434{3435can_use_zstd = true;3436break;3437}3438case basist::basis_tex_format::cASTC_HDR_6x6:3439{3440can_use_zstd = true;3441break;3442}3443case basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE:3444{3445//needs_global_data = true;3446break;3447}3448default:3449assert(0);3450fmt_debug_printf("HERE 1\n");3451return false;3452}34533454if (can_use_zstd)3455{3456if ((m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_NONE) && (m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_ZSTANDARD))3457{3458fmt_debug_printf("HERE 2\n");3459return false;3460}3461}34623463const basisu_backend_output& backend_output = m_backend.get_output();34643465// Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap).3466// This does not support 1D or 3D.3467uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1;34683469for (uint32_t i = 0; i < m_slice_descs.size(); i++)3470{3471if ((m_slice_descs[i].m_mip_index == 0) && (!base_width))3472{3473base_width = m_slice_descs[i].m_orig_width;3474base_height = m_slice_descs[i].m_orig_height;3475}34763477total_layers = maximum<uint32_t>(total_layers, m_slice_descs[i].m_source_file_index + 1);34783479if (!m_slice_descs[i].m_source_file_index)3480total_levels = maximum<uint32_t>(total_levels, m_slice_descs[i].m_mip_index + 1);3481}34823483if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)3484{3485assert((total_layers % 6) == 0);34863487total_layers /= 6;3488assert(total_layers >= 1);34893490total_faces = 6;3491}34923493basist::ktx2_header header;3494memset(&header, 0, sizeof(header));34953496memcpy(header.m_identifier, basist::g_ktx2_file_identifier, sizeof(basist::g_ktx2_file_identifier));3497header.m_pixel_width = base_width;3498header.m_pixel_height = base_height;3499header.m_face_count = total_faces;35003501if (m_params.m_hdr)3502{3503if (m_params.m_hdr_mode == hdr_modes::cUASTC_HDR_4X4)3504header.m_vk_format = basist::KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK;3505else if (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6)3506header.m_vk_format = basist::KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK;3507else3508{3509assert(m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE);35103511header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED;3512}3513}3514else3515{3516// Either ETC1S or UASTC LDR 4x4.3517assert((m_fmt_mode == basist::basis_tex_format::cETC1S) || (m_fmt_mode == basist::basis_tex_format::cUASTC4x4));35183519header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED;3520}35213522header.m_type_size = 1;3523header.m_level_count = total_levels;3524header.m_layer_count = (total_layers > 1) ? total_layers : 0;35253526if (can_use_zstd)3527{3528switch (m_params.m_ktx2_uastc_supercompression)3529{3530case basist::KTX2_SS_NONE:3531{3532header.m_supercompression_scheme = basist::KTX2_SS_NONE;3533break;3534}3535case basist::KTX2_SS_ZSTANDARD:3536{3537#if BASISD_SUPPORT_KTX2_ZSTD3538header.m_supercompression_scheme = basist::KTX2_SS_ZSTANDARD;3539#else3540header.m_supercompression_scheme = basist::KTX2_SS_NONE;3541#endif3542break;3543}3544default:3545assert(0);3546fmt_debug_printf("HERE 3\n");3547return false;3548}3549}35503551basisu::vector<uint8_vec> level_data_bytes(total_levels);3552basisu::vector<uint8_vec> compressed_level_data_bytes(total_levels);3553size_t_vec slice_level_offsets(m_slice_descs.size());35543555// This will append the texture data in the correct order (for each level: layer, then face).3556for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)3557{3558const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];35593560slice_level_offsets[slice_index] = level_data_bytes[slice_desc.m_mip_index].size();35613562if (m_fmt_mode == basist::basis_tex_format::cETC1S)3563{3564append_vector(level_data_bytes[slice_desc.m_mip_index], backend_output.m_slice_image_data[slice_index]);3565}3566else3567{3568append_vector(level_data_bytes[slice_desc.m_mip_index], m_uastc_backend_output.m_slice_image_data[slice_index]);3569}3570}35713572// Zstd Supercompression3573if ((can_use_zstd) && (header.m_supercompression_scheme == basist::KTX2_SS_ZSTANDARD))3574{3575#if BASISD_SUPPORT_KTX2_ZSTD3576for (uint32_t level_index = 0; level_index < total_levels; level_index++)3577{3578compressed_level_data_bytes[level_index].resize(ZSTD_compressBound(level_data_bytes[level_index].size()));35793580size_t result = ZSTD_compress(compressed_level_data_bytes[level_index].data(), compressed_level_data_bytes[level_index].size(),3581level_data_bytes[level_index].data(), level_data_bytes[level_index].size(),3582m_params.m_ktx2_zstd_supercompression_level);35833584if (ZSTD_isError(result))3585{3586fmt_debug_printf("HERE 5\n");3587return false;3588}35893590compressed_level_data_bytes[level_index].resize(result);3591}3592#else3593// Can't get here3594assert(0);3595fmt_debug_printf("HERE 6\n");3596return false;3597#endif3598}3599else3600{3601// No supercompression3602compressed_level_data_bytes = level_data_bytes;3603}36043605uint8_vec ktx2_global_data;36063607// Create ETC1S global supercompressed data3608if (m_fmt_mode == basist::basis_tex_format::cETC1S)3609{3610basist::ktx2_etc1s_global_data_header etc1s_global_data_header;3611clear_obj(etc1s_global_data_header);36123613etc1s_global_data_header.m_endpoint_count = backend_output.m_num_endpoints;3614etc1s_global_data_header.m_selector_count = backend_output.m_num_selectors;3615etc1s_global_data_header.m_endpoints_byte_length = backend_output.m_endpoint_palette.size();3616etc1s_global_data_header.m_selectors_byte_length = backend_output.m_selector_palette.size();3617etc1s_global_data_header.m_tables_byte_length = backend_output.m_slice_image_tables.size();36183619basisu::vector<basist::ktx2_etc1s_image_desc> etc1s_image_descs(total_levels * total_layers * total_faces);3620memset(etc1s_image_descs.data(), 0, etc1s_image_descs.size_in_bytes());36213622for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)3623{3624const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];36253626const uint32_t level_index = slice_desc.m_mip_index;3627uint32_t layer_index = slice_desc.m_source_file_index;3628uint32_t face_index = 0;36293630if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)3631{3632face_index = layer_index % 6;3633layer_index /= 6;3634}36353636const uint32_t etc1s_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index;36373638if (slice_desc.m_alpha)3639{3640etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_length = backend_output.m_slice_image_data[slice_index].size();3641etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_offset = slice_level_offsets[slice_index];3642}3643else3644{3645if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)3646etc1s_image_descs[etc1s_image_index].m_image_flags = !slice_desc.m_iframe ? basist::KTX2_IMAGE_IS_P_FRAME : 0;36473648etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_length = backend_output.m_slice_image_data[slice_index].size();3649etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index];3650}3651} // slice_index36523653append_vector(ktx2_global_data, (const uint8_t*)&etc1s_global_data_header, sizeof(etc1s_global_data_header));3654append_vector(ktx2_global_data, (const uint8_t*)etc1s_image_descs.data(), etc1s_image_descs.size_in_bytes());3655append_vector(ktx2_global_data, backend_output.m_endpoint_palette);3656append_vector(ktx2_global_data, backend_output.m_selector_palette);3657append_vector(ktx2_global_data, backend_output.m_slice_image_tables);36583659header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ;3660}3661else if (m_fmt_mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)3662{3663basisu::vector<basist::ktx2_astc_hdr_6x6_intermediate_image_desc> image_descs(total_levels * total_layers * total_faces);3664memset(image_descs.data(), 0, image_descs.size_in_bytes());36653666for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)3667{3668const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index];36693670const uint32_t level_index = slice_desc.m_mip_index;3671uint32_t layer_index = slice_desc.m_source_file_index;3672uint32_t face_index = 0;36733674if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray)3675{3676face_index = layer_index % 6;3677layer_index /= 6;3678}36793680const uint32_t output_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index;36813682image_descs[output_image_index].m_rgb_slice_byte_length = m_uastc_backend_output.m_slice_image_data[slice_index].size();3683image_descs[output_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index];36843685} // slice_index36863687append_vector(ktx2_global_data, (const uint8_t*)image_descs.data(), image_descs.size_in_bytes());36883689header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ;3690}36913692// Key values3693basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values);36943695basist::ktx2_add_key_value(key_values, "KTXwriter", fmt_string("Basis Universal {}", BASISU_LIB_VERSION_STRING));36963697if (m_params.m_hdr)3698{3699if (m_upconverted_any_ldr_images)3700basist::ktx2_add_key_value(key_values, "LDRUpconversionMultiplier", fmt_string("{}", m_ldr_to_hdr_upconversion_nit_multiplier));37013702if (m_params.m_ldr_hdr_upconversion_srgb_to_linear)3703basist::ktx2_add_key_value(key_values, "LDRUpconversionSRGBToLinear", "1");3704}37053706key_values.sort();37073708#if BASISU_DISABLE_KTX2_KEY_VALUES3709// HACK HACK - Clear the key values array, which causes no key values to be written (triggering the ktx2check validator bug).3710key_values.clear();3711#endif37123713uint8_vec key_value_data;37143715// DFD3716uint8_vec dfd;3717if (!get_dfd(dfd, header))3718{3719fmt_debug_printf("HERE 7\n");3720return false;3721}37223723const uint32_t kvd_file_offset = sizeof(header) + sizeof(basist::ktx2_level_index) * total_levels + (uint32_t)dfd.size();37243725for (uint32_t pass = 0; pass < 2; pass++)3726{3727for (uint32_t i = 0; i < key_values.size(); i++)3728{3729if (key_values[i].m_key.size() < 2)3730{3731fmt_debug_printf("HERE 8\n");3732return false;3733}37343735if (key_values[i].m_key.back() != 0)3736{3737fmt_debug_printf("HERE 9\n");3738return false;3739}37403741const uint64_t total_len = (uint64_t)key_values[i].m_key.size() + (uint64_t)key_values[i].m_value.size();3742if (total_len >= UINT32_MAX)3743{3744fmt_debug_printf("HERE 10\n");3745return false;3746}37473748packed_uint<4> le_len((uint32_t)total_len);3749append_vector(key_value_data, (const uint8_t*)&le_len, sizeof(le_len));37503751append_vector(key_value_data, key_values[i].m_key);3752append_vector(key_value_data, key_values[i].m_value);37533754const uint32_t ofs = key_value_data.size() & 3;3755const uint32_t padding = (4 - ofs) & 3;3756for (uint32_t p = 0; p < padding; p++)3757key_value_data.push_back(0);3758}37593760if (header.m_supercompression_scheme != basist::KTX2_SS_NONE)3761break;37623763#if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND3764break;3765#endif37663767// Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data.3768uint32_t kvd_end_file_offset = kvd_file_offset + (uint32_t)key_value_data.size();3769uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15;3770if (!bytes_needed_to_pad)3771{3772// We're good. No need to add a dummy key.3773break;3774}37753776assert(!pass);3777if (pass)3778{3779fmt_debug_printf("HERE 11\n");3780return false;3781}37823783if (bytes_needed_to_pad < 6)3784bytes_needed_to_pad += 16;37853786// Just add the padding. It's likely not necessary anymore, but can't really hurt.3787//printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad);37883789// We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned.3790// We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize().3791key_values.enlarge(1);3792for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++)3793key_values.back().m_key.push_back(127);37943795key_values.back().m_key.push_back(0);37963797key_values.back().m_value.push_back(0);37983799key_values.sort();38003801key_value_data.resize(0);38023803// Try again3804}38053806basisu::vector<basist::ktx2_level_index> level_index_array(total_levels);3807memset(level_index_array.data(), 0, level_index_array.size_in_bytes());38083809m_output_ktx2_file.clear();3810m_output_ktx2_file.reserve(m_output_basis_file.size());38113812// Dummy header3813m_output_ktx2_file.resize(sizeof(header));38143815// Level index array3816append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes());38173818// DFD3819const uint8_t* pDFD = dfd.data();3820uint32_t dfd_len = (uint32_t)dfd.size();38213822header.m_dfd_byte_offset = m_output_ktx2_file.size();3823header.m_dfd_byte_length = dfd_len;3824append_vector(m_output_ktx2_file, pDFD, dfd_len);38253826// Key value data3827if (key_value_data.size())3828{3829assert(kvd_file_offset == m_output_ktx2_file.size());38303831header.m_kvd_byte_offset = m_output_ktx2_file.size();3832header.m_kvd_byte_length = key_value_data.size();3833append_vector(m_output_ktx2_file, key_value_data);3834}38353836// Global Supercompressed Data3837if (ktx2_global_data.size())3838{3839uint32_t ofs = m_output_ktx2_file.size() & 7;3840uint32_t padding = (8 - ofs) & 7;3841for (uint32_t i = 0; i < padding; i++)3842m_output_ktx2_file.push_back(0);38433844header.m_sgd_byte_length = ktx2_global_data.size();3845header.m_sgd_byte_offset = m_output_ktx2_file.size();38463847append_vector(m_output_ktx2_file, ktx2_global_data);3848}38493850// mipPadding3851if (header.m_supercompression_scheme == basist::KTX2_SS_NONE)3852{3853// We currently can't do this or the validator will incorrectly give an error.3854uint32_t ofs = m_output_ktx2_file.size() & 15;3855uint32_t padding = (16 - ofs) & 15;38563857// Make sure we're always aligned here (due to a validator bug).3858if (padding)3859{3860printf("Warning: KTX2 mip level data is not 16-byte aligned. This may trigger a ktx2check validation bug. Writing %u bytes of mipPadding.\n", padding);3861}38623863for (uint32_t i = 0; i < padding; i++)3864m_output_ktx2_file.push_back(0);3865}38663867// Level data - write the smallest mipmap first.3868for (int level = total_levels - 1; level >= 0; level--)3869{3870level_index_array[level].m_byte_length = compressed_level_data_bytes[level].size();38713872//if (m_params.m_uastc)3873if (can_use_zstd)3874{3875level_index_array[level].m_uncompressed_byte_length = level_data_bytes[level].size();3876}38773878level_index_array[level].m_byte_offset = m_output_ktx2_file.size();3879append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]);3880}38813882// Write final header3883memcpy(m_output_ktx2_file.data(), &header, sizeof(header));38843885// Write final level index array3886memcpy(m_output_ktx2_file.data() + sizeof(header), level_index_array.data(), level_index_array.size_in_bytes());38873888uint32_t total_orig_pixels = 0;38893890for (uint32_t i = 0; i < m_slice_descs.size(); i++)3891{3892const basisu_backend_slice_desc& slice_desc = m_slice_descs[i];3893total_orig_pixels += slice_desc.m_orig_width * slice_desc.m_orig_height;3894}38953896debug_printf("Total .ktx2 output file size: %u, %3.3f bits/texel\n", m_output_ktx2_file.size(), ((float)m_output_ktx2_file.size() * 8.0f) / total_orig_pixels);38973898return true;3899}39003901bool basis_parallel_compress(3902uint32_t total_threads,3903const basisu::vector<basis_compressor_params>& params_vec,3904basisu::vector< parallel_results >& results_vec)3905{3906assert(g_library_initialized);3907if (!g_library_initialized)3908{3909error_printf("basis_parallel_compress: basisu_encoder_init() MUST be called before using any encoder functionality!\n");3910return false;3911}39123913assert(total_threads >= 1);3914total_threads = basisu::maximum<uint32_t>(total_threads, 1);39153916job_pool jpool(total_threads);39173918results_vec.resize(0);3919results_vec.resize(params_vec.size());39203921std::atomic<bool> result;3922result.store(true);39233924std::atomic<bool> opencl_failed;3925opencl_failed.store(false);39263927for (uint32_t pindex = 0; pindex < params_vec.size(); pindex++)3928{3929jpool.add_job([pindex, ¶ms_vec, &results_vec, &result, &opencl_failed] {39303931basis_compressor_params params = params_vec[pindex];3932parallel_results& results = results_vec[pindex];39333934interval_timer tm;3935tm.start();39363937basis_compressor c;39383939// Dummy job pool3940job_pool task_jpool(1);3941params.m_pJob_pool = &task_jpool;3942// TODO: Remove this flag entirely3943params.m_multithreading = true;39443945// Stop using OpenCL if a failure ever occurs.3946if (opencl_failed)3947params.m_use_opencl = false;39483949bool status = c.init(params);39503951if (c.get_opencl_failed())3952opencl_failed.store(true);39533954if (status)3955{3956basis_compressor::error_code ec = c.process();39573958if (c.get_opencl_failed())3959opencl_failed.store(true);39603961results.m_error_code = ec;39623963if (ec == basis_compressor::cECSuccess)3964{3965results.m_basis_file = c.get_output_basis_file();3966results.m_ktx2_file = c.get_output_ktx2_file();3967results.m_stats = c.get_stats();3968results.m_basis_bits_per_texel = c.get_basis_bits_per_texel();3969results.m_any_source_image_has_alpha = c.get_any_source_image_has_alpha();3970}3971else3972{3973result = false;3974}3975}3976else3977{3978results.m_error_code = basis_compressor::cECFailedInitializing;39793980result = false;3981}39823983results.m_total_time = tm.get_elapsed_secs();3984} );39853986} // pindex39873988jpool.wait_for_all();39893990if (opencl_failed)3991error_printf("An OpenCL error occured sometime during compression. The compressor fell back to CPU processing after the failure.\n");39923993return result;3994}39953996static void* basis_compress(3997basist::basis_tex_format mode,3998const basisu::vector<image> *pSource_images,3999const basisu::vector<imagef> *pSource_images_hdr,4000uint32_t flags_and_quality, float uastc_rdo_quality,4001size_t* pSize,4002image_stats* pStats)4003{4004assert((pSource_images != nullptr) || (pSource_images_hdr != nullptr));4005assert(!((pSource_images != nullptr) && (pSource_images_hdr != nullptr)));40064007// Check input parameters4008if (pSource_images)4009{4010if ((!pSource_images->size()) || (!pSize))4011{4012error_printf("basis_compress: Invalid parameter\n");4013assert(0);4014return nullptr;4015}4016}4017else4018{4019if ((!pSource_images_hdr->size()) || (!pSize))4020{4021error_printf("basis_compress: Invalid parameter\n");4022assert(0);4023return nullptr;4024}4025}40264027*pSize = 0;40284029// Initialize a job pool4030uint32_t num_threads = 1;4031if (flags_and_quality & cFlagThreaded)4032num_threads = basisu::maximum<uint32_t>(1, std::thread::hardware_concurrency());40334034job_pool jp(num_threads);40354036// Initialize the compressor parameter struct4037basis_compressor_params comp_params;4038comp_params.set_format_mode(mode);40394040comp_params.m_pJob_pool = &jp;40414042comp_params.m_y_flip = (flags_and_quality & cFlagYFlip) != 0;4043comp_params.m_debug = (flags_and_quality & cFlagDebug) != 0;4044comp_params.m_debug_images = (flags_and_quality & cFlagDebugImages) != 0;40454046// Copy the largest mipmap level4047if (pSource_images)4048{4049comp_params.m_source_images.resize(1);4050comp_params.m_source_images[0] = (*pSource_images)[0];40514052// Copy the smaller mipmap levels, if any4053if (pSource_images->size() > 1)4054{4055comp_params.m_source_mipmap_images.resize(1);4056comp_params.m_source_mipmap_images[0].resize(pSource_images->size() - 1);40574058for (uint32_t i = 1; i < pSource_images->size(); i++)4059comp_params.m_source_mipmap_images[0][i - 1] = (*pSource_images)[i];4060}4061}4062else4063{4064comp_params.m_source_images_hdr.resize(1);4065comp_params.m_source_images_hdr[0] = (*pSource_images_hdr)[0];40664067// Copy the smaller mipmap levels, if any4068if (pSource_images_hdr->size() > 1)4069{4070comp_params.m_source_mipmap_images_hdr.resize(1);4071comp_params.m_source_mipmap_images_hdr[0].resize(pSource_images_hdr->size() - 1);40724073for (uint32_t i = 1; i < pSource_images->size(); i++)4074comp_params.m_source_mipmap_images_hdr[0][i - 1] = (*pSource_images_hdr)[i];4075}4076}40774078comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0;4079comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0;40804081comp_params.m_write_output_basis_or_ktx2_files = false;40824083comp_params.m_perceptual = (flags_and_quality & cFlagSRGB) != 0;4084comp_params.m_mip_srgb = comp_params.m_perceptual;4085comp_params.m_mip_gen = (flags_and_quality & (cFlagGenMipsWrap | cFlagGenMipsClamp)) != 0;4086comp_params.m_mip_wrapping = (flags_and_quality & cFlagGenMipsWrap) != 0;40874088if (mode == basist::basis_tex_format::cUASTC4x4)4089{4090comp_params.m_pack_uastc_ldr_4x4_flags = flags_and_quality & cPackUASTCLevelMask;4091comp_params.m_rdo_uastc_ldr_4x4 = (flags_and_quality & cFlagUASTCRDO) != 0;4092comp_params.m_rdo_uastc_ldr_4x4_quality_scalar = uastc_rdo_quality;4093}4094else if (mode == basist::basis_tex_format::cETC1S)4095{4096comp_params.m_etc1s_quality_level = basisu::maximum<uint32_t>(1, flags_and_quality & 255);4097}40984099comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0;41004101if (comp_params.m_create_ktx2_file)4102{4103// Set KTX2 specific parameters.4104if ((flags_and_quality & cFlagKTX2UASTCSuperCompression) && (comp_params.m_uastc))4105comp_params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD;41064107comp_params.m_ktx2_srgb_transfer_func = comp_params.m_perceptual;4108}41094110comp_params.m_compute_stats = (pStats != nullptr);4111comp_params.m_print_stats = (flags_and_quality & cFlagPrintStats) != 0;4112comp_params.m_status_output = (flags_and_quality & cFlagPrintStatus) != 0;41134114if (mode == basist::basis_tex_format::cUASTC_HDR_4x4)4115{4116comp_params.m_uastc_hdr_4x4_options.set_quality_level(flags_and_quality & cPackUASTCLevelMask);4117}4118else if ((mode == basist::basis_tex_format::cASTC_HDR_6x6) || (mode == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE))4119{4120comp_params.m_astc_hdr_6x6_options.set_user_level(flags_and_quality & cPackUASTCLevelMask);4121comp_params.m_astc_hdr_6x6_options.m_lambda = uastc_rdo_quality;4122comp_params.m_astc_hdr_6x6_options.m_rec2020_bt2100_color_gamut = (flags_and_quality & cFlagREC2020) != 0;4123}41244125// Create the compressor, initialize it, and process the input4126basis_compressor comp;4127if (!comp.init(comp_params))4128{4129error_printf("basis_compress: basis_compressor::init() failed!\n");4130return nullptr;4131}41324133basis_compressor::error_code ec = comp.process();41344135if (ec != basis_compressor::cECSuccess)4136{4137error_printf("basis_compress: basis_compressor::process() failed with error code %u\n", (uint32_t)ec);4138return nullptr;4139}41404141if ((pStats) && (comp.get_opencl_failed()))4142{4143pStats->m_opencl_failed = true;4144}41454146// Get the output file data and return it to the caller4147void* pFile_data = nullptr;4148const uint8_vec* pFile_data_vec = comp_params.m_create_ktx2_file ? &comp.get_output_ktx2_file() : &comp.get_output_basis_file();41494150pFile_data = malloc(pFile_data_vec->size());4151if (!pFile_data)4152{4153error_printf("basis_compress: Out of memory\n");4154return nullptr;4155}4156memcpy(pFile_data, pFile_data_vec->get_ptr(), pFile_data_vec->size());41574158*pSize = pFile_data_vec->size();41594160if ((pStats) && (comp.get_stats().size()))4161{4162*pStats = comp.get_stats()[0];4163}41644165return pFile_data;4166}41674168void* basis_compress(4169basist::basis_tex_format mode,4170const basisu::vector<image>& source_images,4171uint32_t flags_and_quality, float uastc_rdo_quality,4172size_t* pSize,4173image_stats* pStats)4174{4175return basis_compress(mode, &source_images, nullptr, flags_and_quality, uastc_rdo_quality, pSize, pStats);4176}41774178void* basis_compress(4179basist::basis_tex_format mode,4180const basisu::vector<imagef>& source_images_hdr,4181uint32_t flags_and_quality, float lambda,4182size_t* pSize,4183image_stats* pStats)4184{4185return basis_compress(mode, nullptr, &source_images_hdr, flags_and_quality, lambda, pSize, pStats);4186}41874188void* basis_compress(4189basist::basis_tex_format mode,4190const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels,4191uint32_t flags_and_quality, float uastc_rdo_quality,4192size_t* pSize,4193image_stats* pStats)4194{4195if (!pitch_in_pixels)4196pitch_in_pixels = width;41974198if ((!pImageRGBA) || (!width) || (!height) || (pitch_in_pixels < width) || (!pSize))4199{4200error_printf("basis_compress: Invalid parameter\n");4201assert(0);4202return nullptr;4203}42044205*pSize = 0;42064207if ((width > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (height > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION))4208{4209error_printf("basis_compress: Image too large\n");4210return nullptr;4211}42124213// Copy the source image4214basisu::vector<image> source_image(1);4215source_image[0].crop(width, height, width, g_black_color, false);4216for (uint32_t y = 0; y < height; y++)4217memcpy(source_image[0].get_ptr() + y * width, (const color_rgba*)pImageRGBA + y * pitch_in_pixels, width * sizeof(color_rgba));42184219return basis_compress(mode, source_image, flags_and_quality, uastc_rdo_quality, pSize, pStats);4220}42214222void basis_free_data(void* p)4223{4224free(p);4225}42264227bool basis_benchmark_etc1s_opencl(bool* pOpenCL_failed)4228{4229if (pOpenCL_failed)4230*pOpenCL_failed = false;42314232if (!opencl_is_available())4233{4234error_printf("basis_benchmark_etc1s_opencl: OpenCL support must be enabled first!\n");4235return false;4236}42374238const uint32_t W = 1024, H = 1024;4239basisu::vector<image> images;4240image& img = images.enlarge(1)->resize(W, H);42414242const uint32_t NUM_RAND_LETTERS = 6000;// 40000;42434244rand r;4245r.seed(200);42464247for (uint32_t i = 0; i < NUM_RAND_LETTERS; i++)4248{4249uint32_t x = r.irand(0, W - 1), y = r.irand(0, H - 1);4250uint32_t sx = r.irand(1, 4), sy = r.irand(1, 4);4251color_rgba c(r.byte(), r.byte(), r.byte(), 255);42524253img.debug_text(x, y, sx, sy, c, nullptr, false, "%c", static_cast<char>(r.irand(32, 127)));4254}42554256//save_png("test.png", img);42574258image_stats stats;42594260uint32_t flags_and_quality = cFlagSRGB | cFlagThreaded | 255;4261size_t comp_size = 0;42624263double best_cpu_time = 1e+9f, best_gpu_time = 1e+9f;42644265const uint32_t TIMES_TO_ENCODE = 2;4266interval_timer tm;42674268for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++)4269{4270tm.start();4271void* pComp_data = basis_compress(4272basist::basis_tex_format::cETC1S,4273images,4274flags_and_quality, 1.0f,4275&comp_size,4276&stats);4277double cpu_time = tm.get_elapsed_secs();4278if (!pComp_data)4279{4280error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (CPU)!\n");4281return false;4282}42834284best_cpu_time = minimum(best_cpu_time, cpu_time);42854286basis_free_data(pComp_data);4287}42884289printf("Best CPU time: %3.3f\n", best_cpu_time);42904291for (uint32_t i = 0; i < TIMES_TO_ENCODE; i++)4292{4293tm.start();4294void* pComp_data = basis_compress(4295basist::basis_tex_format::cETC1S,4296images,4297flags_and_quality | cFlagUseOpenCL, 1.0f,4298&comp_size,4299&stats);43004301if (stats.m_opencl_failed)4302{4303error_printf("basis_benchmark_etc1s_opencl: OpenCL failed!\n");43044305basis_free_data(pComp_data);43064307if (pOpenCL_failed)4308*pOpenCL_failed = true;43094310return false;4311}43124313double gpu_time = tm.get_elapsed_secs();4314if (!pComp_data)4315{4316error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (GPU)!\n");4317return false;4318}43194320best_gpu_time = minimum(best_gpu_time, gpu_time);43214322basis_free_data(pComp_data);4323}43244325printf("Best GPU time: %3.3f\n", best_gpu_time);43264327return best_gpu_time < best_cpu_time;4328}43294330} // namespace basisu433143324333433443354336