Path: blob/master/thirdparty/basis_universal/encoder/basisu_backend.cpp
9903 views
// basisu_backend.cpp1// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.2//3// Licensed under the Apache License, Version 2.0 (the "License");4// you may not use this file except in compliance with the License.5// You may obtain a copy of the License at6//7// http://www.apache.org/licenses/LICENSE-2.08//9// Unless required by applicable law or agreed to in writing, software10// distributed under the License is distributed on an "AS IS" BASIS,11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12// See the License for the specific language governing permissions and13// limitations under the License.14//15// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here.16//17#include "basisu_backend.h"1819#if BASISU_SUPPORT_SSE20#define CPPSPMD_NAME(a) a##_sse4121#include "basisu_kernels_declares.h"22#endif2324#define BASISU_FASTER_SELECTOR_REORDERING 025#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__);2627namespace basisu28{29// TODO30static inline void verify(bool condition, int line)31{32if (!condition)33{34fprintf(stderr, "ERROR: basisu_backend: verify() failed at line %i!\n", line);35abort();36}37}3839basisu_backend::basisu_backend()40{41clear();42}4344void basisu_backend::clear()45{46m_pFront_end = NULL;47m_params.clear();48m_output.clear();49}5051void basisu_backend::init(basisu_frontend* pFront_end, basisu_backend_params& params, const basisu_backend_slice_desc_vec& slice_descs)52{53m_pFront_end = pFront_end;54m_params = params;55m_slices = slice_descs;5657debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n",58m_slices.size(),59params.m_etc1s,60params.m_endpoint_rdo_quality_thresh,61params.m_selector_rdo_quality_thresh);6263debug_printf("Frontend endpoints: %u selectors: %u\n", m_pFront_end->get_total_endpoint_clusters(), m_pFront_end->get_total_selector_clusters());6465for (uint32_t i = 0; i < m_slices.size(); i++)66{67debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, FirstBlockIndex: %u\n",68i,69m_slices[i].m_orig_width, m_slices[i].m_orig_height,70m_slices[i].m_width, m_slices[i].m_height,71m_slices[i].m_num_blocks_x, m_slices[i].m_num_blocks_y,72m_slices[i].m_first_block_index);73}74}7576void basisu_backend::create_endpoint_palette()77{78const basisu_frontend& r = *m_pFront_end;7980m_output.m_num_endpoints = r.get_total_endpoint_clusters();8182m_endpoint_palette.resize(r.get_total_endpoint_clusters());83for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)84{85etc1_endpoint_palette_entry& e = m_endpoint_palette[i];8687e.m_color5_valid = r.get_endpoint_cluster_color_is_used(i, false);88e.m_color5 = r.get_endpoint_cluster_unscaled_color(i, false);89e.m_inten5 = r.get_endpoint_cluster_inten_table(i, false);9091BASISU_BACKEND_VERIFY(e.m_color5_valid);92}93}9495void basisu_backend::create_selector_palette()96{97const basisu_frontend& r = *m_pFront_end;9899m_output.m_num_selectors = r.get_total_selector_clusters();100101m_selector_palette.resize(r.get_total_selector_clusters());102103for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)104{105etc1_selector_palette_entry& s = m_selector_palette[i];106107const etc_block& selector_bits = r.get_selector_cluster_selector_bits(i);108109for (uint32_t y = 0; y < 4; y++)110{111for (uint32_t x = 0; x < 4; x++)112{113s[y * 4 + x] = static_cast<uint8_t>(selector_bits.get_selector(x, y));114}115}116}117}118119static const struct120{121int8_t m_dx, m_dy;122} g_endpoint_preds[] =123{124{ -1, 0 },125{ 0, -1 },126{ -1, -1 }127};128129void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices)130{131basisu_frontend& r = *m_pFront_end;132//const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;133134if (m_params.m_used_global_codebooks)135{136m_endpoint_remap_table_old_to_new.clear();137m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters());138for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)139m_endpoint_remap_table_old_to_new[i] = i;140}141else142{143//if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0))144if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1))145{146// We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed)147uint_vec new_block_endpoints(get_total_blocks());148149for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)150{151const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;152const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;153const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;154155for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)156for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)157new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;158}159160int_vec old_to_new_endpoint_indices;161r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true);162163create_endpoint_palette();164165for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)166{167//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;168169//const uint32_t width = m_slices[slice_index].m_width;170//const uint32_t height = m_slices[slice_index].m_height;171const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;172const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;173174for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)175{176for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)177{178//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;179180encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);181182m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index];183} // block_x184} // block_y185} // slice_index186187for (uint32_t i = 0; i < all_endpoint_indices.size(); i++)188all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]];189190} //if (total_block_endpoints_remapped)191192// Sort endpoint codebook193palette_index_reorderer reorderer;194reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0);195m_endpoint_remap_table_old_to_new = reorderer.get_remap_table();196}197198// For endpoints, old_to_new[] may not be bijective!199// Some "old" entries may be unused and don't get remapped into the "new" array.200201m_old_endpoint_was_used.clear();202m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters());203uint32_t first_old_entry_index = UINT32_MAX;204205for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)206{207const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;208for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)209{210for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)211{212encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);213const uint32_t old_endpoint_index = m.m_endpoint_index;214215m_old_endpoint_was_used[old_endpoint_index] = true;216first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);217} // block_x218} // block_y219} // slice_index220221debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index);222223m_new_endpoint_was_used.clear();224m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters());225226m_endpoint_remap_table_new_to_old.clear();227m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters());228229// Set unused entries in the new array to point to the first used entry in the old array.230m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index);231232for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++)233{234if (m_old_endpoint_was_used[old_index])235{236const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index];237238m_new_endpoint_was_used[new_index] = true;239240m_endpoint_remap_table_new_to_old[new_index] = old_index;241}242}243}244245void basisu_backend::sort_selector_codebook()246{247basisu_frontend& r = *m_pFront_end;248249m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters());250251if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks))252{253for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)254m_selector_remap_table_new_to_old[i] = i;255}256else257{258m_selector_remap_table_new_to_old[0] = 0;259uint32_t prev_selector_index = 0;260261int_vec remaining_selectors;262remaining_selectors.reserve(r.get_total_selector_clusters() - 1);263for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)264remaining_selectors.push_back(i);265266uint_vec selector_palette_bytes(m_selector_palette.size());267for (uint32_t i = 0; i < m_selector_palette.size(); i++)268selector_palette_bytes[i] = m_selector_palette[i].get_byte(0) | (m_selector_palette[i].get_byte(1) << 8) | (m_selector_palette[i].get_byte(2) << 16) | (m_selector_palette[i].get_byte(3) << 24);269270// This is the traveling salesman problem.271for (uint32_t i = 1; i < r.get_total_selector_clusters(); i++)272{273uint32_t best_hamming_dist = 100;274uint32_t best_index = 0;275276#if BASISU_FASTER_SELECTOR_REORDERING277const uint32_t step = (remaining_selectors.size() > 16) ? 16 : 1;278for (uint32_t j = 0; j < remaining_selectors.size(); j += step)279#else280for (uint32_t j = 0; j < remaining_selectors.size(); j++)281#endif282{283int selector_index = remaining_selectors[j];284285uint32_t k = selector_palette_bytes[prev_selector_index] ^ selector_palette_bytes[selector_index];286uint32_t hamming_dist = g_hamming_dist[k & 0xFF] + g_hamming_dist[(k >> 8) & 0xFF] + g_hamming_dist[(k >> 16) & 0xFF] + g_hamming_dist[k >> 24];287288if (hamming_dist < best_hamming_dist)289{290best_hamming_dist = hamming_dist;291best_index = j;292if (best_hamming_dist <= 1)293break;294}295}296297prev_selector_index = remaining_selectors[best_index];298m_selector_remap_table_new_to_old[i] = prev_selector_index;299300remaining_selectors[best_index] = remaining_selectors.back();301remaining_selectors.resize(remaining_selectors.size() - 1);302}303}304305m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters());306for (uint32_t i = 0; i < m_selector_remap_table_new_to_old.size(); i++)307m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i;308}309int basisu_backend::find_video_frame(int slice_index, int delta)310{311for (uint32_t s = 0; s < m_slices.size(); s++)312{313if ((int)m_slices[s].m_source_file_index != ((int)m_slices[slice_index].m_source_file_index + delta))314continue;315if (m_slices[s].m_mip_index != m_slices[slice_index].m_mip_index)316continue;317318// Being super paranoid here.319if (m_slices[s].m_num_blocks_x != (m_slices[slice_index].m_num_blocks_x))320continue;321if (m_slices[s].m_num_blocks_y != (m_slices[slice_index].m_num_blocks_y))322continue;323if (m_slices[s].m_alpha != (m_slices[slice_index].m_alpha))324continue;325return s;326}327328return -1;329}330331void basisu_backend::check_for_valid_cr_blocks()332{333basisu_frontend& r = *m_pFront_end;334const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;335336if (!is_video)337return;338339debug_printf("basisu_backend::check_for_valid_cr_blocks\n");340341uint32_t total_crs = 0;342uint32_t total_invalid_crs = 0;343344for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)345{346const bool is_iframe = m_slices[slice_index].m_iframe;347//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;348349//const uint32_t width = m_slices[slice_index].m_width;350//const uint32_t height = m_slices[slice_index].m_height;351const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;352const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;353const int prev_frame_slice_index = find_video_frame(slice_index, -1);354355// If we don't have a previous frame, and we're not an i-frame, something is wrong.356if ((prev_frame_slice_index < 0) && (!is_iframe))357{358BASISU_BACKEND_VERIFY(0);359}360361if ((is_iframe) || (prev_frame_slice_index < 0))362{363// Ensure no blocks use CR's364for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)365{366for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)367{368encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);369BASISU_BACKEND_VERIFY(m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX);370}371}372}373else374{375// For blocks that use CR's, make sure the endpoints/selectors haven't really changed.376for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)377{378for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)379{380encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);381382if (m.m_endpoint_predictor == basist::CR_ENDPOINT_PRED_INDEX)383{384total_crs++;385386encoder_block& prev_m = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y);387388if ((m.m_endpoint_index != prev_m.m_endpoint_index) || (m.m_selector_index != prev_m.m_selector_index))389{390total_invalid_crs++;391}392}393} // block_x394} // block_y395396} // !slice_index397398} // slice_index399400debug_printf("Total CR's: %u, Total invalid CR's: %u\n", total_crs, total_invalid_crs);401402BASISU_BACKEND_VERIFY(total_invalid_crs == 0);403}404405void basisu_backend::create_encoder_blocks()406{407debug_printf("basisu_backend::create_encoder_blocks\n");408409interval_timer tm;410tm.start();411412basisu_frontend& r = *m_pFront_end;413const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;414415m_slice_encoder_blocks.resize(m_slices.size());416417uint32_t total_endpoint_pred_missed = 0, total_endpoint_pred_hits = 0, total_block_endpoints_remapped = 0;418419uint_vec all_endpoint_indices;420all_endpoint_indices.reserve(get_total_blocks());421422for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)423{424const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;425const bool is_iframe = m_slices[slice_index].m_iframe;426const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;427428//const uint32_t width = m_slices[slice_index].m_width;429//const uint32_t height = m_slices[slice_index].m_height;430const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;431const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;432433m_slice_encoder_blocks[slice_index].resize(num_blocks_x, num_blocks_y);434435for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)436{437for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)438{439const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;440441encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);442443m.m_endpoint_index = r.get_subblock_endpoint_cluster_index(block_index, 0);444BASISU_BACKEND_VERIFY(r.get_subblock_endpoint_cluster_index(block_index, 0) == r.get_subblock_endpoint_cluster_index(block_index, 1));445446m.m_selector_index = r.get_block_selector_cluster_index(block_index);447448m.m_endpoint_predictor = basist::NO_ENDPOINT_PRED_INDEX;449450const uint32_t block_endpoint = m.m_endpoint_index;451452uint32_t best_endpoint_pred = UINT32_MAX;453454for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)455{456if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))457{458if ((prev_frame_slice_index != -1) && (!is_iframe))459{460const uint32_t cur_endpoint = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index;461const uint32_t cur_selector = m_slice_encoder_blocks[slice_index](block_x, block_y).m_selector_index;462const uint32_t prev_endpoint = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_endpoint_index;463const uint32_t prev_selector = m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_selector_index;464if ((cur_endpoint == prev_endpoint) && (cur_selector == prev_selector))465{466best_endpoint_pred = basist::CR_ENDPOINT_PRED_INDEX;467m_slice_encoder_blocks[prev_frame_slice_index](block_x, block_y).m_is_cr_target = true;468}469}470}471else472{473int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;474if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))475continue;476477int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;478if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))479continue;480481uint32_t pred_endpoint = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;482483if (pred_endpoint == block_endpoint)484{485if (endpoint_pred < best_endpoint_pred)486{487best_endpoint_pred = endpoint_pred;488}489}490}491492} // endpoint_pred493494if (best_endpoint_pred != UINT32_MAX)495{496m.m_endpoint_predictor = best_endpoint_pred;497498total_endpoint_pred_hits++;499}500else if (m_params.m_endpoint_rdo_quality_thresh > 0.0f)501{502const pixel_block& src_pixels = r.get_source_pixel_block(block_index);503504etc_block etc_blk(r.get_output_block(block_index));505506uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);507508if (cur_err)509{510const uint64_t thresh_err = (uint64_t)(cur_err * maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh));511512etc_block trial_etc_block(etc_blk);513514uint64_t best_err = UINT64_MAX;515uint32_t best_endpoint_index = 0;516517best_endpoint_pred = UINT32_MAX;518519for (uint32_t endpoint_pred = 0; endpoint_pred < basist::NUM_ENDPOINT_PREDS; endpoint_pred++)520{521if ((is_video) && (endpoint_pred == basist::CR_ENDPOINT_PRED_INDEX))522continue;523524int pred_block_x = block_x + g_endpoint_preds[endpoint_pred].m_dx;525if ((pred_block_x < 0) || (pred_block_x >= (int)num_blocks_x))526continue;527528int pred_block_y = block_y + g_endpoint_preds[endpoint_pred].m_dy;529if ((pred_block_y < 0) || (pred_block_y >= (int)num_blocks_y))530continue;531532uint32_t pred_endpoint_index = m_slice_encoder_blocks[slice_index](pred_block_x, pred_block_y).m_endpoint_index;533534uint32_t pred_inten = r.get_endpoint_cluster_inten_table(pred_endpoint_index, false);535color_rgba pred_color = r.get_endpoint_cluster_unscaled_color(pred_endpoint_index, false);536537trial_etc_block.set_block_color5(pred_color, pred_color);538trial_etc_block.set_inten_table(0, pred_inten);539trial_etc_block.set_inten_table(1, pred_inten);540541color_rgba trial_colors[16];542unpack_etc1(trial_etc_block, trial_colors);543544uint64_t trial_err = 0;545if (r.get_params().m_perceptual)546{547for (uint32_t p = 0; p < 16; p++)548{549trial_err += color_distance(true, src_pixels.get_ptr()[p], trial_colors[p], false);550if (trial_err > thresh_err)551break;552}553}554else555{556for (uint32_t p = 0; p < 16; p++)557{558trial_err += color_distance(false, src_pixels.get_ptr()[p], trial_colors[p], false);559if (trial_err > thresh_err)560break;561}562}563564if (trial_err <= thresh_err)565{566if ((trial_err < best_err) || ((trial_err == best_err) && (endpoint_pred < best_endpoint_pred)))567{568best_endpoint_pred = endpoint_pred;569best_err = trial_err;570best_endpoint_index = pred_endpoint_index;571}572}573} // endpoint_pred574575if (best_endpoint_pred != UINT32_MAX)576{577m.m_endpoint_index = best_endpoint_index;578m.m_endpoint_predictor = best_endpoint_pred;579580total_endpoint_pred_hits++;581total_block_endpoints_remapped++;582}583else584{585total_endpoint_pred_missed++;586}587}588}589else590{591total_endpoint_pred_missed++;592}593594if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)595{596all_endpoint_indices.push_back(m.m_endpoint_index);597}598599} // block_x600601} // block_y602603} // slice604605debug_printf("total_endpoint_pred_missed: %u (%3.2f%%) total_endpoint_pred_hit: %u (%3.2f%%), total_block_endpoints_remapped: %u (%3.2f%%)\n",606total_endpoint_pred_missed, total_endpoint_pred_missed * 100.0f / get_total_blocks(),607total_endpoint_pred_hits, total_endpoint_pred_hits * 100.0f / get_total_blocks(),608total_block_endpoints_remapped, total_block_endpoints_remapped * 100.0f / get_total_blocks());609610reoptimize_and_sort_endpoints_codebook(total_block_endpoints_remapped, all_endpoint_indices);611612sort_selector_codebook();613check_for_valid_cr_blocks();614615debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());616}617618void basisu_backend::compute_slice_crcs()619{620for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)621{622//const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;623const uint32_t width = m_slices[slice_index].m_width;624const uint32_t height = m_slices[slice_index].m_height;625const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;626const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;627628gpu_image gi;629gi.init(texture_format::cETC1, width, height);630631for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)632{633for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)634{635//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;636637encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);638639{640etc_block& output_block = *(etc_block*)gi.get_block_ptr(block_x, block_y);641642output_block.set_diff_bit(true);643// Setting the flip bit to false to be compatible with the Khronos KDFS.644//output_block.set_flip_bit(true);645output_block.set_flip_bit(false);646647const uint32_t endpoint_index = m.m_endpoint_index;648649output_block.set_block_color5_etc1s(m_endpoint_palette[endpoint_index].m_color5);650output_block.set_inten_tables_etc1s(m_endpoint_palette[endpoint_index].m_inten5);651652const uint32_t selector_idx = m.m_selector_index;653654const etc1_selector_palette_entry& selectors = m_selector_palette[selector_idx];655for (uint32_t sy = 0; sy < 4; sy++)656for (uint32_t sx = 0; sx < 4; sx++)657output_block.set_selector(sx, sy, selectors(sx, sy));658}659660} // block_x661} // block_y662663m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0);664665if (m_params.m_debug_images)666{667image gi_unpacked;668gi.unpack(gi_unpacked);669670char buf[256];671#ifdef _WIN32672sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);673#else674snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);675#endif676save_png(buf, gi_unpacked);677}678679} // slice_index680}681682//uint32_t g_color_delta_hist[255 * 3 + 1];683//uint32_t g_color_delta_bad_hist[255 * 3 + 1];684685// TODO: Split this into multiple methods.686bool basisu_backend::encode_image()687{688basisu_frontend& r = *m_pFront_end;689const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;690691uint32_t total_used_selector_history_buf = 0;692uint32_t total_selector_indices_remapped = 0;693694basist::approx_move_to_front selector_history_buf(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);695histogram selector_history_buf_histogram(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);696histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1);697histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);698699basisu::vector<uint_vec> selector_syms(m_slices.size());700701const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters();702const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE;703704m_output.m_slice_image_crcs.resize(m_slices.size());705706histogram delta_endpoint_histogram(r.get_total_endpoint_clusters());707708histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS);709basisu::vector<uint_vec> endpoint_pred_syms(m_slices.size());710711uint32_t total_endpoint_indices_remapped = 0;712713uint_vec block_endpoint_indices, block_selector_indices;714715interval_timer tm;716tm.start();717718const int COLOR_DELTA_THRESH = 8;719const int SEL_DIFF_THRESHOLD = 11;720721for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)722{723//const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1;724//const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1;725const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;726//const uint32_t width = m_slices[slice_index].m_width;727//const uint32_t height = m_slices[slice_index].m_height;728const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;729const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;730731selector_history_buf.reset();732733int selector_history_buf_rle_count = 0;734735int prev_endpoint_pred_sym_bits = -1, endpoint_pred_repeat_count = 0;736737uint32_t prev_endpoint_index = 0;738739vector2D<uint8_t> block_endpoints_are_referenced(num_blocks_x, num_blocks_y);740741for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)742{743for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)744{745//const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;746747encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);748749if (m.m_endpoint_predictor == 0)750block_endpoints_are_referenced(block_x - 1, block_y) = true;751else if (m.m_endpoint_predictor == 1)752block_endpoints_are_referenced(block_x, block_y - 1) = true;753else if (m.m_endpoint_predictor == 2)754{755if (!is_video)756block_endpoints_are_referenced(block_x - 1, block_y - 1) = true;757}758if (is_video)759{760if (m.m_is_cr_target)761block_endpoints_are_referenced(block_x, block_y) = true;762}763764} // block_x765} // block_y766767for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)768{769for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)770{771const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x;772773encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);774775if (((block_x & 1) == 0) && ((block_y & 1) == 0))776{777uint32_t endpoint_pred_cur_sym_bits = 0;778779for (uint32_t y = 0; y < 2; y++)780{781for (uint32_t x = 0; x < 2; x++)782{783const uint32_t bx = block_x + x;784const uint32_t by = block_y + y;785786uint32_t pred = basist::NO_ENDPOINT_PRED_INDEX;787if ((bx < num_blocks_x) && (by < num_blocks_y))788pred = m_slice_encoder_blocks[slice_index](bx, by).m_endpoint_predictor;789790endpoint_pred_cur_sym_bits |= (pred << (x * 2 + y * 4));791}792}793794if ((int)endpoint_pred_cur_sym_bits == prev_endpoint_pred_sym_bits)795{796endpoint_pred_repeat_count++;797}798else799{800if (endpoint_pred_repeat_count > 0)801{802if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)803{804endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);805endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);806807endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);808}809else810{811for (int j = 0; j < endpoint_pred_repeat_count; j++)812{813endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);814endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);815}816}817818endpoint_pred_repeat_count = 0;819}820821endpoint_pred_histogram.inc(endpoint_pred_cur_sym_bits);822endpoint_pred_syms[slice_index].push_back(endpoint_pred_cur_sym_bits);823824prev_endpoint_pred_sym_bits = endpoint_pred_cur_sym_bits;825}826}827828int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];829830if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)831{832int endpoint_delta = new_endpoint_index - prev_endpoint_index;833834if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))835{836const pixel_block& src_pixels = r.get_source_pixel_block(block_index);837838etc_block etc_blk(r.get_output_block(block_index));839840const uint64_t cur_err = etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);841const uint32_t cur_inten5 = etc_blk.get_inten_table(0);842843const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index];844845if (cur_err)846{847const float endpoint_remap_thresh = maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh);848const uint64_t thresh_err = (uint64_t)(cur_err * endpoint_remap_thresh);849850//const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 32;851const int MAX_ENDPOINT_SEARCH_DIST = (m_params.m_compression_level >= 2) ? 64 : 16;852853if (!g_cpu_supports_sse41)854{855const uint64_t initial_best_trial_err = UINT64_MAX;856uint64_t best_trial_err = initial_best_trial_err;857int best_trial_idx = 0;858859etc_block trial_etc_blk(etc_blk);860861const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);862for (int d = -search_dist; d < search_dist; d++)863{864int trial_idx = prev_endpoint_index + d;865if (trial_idx < 0)866trial_idx += (int)r.get_total_endpoint_clusters();867else if (trial_idx >= (int)r.get_total_endpoint_clusters())868trial_idx -= (int)r.get_total_endpoint_clusters();869870if (trial_idx == new_endpoint_index)871continue;872873// Skip it if this new endpoint palette entry is actually never used.874if (!m_new_endpoint_was_used[trial_idx])875continue;876877const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];878879if (m_params.m_compression_level <= 1)880{881if (p.m_inten5 > cur_inten5)882continue;883884int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);885int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);886int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);887int color_delta = delta_r + delta_g + delta_b;888889if (color_delta > COLOR_DELTA_THRESH)890continue;891}892893trial_etc_blk.set_block_color5_etc1s(p.m_color5);894trial_etc_blk.set_inten_tables_etc1s(p.m_inten5);895896uint64_t trial_err = trial_etc_blk.evaluate_etc1_error(src_pixels.get_ptr(), r.get_params().m_perceptual);897898if ((trial_err < best_trial_err) && (trial_err <= thresh_err))899{900best_trial_err = trial_err;901best_trial_idx = trial_idx;902}903}904905if (best_trial_err != initial_best_trial_err)906{907m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];908909new_endpoint_index = best_trial_idx;910911endpoint_delta = new_endpoint_index - prev_endpoint_index;912913total_endpoint_indices_remapped++;914}915}916else917{918#if BASISU_SUPPORT_SSE919uint8_t block_selectors[16];920for (uint32_t i = 0; i < 16; i++)921block_selectors[i] = (uint8_t)etc_blk.get_selector(i & 3, i >> 2);922923const int64_t initial_best_trial_err = INT64_MAX;924int64_t best_trial_err = initial_best_trial_err;925int best_trial_idx = 0;926927const int search_dist = minimum<int>(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST);928for (int d = -search_dist; d < search_dist; d++)929{930int trial_idx = prev_endpoint_index + d;931if (trial_idx < 0)932trial_idx += (int)r.get_total_endpoint_clusters();933else if (trial_idx >= (int)r.get_total_endpoint_clusters())934trial_idx -= (int)r.get_total_endpoint_clusters();935936if (trial_idx == new_endpoint_index)937continue;938939// Skip it if this new endpoint palette entry is actually never used.940if (!m_new_endpoint_was_used[trial_idx])941continue;942943const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]];944945if (m_params.m_compression_level <= 1)946{947if (p.m_inten5 > cur_inten5)948continue;949950int delta_r = iabs(cur_endpoints.m_color5.r - p.m_color5.r);951int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g);952int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b);953int color_delta = delta_r + delta_g + delta_b;954955if (color_delta > COLOR_DELTA_THRESH)956continue;957}958959color_rgba block_colors[4];960etc_block::get_block_colors_etc1s(block_colors, p.m_color5, p.m_inten5);961962int64_t trial_err;963if (r.get_params().m_perceptual)964{965perceptual_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);966}967else968{969linear_distance_rgb_4_N_sse41(&trial_err, block_selectors, block_colors, src_pixels.get_ptr(), 16, best_trial_err);970}971972//if (trial_err > thresh_err)973// g_color_delta_bad_hist[color_delta]++;974975if ((trial_err < best_trial_err) && (trial_err <= (int64_t)thresh_err))976{977best_trial_err = trial_err;978best_trial_idx = trial_idx;979}980}981982if (best_trial_err != initial_best_trial_err)983{984m.m_endpoint_index = m_endpoint_remap_table_new_to_old[best_trial_idx];985986new_endpoint_index = best_trial_idx;987988endpoint_delta = new_endpoint_index - prev_endpoint_index;989990total_endpoint_indices_remapped++;991}992#endif // BASISU_SUPPORT_SSE993} // if (!g_cpu_supports_sse41)994995} // if (cur_err)996997} // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y)))998999if (endpoint_delta < 0)1000endpoint_delta += (int)r.get_total_endpoint_clusters();10011002delta_endpoint_histogram.inc(endpoint_delta);10031004} // if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)10051006block_endpoint_indices.push_back(m_endpoint_remap_table_new_to_old[new_endpoint_index]);10071008prev_endpoint_index = new_endpoint_index;10091010if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))1011{1012int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index];10131014const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f;10151016int selector_history_buf_index = -1;10171018// At low comp levels this hurts compression a tiny amount, but is significantly faster so it's a good tradeoff.1019if ((m.m_is_cr_target) || (m_params.m_compression_level <= 1))1020{1021for (uint32_t j = 0; j < selector_history_buf.size(); j++)1022{1023const int trial_idx = selector_history_buf[j];1024if (trial_idx == new_selector_index)1025{1026total_used_selector_history_buf++;1027selector_history_buf_index = j;1028selector_history_buf_histogram.inc(j);1029break;1030}1031}1032}10331034// If the block is a CR target we can't override its selectors.1035if ((!m.m_is_cr_target) && (selector_history_buf_index == -1))1036{1037const pixel_block& src_pixels = r.get_source_pixel_block(block_index);10381039etc_block etc_blk = r.get_output_block(block_index);10401041// This is new code - the initial release just used the endpoints from the frontend, which isn't correct/accurate.1042const etc1_endpoint_palette_entry& q = m_endpoint_palette[m_endpoint_remap_table_new_to_old[new_endpoint_index]];1043etc_blk.set_block_color5_etc1s(q.m_color5);1044etc_blk.set_inten_tables_etc1s(q.m_inten5);10451046color_rgba block_colors[4];1047etc_blk.get_block_colors(block_colors, 0);10481049const uint8_t* pCur_selectors = &m_selector_palette[m.m_selector_index][0];10501051uint64_t cur_err = 0;1052if (r.get_params().m_perceptual)1053{1054for (uint32_t p = 0; p < 16; p++)1055cur_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);1056}1057else1058{1059for (uint32_t p = 0; p < 16; p++)1060cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false);1061}10621063const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh);10641065// Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code.10661067uint64_t best_trial_err = UINT64_MAX;1068int best_trial_idx = 0;1069uint32_t best_trial_history_buf_idx = 0;10701071for (uint32_t j = 0; j < selector_history_buf.size(); j++)1072{1073const int trial_idx = selector_history_buf[j];10741075const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]][0];10761077if (m_params.m_compression_level <= 1)1078{1079// Predict if evaluating the full color error would cause an early out, by summing the abs err of the selector indices.1080int sel_diff = 0;1081for (uint32_t p = 0; p < 16; p += 4)1082{1083sel_diff += iabs(pCur_selectors[p + 0] - pSelectors[p + 0]);1084sel_diff += iabs(pCur_selectors[p + 1] - pSelectors[p + 1]);1085sel_diff += iabs(pCur_selectors[p + 2] - pSelectors[p + 2]);1086sel_diff += iabs(pCur_selectors[p + 3] - pSelectors[p + 3]);1087if (sel_diff >= SEL_DIFF_THRESHOLD)1088break;1089}1090if (sel_diff >= SEL_DIFF_THRESHOLD)1091continue;1092}10931094const uint64_t thresh_err = minimum(limit_err, best_trial_err);1095uint64_t trial_err = 0;10961097// This tends to early out quickly, so SSE has a hard time competing.1098if (r.get_params().m_perceptual)1099{1100for (uint32_t p = 0; p < 16; p++)1101{1102uint32_t sel = pSelectors[p];1103trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false);1104if (trial_err > thresh_err)1105break;1106}1107}1108else1109{1110for (uint32_t p = 0; p < 16; p++)1111{1112uint32_t sel = pSelectors[p];1113trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false);1114if (trial_err > thresh_err)1115break;1116}1117}11181119if ((trial_err < best_trial_err) && (trial_err <= thresh_err))1120{1121assert(trial_err <= limit_err);11221123best_trial_err = trial_err;1124best_trial_idx = trial_idx;1125best_trial_history_buf_idx = j;1126}1127}11281129if (best_trial_err != UINT64_MAX)1130{1131if (new_selector_index != best_trial_idx)1132total_selector_indices_remapped++;11331134new_selector_index = best_trial_idx;11351136total_used_selector_history_buf++;11371138selector_history_buf_index = best_trial_history_buf_idx;11391140selector_history_buf_histogram.inc(best_trial_history_buf_idx);1141}11421143} // if (m_params.m_selector_rdo_quality_thresh > 0.0f)11441145m.m_selector_index = m_selector_remap_table_new_to_old[new_selector_index];114611471148if ((selector_history_buf_rle_count) && (selector_history_buf_index != 0))1149{1150if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)1151{1152selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);1153selector_syms[slice_index].push_back(selector_history_buf_rle_count);11541155int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;1156if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))1157selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);1158else1159selector_history_buf_rle_histogram.inc(run_sym);11601161selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);1162}1163else1164{1165for (int k = 0; k < selector_history_buf_rle_count; k++)1166{1167uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;11681169selector_syms[slice_index].push_back(sym_index);11701171selector_histogram.inc(sym_index);1172}1173}11741175selector_history_buf_rle_count = 0;1176}11771178if (selector_history_buf_index >= 0)1179{1180if (selector_history_buf_index == 0)1181selector_history_buf_rle_count++;1182else1183{1184uint32_t history_buf_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index;11851186selector_syms[slice_index].push_back(history_buf_sym);11871188selector_histogram.inc(history_buf_sym);1189}1190}1191else1192{1193selector_syms[slice_index].push_back(new_selector_index);11941195selector_histogram.inc(new_selector_index);1196}11971198m.m_selector_history_buf_index = selector_history_buf_index;11991200if (selector_history_buf_index < 0)1201selector_history_buf.add(new_selector_index);1202else if (selector_history_buf.size())1203selector_history_buf.use(selector_history_buf_index);1204}1205block_selector_indices.push_back(m.m_selector_index);12061207} // block_x12081209} // block_y12101211if (endpoint_pred_repeat_count > 0)1212{1213if (endpoint_pred_repeat_count > (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT)1214{1215endpoint_pred_histogram.inc(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);1216endpoint_pred_syms[slice_index].push_back(basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL);12171218endpoint_pred_syms[slice_index].push_back(endpoint_pred_repeat_count);1219}1220else1221{1222for (int j = 0; j < endpoint_pred_repeat_count; j++)1223{1224endpoint_pred_histogram.inc(prev_endpoint_pred_sym_bits);1225endpoint_pred_syms[slice_index].push_back(prev_endpoint_pred_sym_bits);1226}1227}12281229endpoint_pred_repeat_count = 0;1230}12311232if (selector_history_buf_rle_count)1233{1234if (selector_history_buf_rle_count >= (int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)1235{1236selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);1237selector_syms[slice_index].push_back(selector_history_buf_rle_count);12381239int run_sym = selector_history_buf_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;1240if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))1241selector_history_buf_rle_histogram.inc(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);1242else1243selector_history_buf_rle_histogram.inc(run_sym);12441245selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);1246}1247else1248{1249for (int i = 0; i < selector_history_buf_rle_count; i++)1250{1251uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;12521253selector_syms[slice_index].push_back(sym_index);12541255selector_histogram.inc(sym_index);1256}1257}12581259selector_history_buf_rle_count = 0;1260}12611262} // slice_index12631264//for (int i = 0; i <= 255 * 3; i++)1265//{1266// printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0);1267//}12681269double total_prep_time = tm.get_elapsed_secs();1270debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time);12711272debug_printf("Endpoint pred RDO total endpoint indices remapped: %u %3.2f%%\n",1273total_endpoint_indices_remapped, total_endpoint_indices_remapped * 100.0f / get_total_blocks());12741275debug_printf("Selector history RDO total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n",1276total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(),1277total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks());12781279//if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0))1280if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks))1281{1282int_vec unused;1283r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices);12841285create_endpoint_palette();1286}12871288check_for_valid_cr_blocks();1289compute_slice_crcs();12901291double endpoint_pred_entropy = endpoint_pred_histogram.get_entropy() / endpoint_pred_histogram.get_total();1292double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total();1293double selector_entropy = selector_histogram.get_entropy() / selector_histogram.get_total();12941295debug_printf("Histogram entropy: EndpointPred: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f\n", endpoint_pred_entropy, delta_endpoint_entropy, selector_entropy);12961297if (!endpoint_pred_histogram.get_total())1298endpoint_pred_histogram.inc(0);1299huffman_encoding_table endpoint_pred_model;1300if (!endpoint_pred_model.init(endpoint_pred_histogram, 16))1301{1302error_printf("endpoint_pred_model.init() failed!");1303return false;1304}13051306if (!delta_endpoint_histogram.get_total())1307delta_endpoint_histogram.inc(0);1308huffman_encoding_table delta_endpoint_model;1309if (!delta_endpoint_model.init(delta_endpoint_histogram, 16))1310{1311error_printf("delta_endpoint_model.init() failed!");1312return false;1313}1314if (!selector_histogram.get_total())1315selector_histogram.inc(0);13161317huffman_encoding_table selector_model;1318if (!selector_model.init(selector_histogram, 16))1319{1320error_printf("selector_model.init() failed!");1321return false;1322}13231324if (!selector_history_buf_rle_histogram.get_total())1325selector_history_buf_rle_histogram.inc(0);13261327huffman_encoding_table selector_history_buf_rle_model;1328if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 16))1329{1330error_printf("selector_history_buf_rle_model.init() failed!");1331return false;1332}13331334bitwise_coder coder;1335coder.init(1024 * 1024 * 4);13361337uint32_t endpoint_pred_model_bits = coder.emit_huffman_table(endpoint_pred_model);1338uint32_t delta_endpoint_bits = coder.emit_huffman_table(delta_endpoint_model);1339uint32_t selector_model_bits = coder.emit_huffman_table(selector_model);1340uint32_t selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model);13411342coder.put_bits(basist::MAX_SELECTOR_HISTORY_BUF_SIZE, 13);13431344debug_printf("Model sizes: EndpointPred: %u bits %u bytes (%3.3f bpp) DeltaEndpoint: %u bits %u bytes (%3.3f bpp) Selector: %u bits %u bytes (%3.3f bpp) SelectorHistBufRLE: %u bits %u bytes (%3.3f bpp)\n",1345endpoint_pred_model_bits, (endpoint_pred_model_bits + 7) / 8, endpoint_pred_model_bits / float(get_total_input_texels()),1346delta_endpoint_bits, (delta_endpoint_bits + 7) / 8, delta_endpoint_bits / float(get_total_input_texels()),1347selector_model_bits, (selector_model_bits + 7) / 8, selector_model_bits / float(get_total_input_texels()),1348selector_history_buf_run_sym_bits, (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels()));13491350coder.flush();13511352m_output.m_slice_image_tables = coder.get_bytes();13531354uint32_t total_endpoint_pred_bits = 0, total_delta_endpoint_bits = 0, total_selector_bits = 0;13551356uint32_t total_image_bytes = 0;13571358m_output.m_slice_image_data.resize(m_slices.size());13591360for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)1361{1362//const uint32_t width = m_slices[slice_index].m_width;1363//const uint32_t height = m_slices[slice_index].m_height;1364const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;1365const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;13661367coder.init(1024 * 1024 * 4);13681369uint32_t cur_selector_sym_ofs = 0;1370uint32_t selector_rle_count = 0;13711372int endpoint_pred_repeat_count = 0;1373uint32_t cur_endpoint_pred_sym_ofs = 0;1374// uint32_t prev_endpoint_pred_sym = 0;1375uint32_t prev_endpoint_index = 0;13761377for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)1378{1379for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)1380{1381const encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);13821383if (((block_x & 1) == 0) && ((block_y & 1) == 0))1384{1385if (endpoint_pred_repeat_count > 0)1386{1387endpoint_pred_repeat_count--;1388}1389else1390{1391uint32_t sym = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];13921393if (sym == basist::ENDPOINT_PRED_REPEAT_LAST_SYMBOL)1394{1395total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);13961397endpoint_pred_repeat_count = endpoint_pred_syms[slice_index][cur_endpoint_pred_sym_ofs++];1398assert(endpoint_pred_repeat_count >= (int)basist::ENDPOINT_PRED_MIN_REPEAT_COUNT);13991400total_endpoint_pred_bits += coder.put_vlc(endpoint_pred_repeat_count - basist::ENDPOINT_PRED_MIN_REPEAT_COUNT, basist::ENDPOINT_PRED_COUNT_VLC_BITS);14011402endpoint_pred_repeat_count--;1403}1404else1405{1406total_endpoint_pred_bits += coder.put_code(sym, endpoint_pred_model);14071408//prev_endpoint_pred_sym = sym;1409}1410}1411}14121413const int new_endpoint_index = m_endpoint_remap_table_old_to_new[m.m_endpoint_index];14141415if (m.m_endpoint_predictor == basist::NO_ENDPOINT_PRED_INDEX)1416{1417int endpoint_delta = new_endpoint_index - prev_endpoint_index;1418if (endpoint_delta < 0)1419endpoint_delta += (int)r.get_total_endpoint_clusters();14201421total_delta_endpoint_bits += coder.put_code(endpoint_delta, delta_endpoint_model);1422}14231424prev_endpoint_index = new_endpoint_index;14251426if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX))1427{1428if (!selector_rle_count)1429{1430uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++];14311432if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)1433selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++];14341435total_selector_bits += coder.put_code(selector_sym_index, selector_model);14361437if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)1438{1439int run_sym = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;1440if (run_sym >= ((int)basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))1441{1442total_selector_bits += coder.put_code(basist::SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model);14431444uint32_t n = selector_rle_count - basist::SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;1445total_selector_bits += coder.put_vlc(n, 7);1446}1447else1448total_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model);1449}1450}14511452if (selector_rle_count)1453selector_rle_count--;1454}14551456} // block_x14571458} // block_y14591460BASISU_BACKEND_VERIFY(cur_endpoint_pred_sym_ofs == endpoint_pred_syms[slice_index].size());1461BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size());14621463coder.flush();14641465m_output.m_slice_image_data[slice_index] = coder.get_bytes();14661467total_image_bytes += (uint32_t)coder.get_bytes().size();14681469debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / (m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height));14701471} // slice_index14721473const double total_texels = static_cast<double>(get_total_input_texels());1474const double total_blocks = static_cast<double>(get_total_blocks());14751476debug_printf("Total endpoint pred bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_endpoint_pred_bits, total_endpoint_pred_bits / 8, total_endpoint_pred_bits / total_texels, total_endpoint_pred_bits / total_blocks);1477debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_blocks);1478debug_printf("Total selector bits: %u bytes: %u bits/texel: %3.3f bits/block: %3.3f\n", total_selector_bits, total_selector_bits / 8, total_selector_bits / total_texels, total_selector_bits / total_blocks);14791480debug_printf("Total table bytes: %u, %3.3f bits/texel\n", m_output.m_slice_image_tables.size(), m_output.m_slice_image_tables.size() * 8.0f / total_texels);1481debug_printf("Total image bytes: %u, %3.3f bits/texel\n", total_image_bytes, total_image_bytes * 8.0f / total_texels);14821483return true;1484}14851486bool basisu_backend::encode_endpoint_palette()1487{1488const basisu_frontend& r = *m_pFront_end;14891490// The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again.1491bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters());1492uint32_t first_old_entry_index = UINT32_MAX;14931494for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)1495{1496const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y;1497for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)1498{1499for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)1500{1501encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y);1502const uint32_t old_endpoint_index = m.m_endpoint_index;15031504old_endpoint_was_used[old_endpoint_index] = true;1505first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index);1506} // block_x1507} // block_y1508} // slice_index15091510debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index);15111512// Maps NEW to OLD endpoints1513uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters());1514endpoint_remap_table_new_to_old.set_all(first_old_entry_index);15151516bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters());15171518for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++)1519{1520if (old_endpoint_was_used[old_endpoint_index])1521{1522const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index];15231524new_endpoint_was_used[new_endpoint_index] = true;15251526endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index;1527}1528}15291530// TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that.15311532uint32_t total_unused_new_entries = 0;1533for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++)1534if (!new_endpoint_was_used[i])1535total_unused_new_entries++;1536debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, new_endpoint_was_used.size());15371538bool is_grayscale = true;1539for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++)1540{1541int r5 = m_endpoint_palette[old_endpoint_index].m_color5[0];1542int g5 = m_endpoint_palette[old_endpoint_index].m_color5[1];1543int b5 = m_endpoint_palette[old_endpoint_index].m_color5[2];1544if ((r5 != g5) || (r5 != b5))1545{1546is_grayscale = false;1547break;1548}1549}15501551histogram color5_delta_hist0(32); // prev 0-9, delta is -9 to 311552histogram color5_delta_hist1(32); // prev 10-21, delta is -21 to 211553histogram color5_delta_hist2(32); // prev 22-31, delta is -31 to 91554histogram inten_delta_hist(8);15551556color_rgba prev_color5(16, 16, 16, 0);1557uint32_t prev_inten = 0;15581559for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)1560{1561const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];15621563int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten;1564inten_delta_hist.inc(delta_inten & 7);1565prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;15661567for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)1568{1569const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;15701571if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)1572color5_delta_hist0.inc(delta);1573else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)1574color5_delta_hist1.inc(delta);1575else1576color5_delta_hist2.inc(delta);15771578prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];1579}1580}15811582if (!color5_delta_hist0.get_total()) color5_delta_hist0.inc(0);1583if (!color5_delta_hist1.get_total()) color5_delta_hist1.inc(0);1584if (!color5_delta_hist2.get_total()) color5_delta_hist2.inc(0);15851586huffman_encoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;1587if (!color5_delta_model0.init(color5_delta_hist0, 16))1588{1589error_printf("color5_delta_model.init() failed!");1590return false;1591}15921593if (!color5_delta_model1.init(color5_delta_hist1, 16))1594{1595error_printf("color5_delta_model.init() failed!");1596return false;1597}15981599if (!color5_delta_model2.init(color5_delta_hist2, 16))1600{1601error_printf("color5_delta_model.init() failed!");1602return false;1603}16041605if (!inten_delta_model.init(inten_delta_hist, 16))1606{1607error_printf("inten3_model.init() failed!");1608return false;1609}16101611bitwise_coder coder;16121613coder.init(8192);16141615coder.emit_huffman_table(color5_delta_model0);1616coder.emit_huffman_table(color5_delta_model1);1617coder.emit_huffman_table(color5_delta_model2);1618coder.emit_huffman_table(inten_delta_model);16191620coder.put_bits(is_grayscale, 1);16211622prev_color5.set(16, 16, 16, 0);1623prev_inten = 0;16241625for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)1626{1627const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index];16281629int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7;1630coder.put_code(delta_inten, inten_delta_model);1631prev_inten = m_endpoint_palette[old_endpoint_index].m_inten5;16321633for (uint32_t i = 0; i < (is_grayscale ? 1U : 3U); i++)1634{1635const int delta = (m_endpoint_palette[old_endpoint_index].m_color5[i] - prev_color5[i]) & 31;16361637if (prev_color5[i] <= basist::COLOR5_PAL0_PREV_HI)1638coder.put_code(delta, color5_delta_model0);1639else if (prev_color5[i] <= basist::COLOR5_PAL1_PREV_HI)1640coder.put_code(delta, color5_delta_model1);1641else1642coder.put_code(delta, color5_delta_model2);16431644prev_color5[i] = m_endpoint_palette[old_endpoint_index].m_color5[i];1645}16461647} // q16481649coder.flush();16501651m_output.m_endpoint_palette = coder.get_bytes();16521653debug_printf("Endpoint codebook size: %u bits %u bytes, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",16548 * (int)m_output.m_endpoint_palette.size(), (int)m_output.m_endpoint_palette.size(), m_output.m_endpoint_palette.size() * 8.0f / r.get_total_endpoint_clusters(), m_output.m_endpoint_palette.size() * 8.0f / get_total_input_texels());16551656return true;1657}16581659bool basisu_backend::encode_selector_palette()1660{1661const basisu_frontend& r = *m_pFront_end;16621663histogram delta_selector_pal_histogram(256);16641665for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)1666{1667if (!q)1668continue;16691670const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];1671const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);16721673for (uint32_t j = 0; j < 4; j++)1674delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j));1675}16761677if (!delta_selector_pal_histogram.get_total())1678delta_selector_pal_histogram.inc(0);16791680huffman_encoding_table delta_selector_pal_model;1681if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16))1682{1683error_printf("delta_selector_pal_model.init() failed!");1684return false;1685}16861687bitwise_coder coder;1688coder.init(1024 * 1024);16891690coder.put_bits(0, 1); // use global codebook1691coder.put_bits(0, 1); // uses hybrid codebooks16921693coder.put_bits(0, 1); // raw bytes16941695coder.emit_huffman_table(delta_selector_pal_model);16961697for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)1698{1699if (!q)1700{1701for (uint32_t j = 0; j < 4; j++)1702coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8);1703continue;1704}17051706const etc1_selector_palette_entry& cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];1707const etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);17081709for (uint32_t j = 0; j < 4; j++)1710coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model);1711}17121713coder.flush();17141715m_output.m_selector_palette = coder.get_bytes();17161717if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4)1718{1719coder.init(1024 * 1024);17201721coder.put_bits(0, 1); // use global codebook1722coder.put_bits(0, 1); // uses hybrid codebooks17231724coder.put_bits(1, 1); // raw bytes17251726for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)1727{1728const uint32_t i = m_selector_remap_table_new_to_old[q];17291730for (uint32_t j = 0; j < 4; j++)1731coder.put_bits(m_selector_palette[i].get_byte(j), 8);1732}17331734coder.flush();17351736m_output.m_selector_palette = coder.get_bytes();1737}17381739debug_printf("Selector codebook bits: %u bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",1740(int)m_output.m_selector_palette.size() * 8, (int)m_output.m_selector_palette.size(),1741m_output.m_selector_palette.size() * 8.0f / r.get_total_selector_clusters(), m_output.m_selector_palette.size() * 8.0f / get_total_input_texels());17421743return true;1744}17451746uint32_t basisu_backend::encode()1747{1748//const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames;1749m_output.m_slice_desc = m_slices;1750m_output.m_etc1s = m_params.m_etc1s;1751m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks;1752m_output.m_srgb = m_pFront_end->get_params().m_perceptual;17531754create_endpoint_palette();1755create_selector_palette();17561757create_encoder_blocks();17581759if (!encode_image())1760return 0;17611762if (!encode_endpoint_palette())1763return 0;17641765if (!encode_selector_palette())1766return 0;17671768uint32_t total_compressed_bytes = (uint32_t)(m_output.m_slice_image_tables.size() + m_output.m_endpoint_palette.size() + m_output.m_selector_palette.size());1769for (uint32_t i = 0; i < m_output.m_slice_image_data.size(); i++)1770total_compressed_bytes += (uint32_t)m_output.m_slice_image_data[i].size();17711772debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels());17731774return total_compressed_bytes;1775}17761777} // namespace basisu177817791780