// Path: blob/master/modules/betsy/image_compress_betsy.cpp
// 20943 views
/**************************************************************************/1/* image_compress_betsy.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/28/**************************************************************************/2930#include "image_compress_betsy.h"3132#include "core/config/project_settings.h"3334#include "betsy_bc1.h"3536#include "alpha_stitch.glsl.gen.h"37#include "bc1.glsl.gen.h"38#include "bc4.glsl.gen.h"39#include "bc6h.glsl.gen.h"40#include "rgb_to_rgba.glsl.gen.h"41#include "servers/display/display_server.h"4243static Mutex betsy_mutex;44static BetsyCompressor *betsy = nullptr;4546static const BetsyShaderType FORMAT_TO_TYPE[BETSY_FORMAT_MAX] = {47BETSY_SHADER_BC1_STANDARD,48BETSY_SHADER_BC1_DITHER,49BETSY_SHADER_BC1_STANDARD,50BETSY_SHADER_BC4_SIGNED,51BETSY_SHADER_BC4_UNSIGNED,52BETSY_SHADER_BC4_SIGNED,53BETSY_SHADER_BC4_UNSIGNED,54BETSY_SHADER_BC6_SIGNED,55BETSY_SHADER_BC6_UNSIGNED,56};5758static const RD::DataFormat BETSY_TO_RD_FORMAT[BETSY_FORMAT_MAX] = {59RD::DATA_FORMAT_R32G32_UINT,60RD::DATA_FORMAT_R32G32_UINT,61RD::DATA_FORMAT_R32G32_UINT,62RD::DATA_FORMAT_R32G32_UINT,63RD::DATA_FORMAT_R32G32_UINT,64RD::DATA_FORMAT_R32G32_UINT,65RD::DATA_FORMAT_R32G32_UINT,66RD::DATA_FORMAT_R32G32B32A32_UINT,67RD::DATA_FORMAT_R32G32B32A32_UINT,68};6970static const Image::Format BETSY_TO_IMAGE_FORMAT[BETSY_FORMAT_MAX] = {71Image::FORMAT_DXT1,72Image::FORMAT_DXT1,73Image::FORMAT_DXT5,74Image::FORMAT_RGTC_R,75Image::FORMAT_RGTC_R,76Image::FORMAT_RGTC_RG,77Image::FORMAT_RGTC_RG,78Image::FORMAT_BPTC_RGBF,79Image::FORMAT_BPTC_RGBFU,80};8182void BetsyCompressor::_init() {83if (!DisplayServer::can_create_rendering_device()) {84return;85}8687// Create local RD.88RenderingContextDriver *rcd = nullptr;89RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();9091if (rd == nullptr) {92#if defined(RD_ENABLED)93#if defined(METAL_ENABLED)94rcd = memnew(RenderingContextDriverMetal);95rd = memnew(RenderingDevice);96#endif97#if defined(VULKAN_ENABLED)98if (rcd == nullptr) {99rcd = memnew(RenderingContextDriverVulkan);100rd = memnew(RenderingDevice);101}102#endif103#endif104if (rcd 
!= nullptr && rd != nullptr) {105Error err = rcd->initialize();106if (err == OK) {107err = rd->initialize(rcd);108}109110if (err != OK) {111memdelete(rd);112memdelete(rcd);113rd = nullptr;114rcd = nullptr;115}116}117}118119ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");120121compress_rd = rd;122compress_rcd = rcd;123124// Create the sampler state.125RD::SamplerState src_sampler_state;126{127src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;128src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;129src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;130src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;131src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;132}133134src_sampler = compress_rd->sampler_create(src_sampler_state);135136// Initialize RDShaderFiles.137{138Ref<RDShaderFile> bc1_shader;139bc1_shader.instantiate();140Error err = bc1_shader->parse_versions_from_text(bc1_shader_glsl);141142if (err != OK) {143bc1_shader->print_errors("Betsy BC1 compress shader");144}145146// Standard BC1 compression.147cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("standard"));148ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled.is_null());149150cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_STANDARD].compiled);151ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_STANDARD].pipeline.is_null());152153// Dither BC1 variant. 
Unused, so comment out for now.154//cached_shaders[BETSY_SHADER_BC1_DITHER].compiled = compress_rd->shader_create_from_spirv(bc1_shader->get_spirv_stages("dithered"));155//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled.is_null());156157//cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC1_DITHER].compiled);158//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC1_DITHER].pipeline.is_null());159}160161{162Ref<RDShaderFile> bc4_shader;163bc4_shader.instantiate();164Error err = bc4_shader->parse_versions_from_text(bc4_shader_glsl);165166if (err != OK) {167bc4_shader->print_errors("Betsy BC4 compress shader");168}169170// Signed BC4 compression. Unused, so comment out for now.171//cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("signed"));172//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled.is_null());173174//cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_SIGNED].compiled);175//ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_SIGNED].pipeline.is_null());176177// Unsigned BC4 compression.178cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc4_shader->get_spirv_stages("unsigned"));179ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled.is_null());180181cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].compiled);182ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC4_UNSIGNED].pipeline.is_null());183}184185{186Ref<RDShaderFile> bc6h_shader;187bc6h_shader.instantiate();188Error err = bc6h_shader->parse_versions_from_text(bc6h_shader_glsl);189190if (err != OK) {191bc6h_shader->print_errors("Betsy BC6 compress shader");192}193194// Signed BC6 compression.195cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled = 
compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("signed"));196ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled.is_null());197198cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_SIGNED].compiled);199ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_SIGNED].pipeline.is_null());200201// Unsigned BC6 compression.202cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled = compress_rd->shader_create_from_spirv(bc6h_shader->get_spirv_stages("unsigned"));203ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled.is_null());204205cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].compiled);206ERR_FAIL_COND(cached_shaders[BETSY_SHADER_BC6_UNSIGNED].pipeline.is_null());207}208209{210Ref<RDShaderFile> alpha_stitch_shader;211alpha_stitch_shader.instantiate();212Error err = alpha_stitch_shader->parse_versions_from_text(alpha_stitch_shader_glsl);213214if (err != OK) {215alpha_stitch_shader->print_errors("Betsy alpha stitch shader");216}217cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled = compress_rd->shader_create_from_spirv(alpha_stitch_shader->get_spirv_stages());218ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled.is_null());219220cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);221ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());222}223224{225Ref<RDShaderFile> rgb_to_rgba_shader;226rgb_to_rgba_shader.instantiate();227Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);228229if (err != OK) {230rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");231}232233// Float32.234cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = 
compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));235ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());236237cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);238ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());239240// Float16.241cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));242ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());243244cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);245ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());246247// Unorm8.248cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));249ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());250251cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);252ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());253254// Unorm16.255cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));256ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());257258cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);259ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());260}261}262263void BetsyCompressor::init() {264WorkerThreadPool::TaskID tid = 
WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true, "Betsy pump task", true);265command_queue.set_pump_task_id(tid);266command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);267command_queue.push_and_sync(this, &BetsyCompressor::_init);268DEV_ASSERT(task_id == tid);269}270271void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {272task_id = p_pump_task_id;273}274275// Yield thread to WTP so other tasks can be done on it.276// Automatically regains control as soon a task is pushed to the command queue.277void BetsyCompressor::_thread_loop() {278while (!exit) {279WorkerThreadPool::get_singleton()->yield();280command_queue.flush_all();281}282}283284void BetsyCompressor::_thread_exit() {285exit = true;286287if (compress_rd != nullptr) {288if (dxt1_encoding_table_buffer.is_valid()) {289compress_rd->free_rid(dxt1_encoding_table_buffer);290}291292compress_rd->free_rid(src_sampler);293294// Clear the shader cache, pipelines will be unreferenced automatically.295for (int i = 0; i < BETSY_SHADER_MAX; i++) {296if (cached_shaders[i].compiled.is_valid()) {297compress_rd->free_rid(cached_shaders[i].compiled);298}299}300301// Free the RD (and RCD if necessary).302memdelete(compress_rd);303compress_rd = nullptr;304if (compress_rcd != nullptr) {305memdelete(compress_rcd);306compress_rcd = nullptr;307}308}309}310311void BetsyCompressor::finish() {312command_queue.push(this, &BetsyCompressor::_thread_exit);313if (task_id != WorkerThreadPool::INVALID_TASK_ID) {314WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);315task_id = WorkerThreadPool::INVALID_TASK_ID;316}317}318319// Helper functions.320321static int get_next_multiple(int n, int m) {322return n + (m - (n % m));323}324325static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {326r_is_rgb = false;327328switch (r_img->get_format()) {329case 
Image::FORMAT_L8:330r_img->convert(Image::FORMAT_RGBA8);331r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;332break;333334case Image::FORMAT_LA8:335r_img->convert(Image::FORMAT_RGBA8);336r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;337break;338339case Image::FORMAT_R8:340r_format = RD::DATA_FORMAT_R8_UNORM;341break;342343case Image::FORMAT_RG8:344r_format = RD::DATA_FORMAT_R8G8_UNORM;345break;346347case Image::FORMAT_RGB8:348r_is_rgb = true;349r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;350break;351352case Image::FORMAT_RGBA8:353r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;354break;355356case Image::FORMAT_RH:357r_format = RD::DATA_FORMAT_R16_SFLOAT;358break;359360case Image::FORMAT_RGH:361r_format = RD::DATA_FORMAT_R16G16_SFLOAT;362break;363364case Image::FORMAT_RGBH:365r_is_rgb = true;366r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;367break;368369case Image::FORMAT_RGBAH:370r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;371break;372373case Image::FORMAT_RF:374r_format = RD::DATA_FORMAT_R32_SFLOAT;375break;376377case Image::FORMAT_RGF:378r_format = RD::DATA_FORMAT_R32G32_SFLOAT;379break;380381case Image::FORMAT_RGBF:382r_is_rgb = true;383r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;384break;385386case Image::FORMAT_RGBAF:387r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;388break;389390case Image::FORMAT_RGBE9995:391r_format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;392break;393394case Image::FORMAT_R16:395r_format = RD::DATA_FORMAT_R16_UNORM;396break;397398case Image::FORMAT_RG16:399r_format = RD::DATA_FORMAT_R16G16_UNORM;400break;401402case Image::FORMAT_RGB16:403r_is_rgb = true;404r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;405break;406407case Image::FORMAT_RGBA16:408r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;409break;410411default: {412return ERR_UNAVAILABLE;413}414}415416return OK;417}418419Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {420uint64_t start_time = OS::get_singleton()->get_ticks_msec();421422// Return an error so that the 
compression can fall back to cpu compression423if (compress_rd == nullptr) {424return ERR_CANT_CREATE;425}426427if (r_img->is_compressed()) {428return ERR_INVALID_DATA;429}430431int img_width = r_img->get_width();432int img_height = r_img->get_height();433if (img_width % 4 != 0 || img_height % 4 != 0) {434img_width = img_width <= 2 ? img_width : (img_width + 3) & ~3;435img_height = img_height <= 2 ? img_height : (img_height + 3) & ~3;436}437438Error err = OK;439440// Destination format.441Image::Format dest_format = BETSY_TO_IMAGE_FORMAT[p_format];442RD::DataFormat dst_rd_format = BETSY_TO_RD_FORMAT[p_format];443444BetsyShaderType shader_type = FORMAT_TO_TYPE[p_format];445BetsyShader shader = cached_shaders[shader_type];446BetsyShader secondary_shader; // The secondary shader is used for alpha blocks. For BC it's BC4U and for ETC it's ETC2_RU (8-bit variant).447BetsyShader stitch_shader;448bool needs_alpha_block = false;449450switch (p_format) {451case BETSY_FORMAT_BC3:452case BETSY_FORMAT_BC5_UNSIGNED:453needs_alpha_block = true;454secondary_shader = cached_shaders[BETSY_SHADER_BC4_UNSIGNED];455stitch_shader = cached_shaders[BETSY_SHADER_ALPHA_STITCH];456break;457default:458break;459}460461// src_texture format information.462RD::TextureFormat src_texture_format;463{464src_texture_format.array_layers = 1;465src_texture_format.depth = 1;466src_texture_format.mipmaps = 1;467src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;468src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;469}470471bool needs_rgb_to_rgba = false;472err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);473474if (err != OK) {475return err;476}477478// For the destination format just copy the source format and change the usage bits.479RD::TextureFormat dst_texture_format = src_texture_format;480dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | 
RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;481dst_texture_format.format = dst_rd_format;482483RD::TextureFormat dst_texture_format_alpha;484RD::TextureFormat dst_texture_format_combined;485486if (needs_alpha_block) {487dst_texture_format_combined = dst_texture_format;488dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT;489490dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT;491492dst_texture_format_alpha = dst_texture_format;493dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT;494}495496// Encoding table setup.497if ((dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) && dxt1_encoding_table_buffer.is_null()) {498LocalVector<float> dxt1_encoding_table;499dxt1_encoding_table.resize(256 * 4);500501for (int i = 0; i < 256; i++) {502dxt1_encoding_table[i * 2 + 0] = static_cast<float>(stb__OMatch5[i][0]);503dxt1_encoding_table[i * 2 + 1] = static_cast<float>(stb__OMatch5[i][1]);504dxt1_encoding_table[512 + (i * 2 + 0)] = static_cast<float>(stb__OMatch6[i][0]);505dxt1_encoding_table[512 + (i * 2 + 1)] = static_cast<float>(stb__OMatch6[i][1]);506}507508dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(dxt1_encoding_table.size() * sizeof(float), Span<float>(dxt1_encoding_table).reinterpret<uint8_t>());509}510511const int mip_count = r_img->get_mipmap_count() + 1;512513// Container for the compressed data.514Vector<uint8_t> dst_data;515dst_data.resize(Image::get_image_data_size(img_width, img_height, dest_format, r_img->has_mipmaps()));516uint8_t *dst_data_ptr = dst_data.ptrw();517518Vector<Vector<uint8_t>> src_images;519src_images.push_back(Vector<uint8_t>());520Vector<uint8_t> *src_image_ptr = src_images.ptrw();521522// Compress each mipmap.523for (int i = 0; i < mip_count; i++) {524int width, height;525Image::get_image_mipmap_offset_and_dimensions(img_width, img_height, dest_format, i, width, 
height);526527int64_t src_mip_ofs, src_mip_size;528int src_mip_w, src_mip_h;529r_img->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);530531// Set the source texture width and size.532src_texture_format.height = height;533src_texture_format.width = width;534535// Set the destination texture width and size.536dst_texture_format.height = (height + 3) >> 2;537dst_texture_format.width = (width + 3) >> 2;538539// Pad textures to nearest block by smearing.540if (width != src_mip_w || height != src_mip_h) {541const uint8_t *src_mip_read = r_img->ptr() + src_mip_ofs;542543// Reserve the buffer for padded image data.544int px_size = Image::get_format_pixel_size(r_img->get_format());545src_image_ptr[0].resize(width * height * px_size);546uint8_t *ptrw = src_image_ptr[0].ptrw();547548int x = 0, y = 0;549for (y = 0; y < src_mip_h; y++) {550for (x = 0; x < src_mip_w; x++) {551memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);552}553554// First, smear in x.555for (; x < width; x++) {556memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);557}558}559560// Then, smear in y.561for (; y < height; y++) {562for (x = 0; x < width; x++) {563memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);564}565}566} else {567// Create a buffer filled with the source mip layer data.568src_image_ptr[0].resize(src_mip_size);569memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + src_mip_ofs, src_mip_size);570}571572// Create the textures on the GPU.573RID src_texture;574RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());575576if (needs_rgb_to_rgba) {577// RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader578// which takes the input data as an SSBO and converts it directly into an RGBA image.579BetsyShaderType rgb_shader_type = 
BETSY_SHADER_MAX;580581switch (r_img->get_format()) {582case Image::FORMAT_RGB8:583rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;584break;585case Image::FORMAT_RGBH:586rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;587break;588case Image::FORMAT_RGBF:589rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;590break;591case Image::FORMAT_RGB16:592rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;593break;594default:595break;596}597598// The source 'RGB' buffer.599RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());600601RD::TextureFormat rgba_texture_format = src_texture_format;602rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;603src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());604605Vector<RD::Uniform> uniforms;606{607{608RD::Uniform u;609u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;610u.binding = 0;611u.append_id(source_buffer);612uniforms.push_back(u);613}614{615RD::Uniform u;616u.uniform_type = RD::UNIFORM_TYPE_IMAGE;617u.binding = 1;618u.append_id(src_texture);619uniforms.push_back(u);620}621}622623BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];624625RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);626RD::ComputeListID compute_list = compress_rd->compute_list_begin();627628compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);629compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);630631// Prepare the push constant with the mipmap's resolution.632RGBToRGBAPushConstant push_constant;633push_constant.width = width;634push_constant.height = height;635636compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));637compress_rd->compute_list_dispatch(compute_list, 
get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);638639compress_rd->compute_list_end();640641compress_rd->free_rid(source_buffer);642} else {643src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);644}645646{647Vector<RD::Uniform> uniforms;648{649{650RD::Uniform u;651u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;652u.binding = 0;653u.append_id(src_sampler);654u.append_id(src_texture);655uniforms.push_back(u);656}657{658RD::Uniform u;659u.uniform_type = RD::UNIFORM_TYPE_IMAGE;660u.binding = 1;661u.append_id(dst_texture_primary);662uniforms.push_back(u);663}664665if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) {666RD::Uniform u;667u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;668u.binding = 2;669u.append_id(dxt1_encoding_table_buffer);670uniforms.push_back(u);671}672}673674RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);675RD::ComputeListID compute_list = compress_rd->compute_list_begin();676677compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);678compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);679680switch (shader_type) {681case BETSY_SHADER_BC6_SIGNED:682case BETSY_SHADER_BC6_UNSIGNED: {683BC6PushConstant push_constant;684push_constant.sizeX = 1.0f / width;685push_constant.sizeY = 1.0f / height;686687compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));688compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);689} break;690691case BETSY_SHADER_BC1_STANDARD: {692BC1PushConstant push_constant;693push_constant.num_refines = 2;694695compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));696compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);697} break;698699case 
BETSY_SHADER_BC4_UNSIGNED: {700BC4PushConstant push_constant;701push_constant.channel_idx = 0;702703compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));704compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);705} break;706707default: {708} break;709}710711compress_rd->compute_list_end();712713if (!needs_alpha_block) {714compress_rd->submit();715compress_rd->sync();716}717}718719RID dst_texture_rid = dst_texture_primary;720721if (needs_alpha_block) {722// Set the destination texture width and size.723dst_texture_format_alpha.height = (height + 3) >> 2;724dst_texture_format_alpha.width = (width + 3) >> 2;725726RID dst_texture_alpha = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView());727728{729Vector<RD::Uniform> uniforms;730{731{732RD::Uniform u;733u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;734u.binding = 0;735u.append_id(src_sampler);736u.append_id(src_texture);737uniforms.push_back(u);738}739{740RD::Uniform u;741u.uniform_type = RD::UNIFORM_TYPE_IMAGE;742u.binding = 1;743u.append_id(dst_texture_alpha);744uniforms.push_back(u);745}746}747748RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0);749RD::ComputeListID compute_list = compress_rd->compute_list_begin();750751compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline);752compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);753754BC4PushConstant push_constant;755push_constant.channel_idx = dest_format == Image::FORMAT_DXT5 ? 
3 : 1;756757compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));758compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);759760compress_rd->compute_list_end();761}762763// Stitching764765// Set the destination texture width and size.766dst_texture_format_combined.height = (height + 3) >> 2;767dst_texture_format_combined.width = (width + 3) >> 2;768769RID dst_texture_combined = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView());770771{772Vector<RD::Uniform> uniforms;773{774{775RD::Uniform u;776u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;777u.binding = 0;778u.append_id(src_sampler);779u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_alpha : dst_texture_primary);780uniforms.push_back(u);781}782{783RD::Uniform u;784u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;785u.binding = 1;786u.append_id(src_sampler);787u.append_id(dest_format == Image::FORMAT_DXT5 ? 
dst_texture_primary : dst_texture_alpha);788uniforms.push_back(u);789}790{791RD::Uniform u;792u.uniform_type = RD::UNIFORM_TYPE_IMAGE;793u.binding = 2;794u.append_id(dst_texture_combined);795uniforms.push_back(u);796}797}798799RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0);800RD::ComputeListID compute_list = compress_rd->compute_list_begin();801802compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline);803compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);804compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);805806compress_rd->compute_list_end();807808compress_rd->submit();809compress_rd->sync();810}811812dst_texture_rid = dst_texture_combined;813814compress_rd->free_rid(dst_texture_primary);815compress_rd->free_rid(dst_texture_alpha);816}817818// Copy data from the GPU to the buffer.819const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture_rid, 0);820int64_t dst_ofs = Image::get_image_mipmap_offset(img_width, img_height, dest_format, i);821822memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());823824// Free the source and dest texture.825compress_rd->free_rid(src_texture);826compress_rd->free_rid(dst_texture_rid);827}828829src_images.clear();830831// Set the compressed data to the image.832r_img->set_data(img_width, img_height, r_img->has_mipmaps(), dest_format, dst_data);833834print_verbose(835vformat("Betsy: Encoding a %dx%d image with %d mipmaps as %s took %d ms.",836img_width,837img_height,838r_img->get_mipmap_count(),839Image::get_format_name(dest_format),840OS::get_singleton()->get_ticks_msec() - start_time));841842return OK;843}844845void ensure_betsy_exists() {846betsy_mutex.lock();847if (betsy == nullptr) {848betsy = memnew(BetsyCompressor);849betsy->init();850}851betsy_mutex.unlock();852}853854Error _betsy_compress_bptc(Image *r_img, 
Image::UsedChannels p_channels) {855ensure_betsy_exists();856Image::Format format = r_img->get_format();857Error result = ERR_UNAVAILABLE;858859if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {860if (r_img->detect_signed()) {861result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);862} else {863result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);864}865}866867if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {868free_device();869}870871return result;872}873874Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {875ensure_betsy_exists();876Error result = ERR_UNAVAILABLE;877878switch (p_channels) {879case Image::USED_CHANNELS_RGB:880case Image::USED_CHANNELS_L:881result = betsy->compress(BETSY_FORMAT_BC1, r_img);882break;883884case Image::USED_CHANNELS_RGBA:885case Image::USED_CHANNELS_LA:886result = betsy->compress(BETSY_FORMAT_BC3, r_img);887break;888889case Image::USED_CHANNELS_R:890result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img);891break;892893case Image::USED_CHANNELS_RG:894result = betsy->compress(BETSY_FORMAT_BC5_UNSIGNED, r_img);895break;896897default:898break;899}900901if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {902free_device();903}904905return result;906}907908void free_device() {909if (betsy != nullptr) {910betsy->finish();911memdelete(betsy);912}913}914915916