CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/GPU/thin3d.cpp
Views: 1401
#include <cassert>1#include <cstring>2#include <cstdint>34#include "Common/Data/Convert/ColorConv.h"5#include "Common/GPU/thin3d.h"6#include "Common/Log.h"7#include "Common/System/Display.h"89namespace Draw {1011size_t DataFormatSizeInBytes(DataFormat fmt) {12switch (fmt) {13case DataFormat::R8_UNORM: return 1;14case DataFormat::R8G8_UNORM: return 2;15case DataFormat::R8G8B8_UNORM: return 3;1617case DataFormat::R4G4_UNORM_PACK8: return 1;18case DataFormat::R4G4B4A4_UNORM_PACK16: return 2;19case DataFormat::B4G4R4A4_UNORM_PACK16: return 2;20case DataFormat::A4R4G4B4_UNORM_PACK16: return 2;21case DataFormat::R5G5B5A1_UNORM_PACK16: return 2;22case DataFormat::B5G5R5A1_UNORM_PACK16: return 2;23case DataFormat::R5G6B5_UNORM_PACK16: return 2;24case DataFormat::B5G6R5_UNORM_PACK16: return 2;25case DataFormat::A1R5G5B5_UNORM_PACK16: return 2;2627case DataFormat::R8G8B8A8_UNORM:28case DataFormat::R8G8B8A8_UNORM_SRGB: return 4;29case DataFormat::B8G8R8A8_UNORM:30case DataFormat::B8G8R8A8_UNORM_SRGB: return 4;3132case DataFormat::R8G8B8A8_SNORM: return 4;33case DataFormat::R8G8B8A8_UINT: return 4;34case DataFormat::R8G8B8A8_SINT: return 4;3536case DataFormat::R16_UNORM: return 2;3738case DataFormat::R16_FLOAT: return 2;39case DataFormat::R16G16_FLOAT: return 4;40case DataFormat::R16G16B16A16_FLOAT: return 8;41case DataFormat::R32_FLOAT: return 4;42case DataFormat::R32G32_FLOAT: return 8;43case DataFormat::R32G32B32_FLOAT: return 12;44case DataFormat::R32G32B32A32_FLOAT: return 16;4546case DataFormat::S8: return 1;47case DataFormat::D16: return 2;48case DataFormat::D16_S8: return 3;49case DataFormat::D24_S8: return 4;50case DataFormat::D32F: return 4;51// Or maybe 8...52case DataFormat::D32F_S8: return 5;5354default:55return 0;56}57}5859const char *DataFormatToString(DataFormat fmt) {60switch (fmt) {61case DataFormat::R8_UNORM: return "R8_UNORM";62case DataFormat::R8G8_UNORM: return "R8G8_UNORM";63case DataFormat::R8G8B8A8_UNORM: return "R8G8B8A8_UNORM";64case DataFormat::B8G8R8A8_UNORM: return "B8G8R8A8_UNORM";65case DataFormat::R16_UNORM: return "R16_UNORM";66case DataFormat::R16_FLOAT: return "R16_FLOAT";67case DataFormat::R32_FLOAT: return "R32_FLOAT";6869case DataFormat::S8: return "S8";70case DataFormat::D16: return "D16";71case DataFormat::D16_S8: return "D16_S8";72case DataFormat::D24_S8: return "D24_S8";73case DataFormat::D32F: return "D32F";74case DataFormat::D32F_S8: return "D32F_S8";7576default:77return "(N/A)";78}79}8081bool DataFormatIsDepthStencil(DataFormat fmt) {82switch (fmt) {83case DataFormat::D16:84case DataFormat::D16_S8:85case DataFormat::D24_S8:86case DataFormat::S8:87case DataFormat::D32F:88case DataFormat::D32F_S8:89return true;90default:91return false;92}93}9495// We don't bother listing the formats that are irrelevant for PPSSPP, like BC6 (HDR format)96// or weird-shaped ASTC formats. We only support 4x4 block size formats for now.97// If you pass in a blockSize parameter, it receives byte count that a 4x4 block takes in this format.98bool DataFormatIsBlockCompressed(DataFormat fmt, int *blockSize) {99switch (fmt) {100case DataFormat::BC1_RGBA_UNORM_BLOCK:101case DataFormat::BC4_UNORM_BLOCK:102case DataFormat::ETC2_R8G8B8_UNORM_BLOCK:103if (blockSize) *blockSize = 8; // 64 bits104return true;105case DataFormat::BC2_UNORM_BLOCK:106case DataFormat::BC3_UNORM_BLOCK:107case DataFormat::BC5_UNORM_BLOCK:108case DataFormat::BC7_UNORM_BLOCK:109case DataFormat::ETC2_R8G8B8A1_UNORM_BLOCK:110case DataFormat::ETC2_R8G8B8A8_UNORM_BLOCK:111case DataFormat::ASTC_4x4_UNORM_BLOCK:112if (blockSize) *blockSize = 16; // 128 bits113return true;114default:115if (blockSize) *blockSize = 0;116return false;117}118}119120RefCountedObject::~RefCountedObject() {121const int rc = refcount_.load();122_dbg_assert_msg_(rc == 0xDEDEDE, "Unexpected refcount %d in object of type '%s'", rc, name_);123}124125bool RefCountedObject::Release() {126if (refcount_ > 0 && refcount_ < 10000) {127if (--refcount_ == 0) {128// Make it very obvious if we try to free this again.129refcount_ = 0xDEDEDE;130delete this;131return true;132}133} else {134// No point in printing the name here if the object has already been free-d, it'll be corrupt and dangerous to print.135_dbg_assert_msg_(false, "Refcount (%d) invalid for object %p - corrupt?", refcount_.load(), this);136}137return false;138}139140bool RefCountedObject::ReleaseAssertLast() {141bool released = Release();142_dbg_assert_msg_(released, "RefCountedObject: Expected to be the last reference, but isn't! (%s)", name_);143return released;144}145146// ================================== PIXEL/FRAGMENT SHADERS147148// The Vulkan ones can be re-used with modern GL later if desired, as they're just GLSL.149150static const std::vector<ShaderSource> fsTexCol = {151{ShaderLanguage::GLSL_1xx,152"#ifdef GL_ES\n"153"precision lowp float;\n"154"#endif\n"155"#if __VERSION__ >= 130\n"156"#define varying in\n"157"#define texture2D texture\n"158"#define gl_FragColor fragColor0\n"159"out vec4 fragColor0;\n"160"#endif\n"161"varying vec4 oColor0;\n"162"varying vec2 oTexCoord0;\n"163"uniform sampler2D Sampler0;\n"164"void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0) * oColor0; }\n"165},166{ShaderLanguage::HLSL_D3D9,167"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"168"sampler2D Sampler0 : register(s0);\n"169"float4 main(PS_INPUT input) : COLOR0 {\n"170" return input.color * tex2D(Sampler0, input.uv);\n"171"}\n"172},173{ShaderLanguage::HLSL_D3D11,174"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"175"SamplerState samp : register(s0);\n"176"Texture2D<float4> tex : register(t0);\n"177"float4 main(PS_INPUT input) : SV_Target {\n"178" float4 col = input.color * tex.Sample(samp, input.uv);\n"179" return col;\n"180"}\n"181},182{ShaderLanguage::GLSL_VULKAN,183"#version 140\n"184"#extension GL_ARB_separate_shader_objects : enable\n"185"#extension GL_ARB_shading_language_420pack : enable\n"186"layout(location = 0) in vec4 oColor0;\n"187"layout(location = 1) in vec2 oTexCoord0;\n"188"layout(location = 0) out vec4 fragColor0;\n"189"layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n"190"void main() { fragColor0 = texture(Sampler0, oTexCoord0) * oColor0; }\n"191}192};193194static const std::vector<ShaderSource> fsTexColRBSwizzle = {195{GLSL_1xx,196"#ifdef GL_ES\n"197"precision lowp float;\n"198"#endif\n"199"#if __VERSION__ >= 130\n"200"#define varying in\n"201"#define texture2D texture\n"202"#define gl_FragColor fragColor0\n"203"out vec4 fragColor0;\n"204"#endif\n"205"varying vec4 oColor0;\n"206"varying vec2 oTexCoord0;\n"207"uniform sampler2D Sampler0;\n"208"void main() { gl_FragColor = texture2D(Sampler0, oTexCoord0).zyxw * oColor0; }\n"209},210{ShaderLanguage::HLSL_D3D9,211"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"212"sampler2D Sampler0 : register(s0);\n"213"float4 main(PS_INPUT input) : COLOR0 {\n"214" return input.color * tex2D(Sampler0, input.uv).zyxw;\n"215"}\n"216},217{ShaderLanguage::HLSL_D3D11,218"struct PS_INPUT { float4 color : COLOR0; float2 uv : TEXCOORD0; };\n"219"SamplerState samp : register(s0);\n"220"Texture2D<float4> tex : register(t0);\n"221"float4 main(PS_INPUT input) : SV_Target {\n"222" float4 col = input.color * tex.Sample(samp, input.uv).bgra;\n"223" return col;\n"224"}\n"225},226{ShaderLanguage::GLSL_VULKAN,227"#version 140\n"228"#extension GL_ARB_separate_shader_objects : enable\n"229"#extension GL_ARB_shading_language_420pack : enable\n"230"layout(location = 0) in vec4 oColor0;\n"231"layout(location = 1) in vec2 oTexCoord0;\n"232"layout(location = 0) out vec4 fragColor0\n;"233"layout(set = 0, binding = 1) uniform sampler2D Sampler0;\n"234"void main() { fragColor0 = texture(Sampler0, oTexCoord0).bgra * oColor0; }\n"235}236};237238static const std::vector<ShaderSource> fsCol = {239{ GLSL_1xx,240"#ifdef GL_ES\n"241"precision lowp float;\n"242"#endif\n"243"#if __VERSION__ >= 130\n"244"#define varying in\n"245"#define gl_FragColor fragColor0\n"246"out vec4 fragColor0;\n"247"#endif\n"248"varying vec4 oColor0;\n"249"void main() { gl_FragColor = oColor0; }\n"250},251{ ShaderLanguage::HLSL_D3D9,252"struct PS_INPUT { float4 color : COLOR0; };\n"253"float4 main(PS_INPUT input) : COLOR0 {\n"254" return input.color;\n"255"}\n"256},257{ ShaderLanguage::HLSL_D3D11,258"struct PS_INPUT { float4 color : COLOR0; };\n"259"float4 main(PS_INPUT input) : SV_Target {\n"260" return input.color;\n"261"}\n"262},263{ ShaderLanguage::GLSL_VULKAN,264"#version 140\n"265"#extension GL_ARB_separate_shader_objects : enable\n"266"#extension GL_ARB_shading_language_420pack : enable\n"267"layout(location = 0) in vec4 oColor0;\n"268"layout(location = 0) out vec4 fragColor0;\n"269"void main() { fragColor0 = oColor0; }\n"270}271};272273// ================================== VERTEX SHADERS274275static const std::vector<ShaderSource> vsCol = {276{ GLSL_1xx,277"#if __VERSION__ >= 130\n"278"#define attribute in\n"279"#define varying out\n"280"#endif\n"281"attribute vec3 Position;\n"282"attribute vec4 Color0;\n"283"varying vec4 oColor0;\n"284285"uniform mat4 WorldViewProj;\n"286"uniform vec2 TintSaturation;\n"287"void main() {\n"288" gl_Position = WorldViewProj * vec4(Position, 1.0);\n"289" oColor0 = Color0;\n"290"}"291},292{ ShaderLanguage::HLSL_D3D9,293"struct VS_INPUT { float3 Position : POSITION; float4 Color0 : COLOR0; };\n"294"struct VS_OUTPUT { float4 Position : POSITION; float4 Color0 : COLOR0; };\n"295"float4x4 WorldViewProj : register(c0);\n"296"float2 TintSaturation : register(c4);\n"297"VS_OUTPUT main(VS_INPUT input) {\n"298" VS_OUTPUT output;\n"299" output.Position = mul(float4(input.Position, 1.0), WorldViewProj);\n"300" output.Color0 = input.Color0;\n"301" return output;\n"302"}\n"303},304{ ShaderLanguage::HLSL_D3D11,305"struct VS_INPUT { float3 Position : POSITION; float4 Color0 : COLOR0; };\n"306"struct VS_OUTPUT { float4 Color0 : COLOR0; float4 Position : SV_Position; };\n"307"cbuffer ConstantBuffer : register(b0) {\n"308" matrix WorldViewProj;\n"309" float2 TintSaturation;\n"310"};\n"311"VS_OUTPUT main(VS_INPUT input) {\n"312" VS_OUTPUT output;\n"313" output.Position = mul(WorldViewProj, float4(input.Position, 1.0));\n"314" output.Color0 = input.Color0;\n"315" return output;\n"316"}\n"317},318{ ShaderLanguage::GLSL_VULKAN,319R"(#version 450320#extension GL_ARB_separate_shader_objects : enable321#extension GL_ARB_shading_language_420pack : enable322layout (std140, set = 0, binding = 0) uniform bufferVals {323mat4 WorldViewProj;324vec2 TintSaturation;325} myBufferVals;326layout (location = 0) in vec4 pos;327layout (location = 1) in vec4 inColor;328layout (location = 0) out vec4 outColor;329out gl_PerVertex { vec4 gl_Position; };330void main() {331outColor = inColor;332gl_Position = myBufferVals.WorldViewProj * pos;333}334)"335}336};337338const UniformBufferDesc vsColBufDesc { sizeof(VsColUB), {339{ "WorldViewProj", 0, -1, UniformType::MATRIX4X4, 0 },340{ "TintSaturation", 4, -1, UniformType::FLOAT2, 64 },341} };342343static const std::vector<ShaderSource> vsTexColNoTint = { {344GLSL_1xx,345R"(346#if __VERSION__ >= 130347#define attribute in348#define varying out349#endif350attribute vec3 Position;351attribute vec4 Color0;352attribute vec2 TexCoord0;353varying vec4 oColor0;354varying vec2 oTexCoord0;355uniform mat4 WorldViewProj;356uniform vec2 TintSaturation;357void main() {358gl_Position = WorldViewProj * vec4(Position, 1.0);359oColor0 = Color0;360oTexCoord0 = TexCoord0;361})"362} };363364static const std::vector<ShaderSource> vsTexCol = {365{ GLSL_1xx,366R"(367#if __VERSION__ >= 130368#define attribute in369#define varying out370#endif371attribute vec3 Position;372attribute vec4 Color0;373attribute vec2 TexCoord0;374varying vec4 oColor0;375varying vec2 oTexCoord0;376uniform mat4 WorldViewProj;377uniform vec2 TintSaturation;378vec3 rgb2hsv(vec3 c) {379vec4 K = vec4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);380vec4 p = mix(vec4(c.bg, K.wz), vec4(c.gb, K.xy), step(c.b, c.g));381vec4 q = mix(vec4(p.xyw, c.r), vec4(c.r, p.yzx), step(p.x, c.r));382float d = q.x - min(q.w, q.y);383float e = 1.0e-10;384return vec3(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x);385}386vec3 hsv2rgb(vec3 c) {387vec4 K = vec4(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0);388vec3 p = abs(fract(c.xxx + K.xyz) * 6.0 - K.www);389return c.z * mix(K.xxx, clamp(p - K.xxx, 0.0, 1.0), c.y);390}391void main() {392gl_Position = WorldViewProj * vec4(Position, 1.0);393vec3 hsv = rgb2hsv(Color0.xyz);394hsv.x += TintSaturation.x;395hsv.y *= TintSaturation.y;396oColor0 = vec4(hsv2rgb(hsv), Color0.w);397oTexCoord0 = TexCoord0;398})",399},400{ ShaderLanguage::HLSL_D3D9,401R"(402struct VS_INPUT { float3 Position : POSITION; float2 Texcoord0 : TEXCOORD0; float4 Color0 : COLOR0; };403struct VS_OUTPUT { float4 Position : POSITION; float2 Texcoord0 : TEXCOORD0; float4 Color0 : COLOR0; };404float4x4 WorldViewProj : register(c0);405float2 TintSaturation : register(c4);406float3 rgb2hsv(float3 c) {407float4 K = float4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);408float4 p = lerp(float4(c.bg, K.wz), float4(c.gb, K.xy), step(c.b, c.g));409float4 q = lerp(float4(p.xyw, c.r), float4(c.r, p.yzx), step(p.x, c.r));410float d = q.x - min(q.w, q.y);411float e = 1.0e-10;412return float3(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x);413}414float3 hsv2rgb(float3 c) {415float4 K = float4(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0);416float3 p = abs(frac(c.xxx + K.xyz) * 6.0 - K.www);417return c.z * lerp(K.xxx, saturate(p - K.xxx), c.y);418}419VS_OUTPUT main(VS_INPUT input) {420VS_OUTPUT output;421float3 hsv = rgb2hsv(input.Color0.xyz);422hsv.x += TintSaturation.x;423hsv.y *= TintSaturation.y;424output.Color0 = float4(hsv2rgb(hsv), input.Color0.w);425output.Position = mul(float4(input.Position, 1.0), WorldViewProj);426output.Texcoord0 = input.Texcoord0;427return output;428}429)"430},431{ ShaderLanguage::HLSL_D3D11,432R"(433struct VS_INPUT { float3 Position : POSITION; float2 Texcoord0 : TEXCOORD0; float4 Color0 : COLOR0; };434struct VS_OUTPUT { float4 Color0 : COLOR0; float2 Texcoord0 : TEXCOORD0; float4 Position : SV_Position; };435cbuffer ConstantBuffer : register(b0) {436matrix WorldViewProj;437float2 TintSaturation;438};439float3 rgb2hsv(float3 c) {440float4 K = float4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);441float4 p = lerp(float4(c.bg, K.wz), float4(c.gb, K.xy), step(c.b, c.g));442float4 q = lerp(float4(p.xyw, c.r), float4(c.r, p.yzx), step(p.x, c.r));443float d = q.x - min(q.w, q.y);444float e = 1.0e-10;445return float3(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x);446}447float3 hsv2rgb(float3 c) {448float4 K = float4(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0);449float3 p = abs(frac(c.xxx + K.xyz) * 6.0 - K.www);450return c.z * lerp(K.xxx, saturate(p - K.xxx), c.y);451}452VS_OUTPUT main(VS_INPUT input) {453VS_OUTPUT output;454float3 hsv = rgb2hsv(input.Color0.xyz);455hsv.x += TintSaturation.x;456hsv.y *= TintSaturation.y;457output.Color0 = float4(hsv2rgb(hsv), input.Color0.w);458output.Position = mul(WorldViewProj, float4(input.Position, 1.0));459output.Texcoord0 = input.Texcoord0;460return output;461}462)"463},464{ ShaderLanguage::GLSL_VULKAN,465R"(#version 450466#extension GL_ARB_separate_shader_objects : enable467#extension GL_ARB_shading_language_420pack : enable468layout (std140, set = 0, binding = 0) uniform bufferVals {469mat4 WorldViewProj;470vec2 TintSaturation;471} myBufferVals;472vec3 rgb2hsv(vec3 c) {473vec4 K = vec4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);474vec4 p = mix(vec4(c.bg, K.wz), vec4(c.gb, K.xy), step(c.b, c.g));475vec4 q = mix(vec4(p.xyw, c.r), vec4(c.r, p.yzx), step(p.x, c.r));476float d = q.x - min(q.w, q.y);477float e = 1.0e-10;478return vec3(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x);479}480vec3 hsv2rgb(vec3 c) {481vec4 K = vec4(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0);482vec3 p = abs(fract(c.xxx + K.xyz) * 6.0 - K.www);483return c.z * mix(K.xxx, clamp(p - K.xxx, 0.0, 1.0), c.y);484}485layout (location = 0) in vec4 pos;486layout (location = 1) in vec4 inColor;487layout (location = 3) in vec2 inTexCoord;488layout (location = 0) out vec4 outColor;489layout (location = 1) out vec2 outTexCoord;490out gl_PerVertex { vec4 gl_Position; };491void main() {492vec3 hsv = rgb2hsv(inColor.xyz);493hsv.x += myBufferVals.TintSaturation.x;494hsv.y *= myBufferVals.TintSaturation.y;495outColor = vec4(hsv2rgb(hsv), inColor.w);496outTexCoord = inTexCoord;497gl_Position = myBufferVals.WorldViewProj * pos;498}499)"500} };501502static_assert(SEM_TEXCOORD0 == 3, "Semantic shader hardcoded in glsl above.");503504const UniformBufferDesc vsTexColBufDesc{ sizeof(VsTexColUB),{505{ "WorldViewProj", 0, -1, UniformType::MATRIX4X4, 0 },506{ "TintSaturation", 4, -1, UniformType::FLOAT2, 64 },507} };508509ShaderModule *CreateShader(DrawContext *draw, ShaderStage stage, const std::vector<ShaderSource> &sources) {510uint32_t supported = draw->GetSupportedShaderLanguages();511for (auto iter : sources) {512if ((uint32_t)iter.lang & supported) {513return draw->CreateShaderModule(stage, iter.lang, (const uint8_t *)iter.src, strlen(iter.src));514}515}516return nullptr;517}518519bool DrawContext::CreatePresets() {520if (bugs_.Has(Bugs::RASPBERRY_SHADER_COMP_HANG)) {521vsPresets_[VS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::Vertex, vsTexColNoTint);522} else {523vsPresets_[VS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::Vertex, vsTexCol);524}525526vsPresets_[VS_COLOR_2D] = CreateShader(this, ShaderStage::Vertex, vsCol);527528fsPresets_[FS_TEXTURE_COLOR_2D] = CreateShader(this, ShaderStage::Fragment, fsTexCol);529fsPresets_[FS_COLOR_2D] = CreateShader(this, ShaderStage::Fragment, fsCol);530fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE] = CreateShader(this, ShaderStage::Fragment, fsTexColRBSwizzle);531532return vsPresets_[VS_TEXTURE_COLOR_2D] && vsPresets_[VS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D] && fsPresets_[FS_COLOR_2D] && fsPresets_[FS_TEXTURE_COLOR_2D_RB_SWIZZLE];533}534535void DrawContext::DestroyPresets() {536for (int i = 0; i < VS_MAX_PRESET; i++) {537if (vsPresets_[i]) {538vsPresets_[i]->Release();539vsPresets_[i] = nullptr;540}541}542for (int i = 0; i < FS_MAX_PRESET; i++) {543if (fsPresets_[i]) {544fsPresets_[i]->Release();545fsPresets_[i] = nullptr;546}547}548}549550DrawContext::~DrawContext() {551// TODO: Can't call DestroyPresets here, too late.552}553554void ConvertFromRGBA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {555// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.556const uint32_t *src32 = (const uint32_t *)src;557558if (format == Draw::DataFormat::R8G8B8A8_UNORM) {559uint32_t *dst32 = (uint32_t *)dst;560if (src == dst) {561return;562} else {563for (uint32_t y = 0; y < height; ++y) {564memcpy(dst32, src32, width * 4);565src32 += srcStride;566dst32 += dstStride;567}568}569} else if (format == Draw::DataFormat::R8G8B8_UNORM) {570for (uint32_t y = 0; y < height; ++y) {571ConvertRGBA8888ToRGB888(dst, src32, width);572src32 += srcStride;573dst += dstStride * 3;574}575} else {576// But here it shouldn't matter if they do intersect577uint16_t *dst16 = (uint16_t *)dst;578switch (format) {579case Draw::DataFormat::R5G6B5_UNORM_PACK16: // BGR 565580for (uint32_t y = 0; y < height; ++y) {581ConvertRGBA8888ToRGB565(dst16, src32, width);582src32 += srcStride;583dst16 += dstStride;584}585break;586case Draw::DataFormat::A1R5G5B5_UNORM_PACK16: // ABGR 1555587for (uint32_t y = 0; y < height; ++y) {588ConvertRGBA8888ToRGBA5551(dst16, src32, width);589src32 += srcStride;590dst16 += dstStride;591}592break;593case Draw::DataFormat::A4R4G4B4_UNORM_PACK16: // ABGR 4444594for (uint32_t y = 0; y < height; ++y) {595ConvertRGBA8888ToRGBA4444(dst16, src32, width);596src32 += srcStride;597dst16 += dstStride;598}599break;600case Draw::DataFormat::R8G8B8A8_UNORM:601case Draw::DataFormat::UNDEFINED:602default:603WARN_LOG(Log::G3D, "Unable to convert from format: %d", (int)format);604break;605}606}607}608609void ConvertFromBGRA8888(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {610// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.611const uint32_t *src32 = (const uint32_t *)src;612613if (format == Draw::DataFormat::B8G8R8A8_UNORM) {614uint32_t *dst32 = (uint32_t *)dst;615if (src == dst) {616return;617} else {618for (uint32_t y = 0; y < height; ++y) {619memcpy(dst32, src32, width * 4);620src32 += srcStride;621dst32 += dstStride;622}623}624} else if (format == Draw::DataFormat::R8G8B8A8_UNORM) {625uint32_t *dst32 = (uint32_t *)dst;626for (uint32_t y = 0; y < height; ++y) {627ConvertBGRA8888ToRGBA8888(dst32, src32, width);628src32 += srcStride;629dst32 += dstStride;630}631} else if (format == Draw::DataFormat::R8G8B8_UNORM) {632for (uint32_t y = 0; y < height; ++y) {633ConvertBGRA8888ToRGB888(dst, src32, width);634src32 += srcStride;635dst += dstStride * 3;636}637} else {638// But here it shouldn't matter if they do intersect639uint16_t *dst16 = (uint16_t *)dst;640switch (format) {641case Draw::DataFormat::R5G6B5_UNORM_PACK16: // BGR 565642for (uint32_t y = 0; y < height; ++y) {643ConvertBGRA8888ToRGB565(dst16, src32, width);644src32 += srcStride;645dst16 += dstStride;646}647break;648case Draw::DataFormat::A1R5G5B5_UNORM_PACK16: // ABGR 1555649for (uint32_t y = 0; y < height; ++y) {650ConvertBGRA8888ToRGBA5551(dst16, src32, width);651src32 += srcStride;652dst16 += dstStride;653}654break;655case Draw::DataFormat::A4R4G4B4_UNORM_PACK16: // ABGR 4444656for (uint32_t y = 0; y < height; ++y) {657ConvertBGRA8888ToRGBA4444(dst16, src32, width);658src32 += srcStride;659dst16 += dstStride;660}661break;662case Draw::DataFormat::R8G8B8A8_UNORM:663case Draw::DataFormat::UNDEFINED:664default:665WARN_LOG(Log::G3D, "Unable to convert from format to BGRA: %d", (int)format);666break;667}668}669}670671void ConvertToD32F(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {672if (format == Draw::DataFormat::D32F) {673const float *src32 = (const float *)src;674float *dst32 = (float *)dst;675if (src == dst) {676return;677} else {678for (uint32_t y = 0; y < height; ++y) {679memcpy(dst32, src32, width * 4);680src32 += srcStride;681dst32 += dstStride;682}683}684} else if (format == Draw::DataFormat::D16) {685const uint16_t *src16 = (const uint16_t *)src;686float *dst32 = (float *)dst;687for (uint32_t y = 0; y < height; ++y) {688for (uint32_t x = 0; x < width; ++x) {689dst32[x] = (float)(int)src16[x] / 65535.0f;690}691src16 += srcStride;692dst32 += dstStride;693}694} else if (format == Draw::DataFormat::D24_S8) {695const uint32_t *src32 = (const uint32_t *)src;696float *dst32 = (float *)dst;697for (uint32_t y = 0; y < height; ++y) {698for (uint32_t x = 0; x < width; ++x) {699dst32[x] = (src32[x] & 0x00FFFFFF) / 16777215.0f;700}701src32 += srcStride;702dst32 += dstStride;703}704} else {705assert(false);706}707}708709// TODO: This is missing the conversion to the quarter-range we use if depth clamp is not available.710// That conversion doesn't necessarily belong here in thin3d, though.711void ConvertToD16(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t srcStride, uint32_t width, uint32_t height, DataFormat format) {712if (format == Draw::DataFormat::D32F) {713const float *src32 = (const float *)src;714uint16_t *dst16 = (uint16_t *)dst;715if (src == dst) {716return;717} else {718for (uint32_t y = 0; y < height; ++y) {719for (uint32_t x = 0; x < width; ++x) {720dst16[x] = (uint16_t)(src32[x] * 65535.0f);721}722src32 += srcStride;723dst16 += dstStride;724}725}726} else if (format == Draw::DataFormat::D16) {727_assert_(src != dst);728const uint16_t *src16 = (const uint16_t *)src;729uint16_t *dst16 = (uint16_t *)dst;730for (uint32_t y = 0; y < height; ++y) {731memcpy(dst16, src16, width * 2);732src16 += srcStride;733dst16 += dstStride;734}735} else if (format == Draw::DataFormat::D24_S8) {736_assert_(src != dst);737const uint32_t *src32 = (const uint32_t *)src;738uint16_t *dst16 = (uint16_t *)dst;739for (uint32_t y = 0; y < height; ++y) {740for (uint32_t x = 0; x < width; ++x) {741dst16[x] = (src32[x] & 0x00FFFFFF) >> 8;742}743src32 += srcStride;744dst16 += dstStride;745}746} else {747assert(false);748}749}750751const char *Bugs::GetBugName(uint32_t bug) {752switch (bug) {753case NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI";754case NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO";755case DUAL_SOURCE_BLENDING_BROKEN: return "DUAL_SOURCE_BLENDING_BROKEN";756case ANY_MAP_BUFFER_RANGE_SLOW: return "ANY_MAP_BUFFER_RANGE_SLOW";757case PVR_GENMIPMAP_HEIGHT_GREATER: return "PVR_GENMIPMAP_HEIGHT_GREATER";758case BROKEN_NAN_IN_CONDITIONAL: return "BROKEN_NAN_IN_CONDITIONAL";759case COLORWRITEMASK_BROKEN_WITH_DEPTHTEST: return "COLORWRITEMASK_BROKEN_WITH_DEPTHTEST";760case BROKEN_FLAT_IN_SHADER: return "BROKEN_FLAT_IN_SHADER";761case EQUAL_WZ_CORRUPTS_DEPTH: return "EQUAL_WZ_CORRUPTS_DEPTH";762case RASPBERRY_SHADER_COMP_HANG: return "RASPBERRY_SHADER_COMP_HANG";763case MALI_CONSTANT_LOAD_BUG: return "MALI_CONSTANT_LOAD_BUG";764case SUBPASS_FEEDBACK_BROKEN: return "SUBPASS_FEEDBACK_BROKEN";765case GEOMETRY_SHADERS_SLOW_OR_BROKEN: return "GEOMETRY_SHADERS_SLOW_OR_BROKEN";766case ADRENO_RESOURCE_DEADLOCK: return "ADRENO_RESOURCE_DEADLOCK";767case PVR_BAD_16BIT_TEXFORMATS: return "PVR_BAD_16BIT_TEXFORMATS";768default: return "(N/A)";769}770}771772const char *PresentModeToString(PresentMode presentMode) {773// All 8 possible cases, with three flags, for simplicity.774switch ((int)presentMode) {775case 0: return "NONE";776case (int)PresentMode::FIFO: return "FIFO";777case (int)PresentMode::IMMEDIATE: return "IMMEDIATE";778case (int)PresentMode::MAILBOX: return "MAILBOX";779case ((int)PresentMode::FIFO | (int)PresentMode::MAILBOX) : return "FIFO|MAILBOX";780case ((int)PresentMode::FIFO | (int)PresentMode::IMMEDIATE) : return "FIFO|IMMEDIATE";781case ((int)PresentMode::MAILBOX | (int)PresentMode::IMMEDIATE) : return "MAILBOX|IMMEDIATE"; // Not gonna happen782case ((int)PresentMode::FIFO | (int)PresentMode::MAILBOX | (int)PresentMode::IMMEDIATE) : return "FIFO|MAILBOX|IMMEDIATE";783default:784return "INVALID";785}786}787788} // namespace Draw789790791