Path: blob/master/dep/reshadefx/src/effect_codegen_hlsl.cpp
4246 views
/*1* Copyright (C) 2014 Patrick Mours2* SPDX-License-Identifier: BSD-3-Clause3*/45#include "effect_parser.hpp"6#include "effect_codegen.hpp"7#include <cmath> // std::isinf, std::isnan, std::signbit8#include <cctype> // std::tolower9#include <cassert>10#include <cstring> // stricmp, std::memcmp11#include <charconv> // std::from_chars, std::to_chars12#include <algorithm> // std::equal, std::find, std::find_if, std::max13#include <locale>14#include <sstream>1516using namespace reshadefx;1718inline char to_digit(unsigned int value)19{20assert(value < 10);21return '0' + static_cast<char>(value);22}2324inline uint32_t align_up(uint32_t size, uint32_t alignment, uint32_t elements)25{26alignment -= 1;27return ((size + alignment) & ~alignment) * (elements - 1) + size;28}2930class codegen_hlsl final : public codegen31{32public:33codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants) :34_shader_model(shader_model),35_debug_info(debug_info),36_uniforms_to_spec_constants(uniforms_to_spec_constants)37{38// Create default block and reserve a memory block to avoid frequent reallocations39std::string &block = _blocks.emplace(0, std::string()).first->second;40block.reserve(8192);41}4243private:44enum class naming45{46// Name should already be unique, so no additional steps are taken47unique,48// Will be numbered when clashing with another name49general,50// Replace name with a code snippet51expression,52};5354unsigned int _shader_model = 0;55bool _debug_info = false;56bool _uniforms_to_spec_constants = false;5758std::unordered_map<id, std::string> _names;59std::unordered_map<id, std::string> _blocks;60std::string _cbuffer_block;61std::string _current_location;62std::string _current_function_declaration;6364std::string _remapped_semantics[15];65std::vector<std::tuple<type, constant, id>> _constant_lookup;66#if 067std::vector<sampler_binding> _sampler_lookup;68#endif6970// Only write compatibility intrinsics to result if they are actually in use71bool _uses_bitwise_cast = false;72bool _uses_bitwise_intrinsics = false;7374void optimize_bindings() override75{76codegen::optimize_bindings();7778#if 079if (_shader_model < 40)80return;8182_module.num_sampler_bindings = static_cast<uint32_t>(_sampler_lookup.size());8384for (technique &tech : _module.techniques)85for (pass &pass : tech.passes)86pass.sampler_bindings.assign(_sampler_lookup.begin(), _sampler_lookup.end());87#endif88}8990std::string finalize_preamble() const91{92std::string preamble;9394#define IMPLEMENT_INTRINSIC_FALLBACK_ASINT(n) \95"int" #n " __asint(float" #n " v) {" \96"float" #n " e = 0;" \97"float" #n " f = frexp(v, e) * 2 - 1;" /* frexp does not include sign bit in HLSL, so can use as is */ \98"float" #n " m = ldexp(f, 23);" \99"return (v == 0) ? 0 : (v < 0 ? 2147483648 : 0) + (" /* Zero (does not handle negative zero) */ \100/* isnan(v) ? 2147483647 : */ /* NaN */ \101/* isinf(v) ? 2139095040 : */ /* Infinity */ \102"ldexp(e + 126, 23) + m);" \103"}"104#define IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(n) \105"int" #n " __asuint(float" #n " v) { return __asint(v); }"106#define IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(n) \107"float" #n " __asfloat(int" #n " v) {" \108"float" #n " m = v % exp2(23);" \109"float" #n " f = ldexp(m, -23);" \110"float" #n " e = floor(ldexp(v, -23) % 256);" \111"return (v > 2147483647 ? -1 : 1) * (" \112/* e == 0 ? ldexp(f, -126) : */ /* Denormalized */ \113/* e == 255 ? (m == 0 ? 1.#INF : -1.#IND) : */ /* Infinity and NaN */ \114"ldexp(1 + f, e - 127));" \115"}"116117// See https://graphics.stanford.edu/%7Eseander/bithacks.html#CountBitsSetParallel118#define IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(n) \119"uint" #n " __countbits(uint" #n " v) {" \120"v = v - ((v >> 1) & 0x55555555);" \121"v = (v & 0x33333333) + ((v >> 2) & 0x33333333);" \122"v = (v + (v >> 4)) & 0x0F0F0F0F;" \123"v *= 0x01010101;" \124"return v >> 24;" \125"}"126#define IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(n) \127"uint" #n " __countbits(uint" #n " v) {" \128"uint" #n " c = 0;" \129"while (any(v > 0)) {" \130"c += v % 2;" \131"v /= 2;" \132"}" \133"return c;" \134"}"135136// See https://graphics.stanford.edu/%7Eseander/bithacks.html#ReverseParallel137#define IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(n) \138"uint" #n " __reversebits(uint" #n " v) {" \139"v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);" \140"v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);" \141"v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);" \142"v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);" \143"return (v >> 16) | (v << 16);" \144"}"145#define IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(n) \146"uint" #n " __reversebits(uint" #n " v) {" \147"uint" #n " r = 0;" \148"for (int i = 0; i < 32; i++) {" \149"r *= 2;" \150"r += floor(x % 2);" \151"v /= 2;" \152"}" \153"return r;" \154"}"155156// See https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightParallel157#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(n) \158"uint" #n " __firstbitlow(uint" #n " v) {" \159"uint" #n " c = (v != 0) ? 31 : 32;" \160"v &= -int" #n "(v);" \161"c = (v & 0x0000FFFF) ? c - 16 : c;" \162"c = (v & 0x00FF00FF) ? c - 8 : c;" \163"c = (v & 0x0F0F0F0F) ? c - 4 : c;" \164"c = (v & 0x33333333) ? c - 2 : c;" \165"c = (v & 0x55555555) ? c - 1 : c;" \166"return c;" \167"}"168#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(n) \169"uint" #n " __firstbitlow(uint" #n " v) {" \170"uint" #n " c = (v != 0) ? 31 : 32;" \171"for (int i = 0; i < 32; i++) {" \172"c = c > i && (v % 2) != 0 ? i : c;" \173"v /= 2;" \174"}" \175"return c;" \176"}"177178179#define IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(n) \180"uint" #n " __firstbithigh(uint" #n " v) { return __firstbitlow(__reversebits(v)); }"181182if (_shader_model >= 40)183{184preamble +=185"struct __sampler1D_int { Texture1D<int> t; SamplerState s; };\n"186"struct __sampler2D_int { Texture2D<int> t; SamplerState s; };\n"187"struct __sampler3D_int { Texture3D<int> t; SamplerState s; };\n"188"struct __sampler1D_uint { Texture1D<uint> t; SamplerState s; };\n"189"struct __sampler2D_uint { Texture2D<uint> t; SamplerState s; };\n"190"struct __sampler3D_uint { Texture3D<uint> t; SamplerState s; };\n"191"struct __sampler1D_float { Texture1D<float> t; SamplerState s; };\n"192"struct __sampler2D_float { Texture2D<float> t; SamplerState s; };\n"193"struct __sampler3D_float { Texture3D<float> t; SamplerState s; };\n"194"struct __sampler1D_float4 { Texture1D<float4> t; SamplerState s; };\n"195"struct __sampler2D_float4 { Texture2D<float4> t; SamplerState s; };\n"196"struct __sampler3D_float4 { Texture3D<float4> t; SamplerState s; };\n";197198if (_uses_bitwise_intrinsics && _shader_model < 50)199preamble +=200IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(1) "\n"201IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(2) "\n"202IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(3) "\n"203IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS(4) "\n"204205IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(1) "\n"206IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(2) "\n"207IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(3) "\n"208IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS(4) "\n"209210IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(1) "\n"211IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(2) "\n"212IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(3) "\n"213IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW(4) "\n"214215IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(1) "\n"216IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(2) "\n"217IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(3) "\n"218IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(4) "\n";219220if (!_cbuffer_block.empty())221{222#if 0223if (_shader_model >= 60)224preamble += "[[vk::binding(0, 0)]] "; // Descriptor set 0225#endif226227preamble += "cbuffer _Globals {\n" + _cbuffer_block + "};\n";228}229}230else231{232preamble +=233"struct __sampler1D { sampler1D s; float1 pixelsize; };\n"234"struct __sampler2D { sampler2D s; float2 pixelsize; };\n"235"struct __sampler3D { sampler3D s; float3 pixelsize; };\n"236"uniform float2 __TEXEL_SIZE__ : register(c255);\n";237238if (_uses_bitwise_cast)239preamble +=240IMPLEMENT_INTRINSIC_FALLBACK_ASINT(1) "\n"241IMPLEMENT_INTRINSIC_FALLBACK_ASINT(2) "\n"242IMPLEMENT_INTRINSIC_FALLBACK_ASINT(3) "\n"243IMPLEMENT_INTRINSIC_FALLBACK_ASINT(4) "\n"244245IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(1) "\n"246IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(2) "\n"247IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(3) "\n"248IMPLEMENT_INTRINSIC_FALLBACK_ASUINT(4) "\n"249250IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(1) "\n"251IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(2) "\n"252IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(3) "\n"253IMPLEMENT_INTRINSIC_FALLBACK_ASFLOAT(4) "\n";254255if (_uses_bitwise_intrinsics)256preamble +=257IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(1) "\n"258IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(2) "\n"259IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(3) "\n"260IMPLEMENT_INTRINSIC_FALLBACK_COUNTBITS_LOOP(4) "\n"261262IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(1) "\n"263IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(2) "\n"264IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(3) "\n"265IMPLEMENT_INTRINSIC_FALLBACK_REVERSEBITS_LOOP(4) "\n"266267IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(1) "\n"268IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(2) "\n"269IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(3) "\n"270IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITLOW_LOOP(4) "\n"271272IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(1) "\n"273IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(2) "\n"274IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(3) "\n"275IMPLEMENT_INTRINSIC_FALLBACK_FIRSTBITHIGH(4) "\n";276277if (!_cbuffer_block.empty())278{279preamble += _cbuffer_block;280}281}282283return preamble;284}285286std::string finalize_code() const override287{288std::string code = finalize_preamble();289290// Add global definitions (struct types, global variables, sampler state declarations, ...)291code += _blocks.at(0);292293// Add texture and sampler definitions294for (const sampler &info : _module.samplers)295code += _blocks.at(info.id);296297// Add storage definitions298for (const storage &info : _module.storages)299code += _blocks.at(info.id);300301// Add function definitions302for (const std::unique_ptr<function> &func : _functions)303code += _blocks.at(func->id);304305return code;306}307std::string finalize_code_for_entry_point(const std::string &entry_point_name) const override308{309const auto entry_point_it = std::find_if(_functions.begin(), _functions.end(),310[&entry_point_name](const std::unique_ptr<function> &func) {311return func->unique_name == entry_point_name;312});313if (entry_point_it == _functions.end())314return {};315const function &entry_point = *entry_point_it->get();316317std::string code = finalize_preamble();318319if (_shader_model < 40 && entry_point.type == shader_type::pixel)320// Overwrite position semantic in pixel shaders321code += "#define POSITION VPOS\n";322323// Add global definitions (struct types, global variables, sampler state declarations, ...)324code += _blocks.at(0);325326const auto replace_binding =327[](std::string &code, uint32_t binding) {328for (size_t start = 0;;)329{330const size_t pos = code.find(": register(", start);331if (pos == std::string::npos)332break;333const size_t beg = pos + 12;334const size_t end = code.find(')', beg);335const std::string replacement = std::to_string(binding);336code.replace(beg, end - beg, replacement);337start = beg + replacement.length();338}339};340341// Add referenced texture and sampler definitions342for (uint32_t binding = 0; binding < entry_point.referenced_samplers.size(); ++binding)343{344if (entry_point.referenced_samplers[binding] == 0)345continue;346347std::string block_code = _blocks.at(entry_point.referenced_samplers[binding]);348replace_binding(block_code, binding);349code += block_code;350}351352// Add referenced storage definitions353for (uint32_t binding = 0; binding < entry_point.referenced_storages.size(); ++binding)354{355if (entry_point.referenced_storages[binding] == 0)356continue;357358std::string block_code = _blocks.at(entry_point.referenced_storages[binding]);359replace_binding(block_code, binding);360code += block_code;361}362363// Add referenced function definitions364for (const std::unique_ptr<function> &func : _functions)365{366if (func->id != entry_point.id &&367std::find(entry_point.referenced_functions.begin(), entry_point.referenced_functions.end(), func->id) == entry_point.referenced_functions.end())368continue;369370code += _blocks.at(func->id);371}372373return code;374}375376template <bool is_param = false, bool is_decl = true>377void write_type(std::string &s, const type &type) const378{379if constexpr (is_decl)380{381if (type.has(type::q_static))382s += "static ";383if (type.has(type::q_precise))384s += "precise ";385if (type.has(type::q_groupshared))386s += "groupshared ";387}388389if constexpr (is_param)390{391if (type.has(type::q_linear))392s += "linear ";393if (type.has(type::q_noperspective))394s += "noperspective ";395if (type.has(type::q_centroid))396s += "centroid ";397if (type.has(type::q_nointerpolation))398s += "nointerpolation ";399400if (type.has(type::q_inout))401s += "inout ";402else if (type.has(type::q_in))403s += "in ";404else if (type.has(type::q_out))405s += "out ";406}407408switch (type.base)409{410case type::t_void:411s += "void";412return;413case type::t_bool:414s += "bool";415break;416case type::t_min16int:417// Minimum precision types are only supported in shader model 4 and up418// Real 16-bit types were added in shader model 6.2419s += _shader_model >= 62 ? "int16_t" : _shader_model >= 40 ? "min16int" : "int";420break;421case type::t_int:422s += "int";423break;424case type::t_min16uint:425s += _shader_model >= 62 ? "uint16_t" : _shader_model >= 40 ? "min16uint" : "int";426break;427case type::t_uint:428// In shader model 3, uints can only be used with known-positive values, so use ints instead429s += _shader_model >= 40 ? "uint" : "int";430break;431case type::t_min16float:432s += _shader_model >= 62 ? "float16_t" : _shader_model >= 40 ? "min16float" : "float";433break;434case type::t_float:435s += "float";436break;437case type::t_struct:438s += id_to_name(type.struct_definition);439return;440case type::t_sampler1d_int:441case type::t_sampler2d_int:442case type::t_sampler3d_int:443s += "__sampler";444s += to_digit(type.texture_dimension());445s += 'D';446if (_shader_model >= 40)447{448s += "_int";449if (type.rows > 1)450s += to_digit(type.rows);451}452return;453case type::t_sampler1d_uint:454case type::t_sampler2d_uint:455case type::t_sampler3d_uint:456s += "__sampler";457s += to_digit(type.texture_dimension());458s += 'D';459if (_shader_model >= 40)460{461s += "_uint";462if (type.rows > 1)463s += to_digit(type.rows);464}465return;466case type::t_sampler1d_float:467case type::t_sampler2d_float:468case type::t_sampler3d_float:469s += "__sampler";470s += to_digit(type.texture_dimension());471s += 'D';472if (_shader_model >= 40)473{474s += "_float";475if (type.rows > 1)476s += to_digit(type.rows);477}478return;479case type::t_storage1d_int:480case type::t_storage2d_int:481case type::t_storage3d_int:482s += "RWTexture";483s += to_digit(type.texture_dimension());484s += "D<";485s += "int";486if (type.rows > 1)487s += to_digit(type.rows);488s += '>';489return;490case type::t_storage1d_uint:491case type::t_storage2d_uint:492case type::t_storage3d_uint:493s += "RWTexture";494s += to_digit(type.texture_dimension());495s += "D<";496s += "uint";497if (type.rows > 1)498s += to_digit(type.rows);499s += '>';500return;501case type::t_storage1d_float:502case type::t_storage2d_float:503case type::t_storage3d_float:504s += "RWTexture";505s += to_digit(type.texture_dimension());506s += "D<";507s += "float";508if (type.rows > 1)509s += to_digit(type.rows);510s += '>';511return;512default:513assert(false);514return;515}516517if (type.rows > 1)518s += to_digit(type.rows);519if (type.cols > 1)520s += 'x', s += to_digit(type.cols);521}522void write_constant(std::string &s, const type &data_type, const constant &data) const523{524if (data_type.is_array())525{526assert(data_type.is_bounded_array());527528type elem_type = data_type;529elem_type.array_length = 0;530531s += "{ ";532533for (unsigned int a = 0; a < data_type.array_length; ++a)534{535write_constant(s, elem_type, a < static_cast<unsigned int>(data.array_data.size()) ? data.array_data[a] : constant {});536s += ", ";537}538539// Remove trailing ", "540s.erase(s.size() - 2);541542s += " }";543return;544}545546if (data_type.is_struct())547{548// The can only be zero initializer struct constants549assert(data.as_uint[0] == 0);550551s += '(' + id_to_name(data_type.struct_definition) + ")0";552return;553}554555// There can only be numeric constants556assert(data_type.is_numeric());557558if (!data_type.is_scalar())559write_type<false, false>(s, data_type), s += '(';560561for (unsigned int i = 0; i < data_type.components(); ++i)562{563switch (data_type.base)564{565case type::t_bool:566s += data.as_uint[i] ? "true" : "false";567break;568case type::t_min16int:569case type::t_int:570s += std::to_string(data.as_int[i]);571break;572case type::t_min16uint:573case type::t_uint:574s += std::to_string(data.as_uint[i]);575break;576case type::t_min16float:577case type::t_float:578if (std::isnan(data.as_float[i])) {579s += "-1.#IND";580break;581}582if (std::isinf(data.as_float[i])) {583s += std::signbit(data.as_float[i]) ? "1.#INF" : "-1.#INF";584break;585}586{587#ifdef _MSC_VER588char temp[64];589const std::to_chars_result res = std::to_chars(temp, temp + sizeof(temp), data.as_float[i], std::chars_format::scientific, 8);590if (res.ec == std::errc())591s.append(temp, res.ptr);592else593assert(false);594#else595std::ostringstream ss;596ss.imbue(std::locale::classic());597ss << data.as_float[i];598s += ss.str();599#endif600}601break;602default:603assert(false);604}605606s += ", ";607}608609// Remove trailing ", "610s.erase(s.size() - 2);611612if (!data_type.is_scalar())613s += ')';614}615template <bool force_source = false>616void write_location(std::string &s, const location &loc)617{618if (loc.source.empty() || !_debug_info)619return;620621s += "#line " + std::to_string(loc.line);622623size_t offset = s.size();624625// Avoid writing the file name every time to reduce output text size626if constexpr (force_source)627{628s += " \"" + loc.source + '\"';629}630else if (loc.source != _current_location)631{632s += " \"" + loc.source + '\"';633634_current_location = loc.source;635}636637// Need to escape string for new DirectX Shader Compiler (dxc)638if (_shader_model >= 60)639{640for (; (offset = s.find('\\', offset)) != std::string::npos; offset += 2)641s.insert(offset, "\\", 1);642}643644s += '\n';645}646void write_texture_format(std::string &s, texture_format format)647{648switch (format)649{650case texture_format::r32i:651s += "int";652break;653case texture_format::r32u:654s += "uint";655break;656default:657assert(false);658[[fallthrough]];659case texture_format::unknown:660case texture_format::r8:661case texture_format::r16:662case texture_format::r16f:663case texture_format::r32f:664case texture_format::rg8:665case texture_format::rg16:666case texture_format::rg16f:667case texture_format::rg32f:668case texture_format::rgba8:669case texture_format::rgba16:670case texture_format::rgba16f:671case texture_format::rgba32f:672case texture_format::rgb10a2:673s += "float4";674break;675}676}677678std::string id_to_name(id id) const679{680assert(id != 0);681if (const auto names_it = _names.find(id);682names_it != _names.end())683return names_it->second;684return '_' + std::to_string(id);685}686687template <naming naming_type = naming::general>688void define_name(const id id, std::string name)689{690assert(!name.empty());691if constexpr (naming_type != naming::expression)692if (name[0] == '_')693return; // Filter out names that may clash with automatic ones694name = escape_name(std::move(name));695if constexpr (naming_type == naming::general)696if (std::find_if(_names.begin(), _names.end(),697[&name](const auto &names_it) { return names_it.second == name; }) != _names.end())698name += '_' + std::to_string(id); // Append a numbered suffix if the name already exists699_names[id] = std::move(name);700}701702std::string convert_semantic(const std::string &semantic, uint32_t max_attributes = 1)703{704if (_shader_model < 40)705{706if (semantic == "SV_POSITION")707return "POSITION"; // For pixel shaders this has to be "VPOS", so need to redefine that in post708if (semantic == "VPOS")709return "VPOS";710if (semantic == "SV_POINTSIZE")711return "PSIZE";712if (semantic.compare(0, 9, "SV_TARGET") == 0)713return "COLOR" + semantic.substr(9);714if (semantic == "SV_DEPTH")715return "DEPTH";716if (semantic == "SV_VERTEXID")717return "TEXCOORD0 /* VERTEXID */";718if (semantic == "SV_ISFRONTFACE")719return "VFACE";720721size_t digit_index = semantic.size() - 1;722while (digit_index != 0 && semantic[digit_index] >= '0' && semantic[digit_index] <= '9')723digit_index--;724digit_index++;725726const std::string semantic_base = semantic.substr(0, digit_index);727728uint32_t semantic_digit = 0;729std::from_chars(semantic.c_str() + digit_index, semantic.c_str() + semantic.size(), semantic_digit);730731if (semantic_base == "TEXCOORD")732{733if (semantic_digit < 15)734{735assert(_remapped_semantics[semantic_digit].empty() || _remapped_semantics[semantic_digit] == semantic); // Mixing custom semantic names and multiple TEXCOORD indices is not supported736_remapped_semantics[semantic_digit] = semantic;737}738}739// Shader model 3 only supports a selected list of semantic names, so need to remap custom ones to that740else if (741semantic_base != "COLOR" &&742semantic_base != "NORMAL" &&743semantic_base != "TANGENT" &&744semantic_base != "BINORMAL")745{746// Legal semantic indices are between 0 and 15, but skip first entry in case both custom semantic names and the common TEXCOORD0 exist747for (int i = 1; i < 15; ++i)748{749if (_remapped_semantics[i].empty() || _remapped_semantics[i] == semantic)750{751for (uint32_t a = 0; a < max_attributes && i + a < 15; ++a)752_remapped_semantics[i + a] = semantic_base + std::to_string(semantic_digit + a);753754return "TEXCOORD" + std::to_string(i) + " /* " + semantic + " */";755}756}757}758}759else760{761if (semantic.compare(0, 5, "COLOR") == 0)762return "SV_TARGET" + semantic.substr(5);763}764765return semantic;766}767768static std::string escape_name(std::string name)769{770static const auto stringicmp = [](const std::string &a, const std::string &b) {771#ifdef _WIN32772return _stricmp(a.c_str(), b.c_str()) == 0;773#else774return std::equal(a.begin(), a.end(), b.begin(), b.end(), [](std::string::value_type a, std::string::value_type b) { return std::tolower(a) == std::tolower(b); });775#endif776};777778// HLSL compiler complains about "technique" and "pass" names in strict mode (no matter the casing)779if (stringicmp(name, "line") ||780stringicmp(name, "pass") ||781stringicmp(name, "technique") ||782stringicmp(name, "point") ||783stringicmp(name, "export") ||784stringicmp(name, "extern") ||785stringicmp(name, "compile") ||786stringicmp(name, "discard") ||787stringicmp(name, "half") ||788stringicmp(name, "in") ||789stringicmp(name, "lineadj") ||790stringicmp(name, "matrix") ||791stringicmp(name, "sample") ||792stringicmp(name, "sampler") ||793stringicmp(name, "shared") ||794stringicmp(name, "precise") ||795stringicmp(name, "register") ||796stringicmp(name, "texture") ||797stringicmp(name, "unorm") ||798stringicmp(name, "triangle") ||799stringicmp(name, "triangleadj") ||800stringicmp(name, "out") ||801stringicmp(name, "vector"))802// This is guaranteed to not clash with user defined names, since those starting with an underscore are filtered out in 'define_name'803name = '_' + name;804805return name;806}807808static void increase_indentation_level(std::string &block)809{810if (block.empty())811return;812813for (size_t pos = 0; (pos = block.find("\n\t", pos)) != std::string::npos; pos += 3)814block.replace(pos, 2, "\n\t\t");815816block.insert(block.begin(), '\t');817}818819id define_struct(const location &loc, struct_type &info) override820{821const id res = info.id = make_id();822define_name<naming::unique>(res, info.unique_name);823824_structs.push_back(info);825826std::string &code = _blocks.at(_current_block);827828write_location(code, loc);829830code += "struct " + id_to_name(res) + "\n{\n";831832for (const member_type &member : info.member_list)833{834code += '\t';835write_type<true>(code, member.type); // HLSL allows interpolation attributes on struct members, so handle this like a parameter836code += ' ' + member.name;837838if (member.type.is_array())839code += '[' + std::to_string(member.type.array_length) + ']';840841if (!member.semantic.empty())842code += " : " + convert_semantic(member.semantic, std::max(1u, member.type.components() / 4) * std::max(1u, member.type.array_length));843844code += ";\n";845}846847code += "};\n";848849return res;850}851id define_texture(const location &, texture &info) override852{853const id res = info.id = make_id();854855_module.textures.push_back(info);856857return res;858}859id define_sampler(const location &loc, const texture &tex_info, sampler &info) override860{861const id res = info.id = create_block();862define_name<naming::unique>(res, info.unique_name);863864std::string &code = _blocks.at(res);865866// Default to a register index equivalent to the entry in the sampler list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement)867const uint32_t default_binding = static_cast<uint32_t>(_module.samplers.size());868uint32_t sampler_state_binding = 0;869870if (_shader_model >= 40)871{872#if 0873// Try and reuse a sampler binding with the same sampler description874const auto existing_sampler_it = std::find_if(_sampler_lookup.begin(), _sampler_lookup.end(),875[&info](const sampler_desc &existing_info) {876return877existing_info.filter == info.filter &&878existing_info.address_u == info.address_u &&879existing_info.address_v == info.address_v &&880existing_info.address_w == info.address_w &&881existing_info.min_lod == info.min_lod &&882existing_info.max_lod == info.max_lod &&883existing_info.lod_bias == info.lod_bias;884});885if (existing_sampler_it != _sampler_lookup.end())886{887sampler_state_binding = existing_sampler_it->binding;888}889else890{891sampler_state_binding = static_cast<uint32_t>(_sampler_lookup.size());892893sampler_binding s;894s.filter = info.filter;895s.address_u = info.address_u;896s.address_v = info.address_v;897s.address_w = info.address_w;898s.min_lod = info.min_lod;899s.max_lod = info.max_lod;900s.lod_bias = info.lod_bias;901s.binding = sampler_state_binding;902_sampler_lookup.push_back(std::move(s));903904if (_shader_model >= 60)905_blocks.at(0) += "[[vk::binding(" + std::to_string(sampler_state_binding) + ", 1)]] "; // Descriptor set 1906907_blocks.at(0) += "SamplerState __s" + std::to_string(sampler_state_binding) + " : register(s" + std::to_string(sampler_state_binding) + ");\n";908}909910if (_shader_model >= 60)911code += "[[vk::binding(" + std::to_string(default_binding) + ", 2)]] "; // Descriptor set 2912913code += "Texture";914code += to_digit(static_cast<unsigned int>(tex_info.type));915code += "D<";916write_texture_format(code, tex_info.format);917code += "> __" + info.unique_name + "_t : register(t" + std::to_string(default_binding) + "); \n";918919write_location(code, loc);920921code += "static const ";922write_type(code, info.type);923code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_t, __s" + std::to_string(sampler_state_binding) + " };\n";924#else925code += "Texture";926code += to_digit(static_cast<unsigned int>(tex_info.type));927code += "D<";928write_texture_format(code, tex_info.format);929code += "> __" + info.unique_name + "_t : register(t" + std::to_string(default_binding) + "); \n";930931code += "SamplerState __" + info.unique_name + "_s : register(s" + std::to_string(default_binding) + "); \n";932933write_location(code, loc);934935code += "static const ";936write_type(code, info.type);937code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_t, __" + info.unique_name + "_s };\n";938#endif939}940else941{942const unsigned int texture_dimension = info.type.texture_dimension();943944code += "sampler";945code += to_digit(texture_dimension);946code += "D __" + info.unique_name + "_s : register(s" + std::to_string(default_binding) + ");\n";947948write_location(code, loc);949950code += "static const ";951write_type(code, info.type);952code += ' ' + id_to_name(res) + " = { __" + info.unique_name + "_s, float" + to_digit(texture_dimension) + '(';953954if (tex_info.semantic.empty())955{956code += "1.0 / " + std::to_string(tex_info.width);957if (texture_dimension >= 2)958code += ", 1.0 / " + std::to_string(tex_info.height);959if (texture_dimension >= 3)960code += ", 1.0 / " + std::to_string(tex_info.depth);961}962else963{964// Expect application to set inverse texture size via a define if it is not known here965code += tex_info.semantic + "_PIXEL_SIZE";966}967968code += ") }; \n";969}970971_module.samplers.push_back(info);972973return res;974}975id define_storage(const location &loc, const texture &, storage &info) override976{977const id res = info.id = create_block();978define_name<naming::unique>(res, info.unique_name);979980// Default to a register index equivalent to the entry in the storage list (this is later overwritten in 'finalize_code_for_entry_point' to a more optimal placement)981const uint32_t default_binding = static_cast<uint32_t>(_module.storages.size());982983if (_shader_model >= 50)984{985std::string &code = _blocks.at(res);986987write_location(code, loc);988989#if 0990if (_shader_model >= 60)991code += "[[vk::binding(" + std::to_string(default_binding) + ", 3)]] "; // Descriptor set 3992#endif993994write_type(code, info.type);995code += ' ' + info.unique_name + " : register(u" + std::to_string(default_binding) + ");\n";996}997998_module.storages.push_back(info);9991000return res;1001}1002id define_uniform(const location &loc, uniform &info) override1003{1004const id res = make_id();1005define_name<naming::unique>(res, info.name);10061007if (_uniforms_to_spec_constants && info.has_initializer_value)1008{1009info.size = info.type.components() * 4;1010if (info.type.is_array())1011info.size *= info.type.array_length;10121013std::string &code = _blocks.at(_current_block);10141015write_location(code, loc);10161017assert(!info.type.has(type::q_static) && !info.type.has(type::q_const));10181019code += "static const ";1020write_type(code, info.type);1021code += ' ' + id_to_name(res) + " = ";1022if (!info.type.is_scalar())1023write_type<false, false>(code, info.type);1024code += "(SPEC_CONSTANT_" + info.name + ");\n";10251026_module.spec_constants.push_back(info);1027}1028else1029{1030if (info.type.is_matrix())1031info.size = align_up(info.type.cols * 4, 16, info.type.rows);1032else // Vectors are column major (1xN), matrices are row major (NxM)1033info.size = info.type.rows * 4;1034// Arrays are not packed in HLSL by default, each element is stored in a four-component vector (16 bytes)1035if (info.type.is_array())1036info.size = align_up(info.size, 16, info.type.array_length);10371038if (_shader_model < 40)1039_module.total_uniform_size /= 4;10401041// Data is packed into 4-byte boundaries (see https://docs.microsoft.com/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules)1042// This is already guaranteed, since all types are at least 4-byte in size1043info.offset = _module.total_uniform_size;1044// Additionally, HLSL packs data so that it does not cross a 16-byte boundary1045const uint32_t remaining = 16 - (info.offset & 15);1046if (remaining != 16 && info.size > remaining)1047info.offset += remaining;1048_module.total_uniform_size = info.offset + info.size;10491050write_location<true>(_cbuffer_block, loc);10511052if (_shader_model >= 40)1053_cbuffer_block += '\t';1054if (info.type.is_matrix()) // Force row major matrices1055_cbuffer_block += "row_major ";10561057type type = info.type;1058if (_shader_model < 40)1059{1060// The HLSL compiler tries to evaluate boolean values with temporary registers, which breaks branches, so force it to use constant float registers1061if (type.is_boolean())1062type.base = type::t_float;10631064// Simply put each uniform into a separate constant register in shader model 3 for now1065info.offset *= 4;1066_module.total_uniform_size *= 4;1067}10681069write_type(_cbuffer_block, type);1070_cbuffer_block += ' ' + id_to_name(res);10711072if (info.type.is_array())1073_cbuffer_block += '[' + std::to_string(info.type.array_length) + ']';10741075if (_shader_model < 40)1076{1077// Every constant register is 16 bytes wide, so divide memory offset by 16 to get the constant register index1078// Note: All uniforms are floating-point in shader model 3, even if the uniform type says different!!1079_cbuffer_block += " : register(c" + std::to_string(info.offset / 16) + ')';1080}10811082_cbuffer_block += ";\n";10831084_module.uniforms.push_back(info);1085}10861087return res;1088}1089id define_variable(const location &loc, const type &type, std::string name, bool global, id initializer_value) override1090{1091// Constant variables with a constant initializer can just point to the initializer SSA variable, since they cannot be modified anyway, thus saving an unnecessary assignment1092if (initializer_value != 0 && type.has(type::q_const) &&1093std::find_if(_constant_lookup.begin(), _constant_lookup.end(),1094[initializer_value](const auto &x) {1095return initializer_value == std::get<2>(x);1096}) != _constant_lookup.end())1097return initializer_value;10981099const id res = make_id();11001101if (!name.empty())1102define_name<naming::general>(res, name);11031104std::string &code = _blocks.at(_current_block);11051106write_location(code, loc);11071108if (!global)1109code += '\t';11101111write_type(code, type);1112code += ' ' + id_to_name(res);11131114if (type.is_array())1115code += '[' + std::to_string(type.array_length) + ']';11161117if (initializer_value != 0)1118code += " = " + id_to_name(initializer_value);11191120code += ";\n";11211122return res;1123}1124id define_function(const location &loc, function &info) override1125{1126const id res = info.id = make_id();1127define_name<naming::unique>(res, info.unique_name);11281129assert(_current_block == 0 && (_current_function_declaration.empty() || info.type != shader_type::unknown));1130std::string &code = _current_function_declaration;11311132write_location(code, loc);11331134write_type(code, info.return_type);1135code += ' ' + id_to_name(res) + '(';11361137for (member_type ¶m : info.parameter_list)1138{1139param.id = make_id();1140define_name<naming::unique>(param.id, param.name);11411142code += '\n';1143write_location(code, param.location);1144code += '\t';1145write_type<true>(code, param.type);1146code += ' ' + id_to_name(param.id);11471148if (param.type.is_array())1149code += '[' + std::to_string(param.type.array_length) + ']';11501151if (!param.semantic.empty())1152code += " : " + convert_semantic(param.semantic, std::max(1u, param.type.cols / 4u) * std::max(1u, param.type.array_length));11531154code += ',';1155}11561157// Remove trailing comma1158if (!info.parameter_list.empty())1159code.pop_back();11601161code += ')';11621163if (!info.return_semantic.empty())1164code += " : " + convert_semantic(info.return_semantic);11651166code += '\n';11671168_functions.push_back(std::make_unique<function>(info));1169_current_function = _functions.back().get();11701171return res;1172}11731174void define_entry_point(function &func) override1175{1176// Modify entry point name since a new function is created for it below1177assert(!func.unique_name.empty() && func.unique_name[0] == 'F');1178if (_shader_model < 40 || func.type == shader_type::compute)1179func.unique_name[0] = 'E';11801181if (func.type == shader_type::compute)1182func.unique_name +=1183'_' + std::to_string(func.num_threads[0]) +1184'_' + std::to_string(func.num_threads[1]) +1185'_' + std::to_string(func.num_threads[2]);11861187if (std::find_if(_module.entry_points.begin(), _module.entry_points.end(),1188[&func](const std::pair<std::string, shader_type> &entry_point) {1189return entry_point.first == func.unique_name;1190}) != _module.entry_points.end())1191return;11921193_module.entry_points.emplace_back(func.unique_name, func.type);11941195// Only have to rewrite the entry point function signature in shader model 3 and for compute (to write "numthreads" attribute)1196if (_shader_model >= 40 && func.type != shader_type::compute)1197return;11981199function entry_point = func;1200entry_point.referenced_functions.push_back(func.id);12011202const auto is_color_semantic = [](const std::string &semantic) {1203return semantic.compare(0, 9, "SV_TARGET") == 0 || semantic.compare(0, 5, "COLOR") == 0; };1204const auto is_position_semantic = [](const std::string &semantic) {1205return semantic == "SV_POSITION" || semantic == "POSITION"; };12061207const id ret = make_id();1208define_name<naming::general>(ret, "ret");12091210std::string position_variable_name;1211{1212if (func.type == shader_type::vertex && func.return_type.is_struct())1213{1214// If this function returns a struct which contains a position output, keep track of its member name1215for (const member_type &member : get_struct(func.return_type.struct_definition).member_list)1216if (is_position_semantic(member.semantic))1217position_variable_name = id_to_name(ret) + '.' + member.name;1218}12191220if (is_color_semantic(func.return_semantic))1221{1222// The COLOR output semantic has to be a four-component vector in shader model 3, so enforce that1223entry_point.return_type.rows = 4;1224}1225if (is_position_semantic(func.return_semantic))1226{1227if (func.type == shader_type::vertex)1228// Keep track of the position output variable1229position_variable_name = id_to_name(ret);1230}1231}1232for (member_type ¶m : entry_point.parameter_list)1233{1234if (func.type == shader_type::vertex && param.type.is_struct())1235{1236for (const member_type &member : get_struct(param.type.struct_definition).member_list)1237if (is_position_semantic(member.semantic))1238position_variable_name = id_to_name(param.id) + '.' + member.name;1239}12401241if (is_color_semantic(param.semantic))1242{1243param.type.rows = 4;1244}1245if (is_position_semantic(param.semantic))1246{1247if (func.type == shader_type::vertex)1248// Keep track of the position output variable1249position_variable_name = id_to_name(param.id);1250else if (func.type == shader_type::pixel)1251// Change the position input semantic in pixel shaders1252param.semantic = "VPOS";1253}1254}12551256assert(_current_function_declaration.empty());1257if (func.type == shader_type::compute)1258_current_function_declaration += "[numthreads(" +1259std::to_string(func.num_threads[0]) + ", " +1260std::to_string(func.num_threads[1]) + ", " +1261std::to_string(func.num_threads[2]) + ")]\n";12621263define_function({}, entry_point);1264enter_block(create_block());12651266std::string &code = _blocks.at(_current_block);12671268// Clear all color output parameters so no component is left uninitialized1269for (const member_type ¶m : entry_point.parameter_list)1270{1271if (is_color_semantic(param.semantic))1272code += '\t' + id_to_name(param.id) + " = float4(0.0, 0.0, 0.0, 0.0);\n";1273}12741275code += '\t';1276if (is_color_semantic(func.return_semantic))1277{1278code += "const float4 " + id_to_name(ret) + " = float4(";1279}1280else if (!func.return_type.is_void())1281{1282write_type(code, func.return_type);1283code += ' ' + id_to_name(ret) + " = ";1284}12851286// Call the function this entry point refers to1287code += id_to_name(func.id) + '(';12881289for (size_t i = 0; i < func.parameter_list.size(); ++i)1290{1291code += id_to_name(entry_point.parameter_list[i].id);12921293const member_type ¶m = func.parameter_list[i];12941295if (is_color_semantic(param.semantic))1296{1297code += '.';1298for (unsigned int c = 0; c < param.type.rows; c++)1299code += "xyzw"[c];1300}13011302code += ", ";1303}13041305// Remove trailing ", "1306if (!entry_point.parameter_list.empty())1307code.erase(code.size() - 2);13081309code += ')';13101311// Cast the output value to a four-component vector1312if (is_color_semantic(func.return_semantic))1313{1314for (unsigned int c = 0; c < (4 - func.return_type.rows); c++)1315code += ", 0.0";1316code += ')';1317}13181319code += ";\n";13201321// Shift everything by half a viewport pixel to workaround the different half-pixel offset in D3D9 (https://aras-p.info/blog/2016/04/08/solving-dx9-half-pixel-offset/)1322if (func.type == shader_type::vertex && !position_variable_name.empty()) // Check if we are in a vertex shader definition1323code += '\t' + position_variable_name + ".xy += __TEXEL_SIZE__ * " + position_variable_name + ".ww;\n";13241325leave_block_and_return(func.return_type.is_void() ? 0 : ret);1326leave_function();1327}13281329id emit_load(const expression &exp, bool force_new_id) override1330{1331if (exp.is_constant)1332return emit_constant(exp.type, exp.constant);1333else if (exp.chain.empty() && !force_new_id) // Can refer to values without access chain directly1334return exp.base;13351336const id res = make_id();13371338static const char s_matrix_swizzles[16][5] = {1339"_m00", "_m01", "_m02", "_m03",1340"_m10", "_m11", "_m12", "_m13",1341"_m20", "_m21", "_m22", "_m23",1342"_m30", "_m31", "_m32", "_m33"1343};13441345std::string type, expr_code = id_to_name(exp.base);13461347for (const expression::operation &op : exp.chain)1348{1349switch (op.op)1350{1351case expression::operation::op_cast:1352type.clear();1353write_type<false, false>(type, op.to);1354// Cast is in parentheses so that a subsequent operation operates on the casted value1355expr_code = "((" + type + ')' + expr_code + ')';1356break;1357case expression::operation::op_member:1358expr_code += '.';1359expr_code += get_struct(op.from.struct_definition).member_list[op.index].name;1360break;1361case expression::operation::op_dynamic_index:1362expr_code += '[' + id_to_name(op.index) + ']';1363break;1364case expression::operation::op_constant_index:1365if (op.from.is_vector() && !op.from.is_array())1366expr_code += '.',1367expr_code += "xyzw"[op.index];1368else1369expr_code += '[' + std::to_string(op.index) + ']';1370break;1371case expression::operation::op_swizzle:1372expr_code += '.';1373for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i)1374if (op.from.is_matrix())1375expr_code += s_matrix_swizzles[op.swizzle[i]];1376else1377expr_code += "xyzw"[op.swizzle[i]];1378break;1379}1380}13811382if (force_new_id)1383{1384// Need to store value in a new variable to comply with request for a new ID1385std::string &code = _blocks.at(_current_block);13861387code += '\t';1388write_type(code, exp.type);1389code += ' ' + id_to_name(res) + " = " + expr_code + ";\n";1390}1391else1392{1393// Avoid excessive variable definitions by instancing simple load operations in code every time1394define_name<naming::expression>(res, std::move(expr_code));1395}13961397return res;1398}1399void emit_store(const expression &exp, id value) override1400{1401std::string &code = _blocks.at(_current_block);14021403write_location(code, exp.location);14041405code += '\t' + id_to_name(exp.base);14061407static const char s_matrix_swizzles[16][5] = {1408"_m00", "_m01", "_m02", "_m03",1409"_m10", "_m11", "_m12", "_m13",1410"_m20", "_m21", "_m22", "_m23",1411"_m30", "_m31", "_m32", "_m33"1412};14131414for (const expression::operation &op : exp.chain)1415{1416switch (op.op)1417{1418case expression::operation::op_member:1419code += '.';1420code += get_struct(op.from.struct_definition).member_list[op.index].name;1421break;1422case expression::operation::op_dynamic_index:1423code += '[' + id_to_name(op.index) + ']';1424break;1425case expression::operation::op_constant_index:1426code += '[' + std::to_string(op.index) + ']';1427break;1428case expression::operation::op_swizzle:1429code += '.';1430for (int i = 0; i < 4 && op.swizzle[i] >= 0; ++i)1431if (op.from.is_matrix())1432code += s_matrix_swizzles[op.swizzle[i]];1433else1434code += "xyzw"[op.swizzle[i]];1435break;1436}1437}14381439code += " = " + id_to_name(value) + ";\n";1440}14411442id emit_constant(const type &data_type, const constant &data) override1443{1444const id res = make_id();14451446if (data_type.is_array())1447{1448assert(data_type.has(type::q_const));14491450if (const auto it = std::find_if(_constant_lookup.begin(), _constant_lookup.end(),1451[&data_type, &data](const std::tuple<type, constant, id> &x) {1452if (!(std::get<0>(x) == data_type && std::memcmp(&std::get<1>(x).as_uint[0], &data.as_uint[0], sizeof(uint32_t) * 16) == 0 && std::get<1>(x).array_data.size() == data.array_data.size()))1453return false;1454for (size_t i = 0; i < data.array_data.size(); ++i)1455if (std::memcmp(&std::get<1>(x).array_data[i].as_uint[0], &data.array_data[i].as_uint[0], sizeof(uint32_t) * 16) != 0)1456return false;1457return true;1458});1459it != _constant_lookup.end())1460return std::get<2>(*it); // Reuse existing constant instead of duplicating the definition1461else1462_constant_lookup.push_back({ data_type, data, res });14631464// Put constant variable into global scope, so that it can be reused in different blocks1465std::string &code = _blocks.at(0);14661467// Array constants need to be stored in a constant variable as they cannot be used in-place1468code += "static const ";1469write_type<false, false>(code, data_type);1470code += ' ' + id_to_name(res);1471code += '[' + std::to_string(data_type.array_length) + ']';1472code += " = ";1473write_constant(code, data_type, data);1474code += ";\n";1475return res;1476}14771478std::string code;1479write_constant(code, data_type, data);1480define_name<naming::expression>(res, std::move(code));14811482return res;1483}14841485id emit_unary_op(const location &loc, tokenid op, const type &res_type, id val) override1486{1487const id res = make_id();14881489std::string &code = _blocks.at(_current_block);14901491write_location(code, loc);14921493code += '\t';1494write_type(code, res_type);1495code += ' ' + id_to_name(res) + " = ";14961497if (_shader_model < 40 && op == tokenid::tilde)1498code += "0xFFFFFFFF - "; // Emulate bitwise not operator on shader model 31499else1500code += char(op);15011502code += id_to_name(val) + ";\n";15031504return res;1505}1506id emit_binary_op(const location &loc, tokenid op, const type &res_type, const type &, id lhs, id rhs) override1507{1508const id res = make_id();15091510std::string &code = _blocks.at(_current_block);15111512write_location(code, loc);15131514code += '\t';1515write_type(code, res_type);1516code += ' ' + id_to_name(res) + " = ";15171518if (_shader_model < 40)1519{1520// See bitwise shift operator emulation below1521if (op == tokenid::less_less || op == tokenid::less_less_equal)1522code += '(';1523else if (op == tokenid::greater_greater || op == tokenid::greater_greater_equal)1524code += "floor(";1525}15261527code += id_to_name(lhs) + ' ';15281529switch (op)1530{1531case tokenid::plus:1532case tokenid::plus_plus:1533case tokenid::plus_equal:1534code += '+';1535break;1536case tokenid::minus:1537case tokenid::minus_minus:1538case tokenid::minus_equal:1539code += '-';1540break;1541case tokenid::star:1542case tokenid::star_equal:1543code += '*';1544break;1545case tokenid::slash:1546case tokenid::slash_equal:1547code += '/';1548break;1549case tokenid::percent:1550case tokenid::percent_equal:1551code += '%';1552break;1553case tokenid::caret:1554case tokenid::caret_equal:1555code += '^';1556break;1557case tokenid::pipe:1558case tokenid::pipe_equal:1559code += '|';1560break;1561case tokenid::ampersand:1562case tokenid::ampersand_equal:1563code += '&';1564break;1565case tokenid::less_less:1566case tokenid::less_less_equal:1567code += _shader_model >= 40 ? "<<" : ") * exp2("; // Emulate bitwise shift operators on shader model 31568break;1569case tokenid::greater_greater:1570case tokenid::greater_greater_equal:1571code += _shader_model >= 40 ? ">>" : ") / exp2(";1572break;1573case tokenid::pipe_pipe:1574code += "||";1575break;1576case tokenid::ampersand_ampersand:1577code += "&&";1578break;1579case tokenid::less:1580code += '<';1581break;1582case tokenid::less_equal:1583code += "<=";1584break;1585case tokenid::greater:1586code += '>';1587break;1588case tokenid::greater_equal:1589code += ">=";1590break;1591case tokenid::equal_equal:1592code += "==";1593break;1594case tokenid::exclaim_equal:1595code += "!=";1596break;1597default:1598assert(false);1599}16001601code += ' ' + id_to_name(rhs);16021603if (_shader_model < 40)1604{1605// See bitwise shift operator emulation above1606if (op == tokenid::less_less || op == tokenid::less_less_equal ||1607op == tokenid::greater_greater || op == tokenid::greater_greater_equal)1608code += ')';1609}16101611code += ";\n";16121613return res;1614}1615id emit_ternary_op(const location &loc, tokenid op, const type &res_type, id condition, id true_value, id false_value) override1616{1617if (op != tokenid::question)1618return assert(false), 0; // Should never happen, since this is the only ternary operator currently supported16191620const id res = make_id();16211622std::string &code = _blocks.at(_current_block);16231624write_location(code, loc);16251626code += '\t';1627write_type(code, res_type);1628code += ' ' + id_to_name(res);16291630if (res_type.is_array())1631code += '[' + std::to_string(res_type.array_length) + ']';16321633code += " = " + id_to_name(condition) + " ? " + id_to_name(true_value) + " : " + id_to_name(false_value) + ";\n";16341635return res;1636}1637id emit_call(const location &loc, id function, const type &res_type, const std::vector<expression> &args) override1638{1639#ifndef NDEBUG1640for (const expression &arg : args)1641assert(arg.chain.empty() && arg.base != 0);1642#endif16431644const id res = make_id();16451646std::string &code = _blocks.at(_current_block);16471648write_location(code, loc);16491650code += '\t';16511652if (!res_type.is_void())1653{1654write_type(code, res_type);1655code += ' ' + id_to_name(res);16561657if (res_type.is_array())1658code += '[' + std::to_string(res_type.array_length) + ']';16591660code += " = ";1661}16621663code += id_to_name(function) + '(';16641665for (const expression &arg : args)1666{1667code += id_to_name(arg.base);1668code += ", ";1669}16701671// Remove trailing ", "1672if (!args.empty())1673code.erase(code.size() - 2);16741675code += ");\n";16761677return res;1678}1679id emit_call_intrinsic(const location &loc, id intrinsic, const type &res_type, const std::vector<expression> &args) override1680{1681#ifndef NDEBUG1682for (const expression &arg : args)1683assert(arg.chain.empty() && arg.base != 0);1684#endif16851686const id res = make_id();16871688std::string &code = _blocks.at(_current_block);16891690enum1691{1692#define IMPLEMENT_INTRINSIC_HLSL(name, i, code) name##i,1693#include "effect_symbol_table_intrinsics.inl"1694};16951696write_location(code, loc);16971698code += '\t';16991700if (!res_type.is_void())1701{1702write_type(code, res_type);1703code += ' ' + id_to_name(res) + " = ";1704}17051706switch (intrinsic)1707{1708#define IMPLEMENT_INTRINSIC_HLSL(name, i, code) case name##i: code break;1709#include "effect_symbol_table_intrinsics.inl"1710default:1711assert(false);1712}17131714code += ";\n";17151716return res;1717}1718id emit_construct(const location &loc, const type &res_type, const std::vector<expression> &args) override1719{1720#ifndef NDEBUG1721for (const expression &arg : args)1722assert((arg.type.is_scalar() || res_type.is_array()) && arg.chain.empty() && arg.base != 0);1723#endif17241725const id res = make_id();17261727std::string &code = _blocks.at(_current_block);17281729write_location(code, loc);17301731code += '\t';1732write_type(code, res_type);1733code += ' ' + id_to_name(res);17341735if (res_type.is_array())1736code += '[' + std::to_string(res_type.array_length) + ']';17371738code += " = ";17391740if (res_type.is_array())1741code += "{ ";1742else1743write_type<false, false>(code, res_type), code += '(';17441745for (const expression &arg : args)1746{1747code += id_to_name(arg.base);1748code += ", ";1749}17501751// Remove trailing ", "1752if (!args.empty())1753code.erase(code.size() - 2);17541755if (res_type.is_array())1756code += " }";1757else1758code += ')';17591760code += ";\n";17611762return res;1763}17641765void emit_if(const location &loc, id condition_value, id condition_block, id true_statement_block, id false_statement_block, unsigned int flags) override1766{1767assert(condition_value != 0 && condition_block != 0 && true_statement_block != 0 && false_statement_block != 0);17681769std::string &code = _blocks.at(_current_block);17701771std::string &true_statement_data = _blocks.at(true_statement_block);1772std::string &false_statement_data = _blocks.at(false_statement_block);17731774increase_indentation_level(true_statement_data);1775increase_indentation_level(false_statement_data);17761777code += _blocks.at(condition_block);17781779write_location(code, loc);17801781code += '\t';17821783if (flags & 0x1) code += "[flatten] ";1784if (flags & 0x2) code += "[branch] ";17851786code += "if (" + id_to_name(condition_value) + ")\n\t{\n";1787code += true_statement_data;1788code += "\t}\n";17891790if (!false_statement_data.empty())1791{1792code += "\telse\n\t{\n";1793code += false_statement_data;1794code += "\t}\n";1795}17961797// Remove consumed blocks to save memory1798_blocks.erase(condition_block);1799_blocks.erase(true_statement_block);1800_blocks.erase(false_statement_block);1801}1802id emit_phi(const location &loc, id condition_value, id condition_block, id true_value, id true_statement_block, id false_value, id false_statement_block, const type &res_type) override1803{1804assert(condition_value != 0 && condition_block != 0 && true_value != 0 && true_statement_block != 0 && false_value != 0 && false_statement_block != 0);18051806std::string &code = _blocks.at(_current_block);18071808std::string &true_statement_data = _blocks.at(true_statement_block);1809std::string &false_statement_data = _blocks.at(false_statement_block);18101811increase_indentation_level(true_statement_data);1812increase_indentation_level(false_statement_data);18131814const id res = make_id();18151816code += _blocks.at(condition_block);18171818code += '\t';1819write_type(code, res_type);1820code += ' ' + id_to_name(res) + ";\n";18211822write_location(code, loc);18231824code += "\tif (" + id_to_name(condition_value) + ")\n\t{\n";1825code += (true_statement_block != condition_block ? true_statement_data : std::string());1826code += "\t\t" + id_to_name(res) + " = " + id_to_name(true_value) + ";\n";1827code += "\t}\n\telse\n\t{\n";1828code += (false_statement_block != condition_block ? false_statement_data : std::string());1829code += "\t\t" + id_to_name(res) + " = " + id_to_name(false_value) + ";\n";1830code += "\t}\n";18311832// Remove consumed blocks to save memory1833_blocks.erase(condition_block);1834_blocks.erase(true_statement_block);1835_blocks.erase(false_statement_block);18361837return res;1838}1839void emit_loop(const location &loc, id condition_value, id prev_block, id header_block, id condition_block, id loop_block, id continue_block, unsigned int flags) override1840{1841assert(prev_block != 0 && header_block != 0 && loop_block != 0 && continue_block != 0);18421843std::string &code = _blocks.at(_current_block);18441845std::string &loop_data = _blocks.at(loop_block);1846std::string &continue_data = _blocks.at(continue_block);18471848increase_indentation_level(loop_data);1849increase_indentation_level(loop_data);1850increase_indentation_level(continue_data);18511852code += _blocks.at(prev_block);18531854std::string attributes;1855if (flags & 0x1)1856attributes += "[unroll] ";1857if (flags & 0x2)1858attributes += _shader_model >= 40 ? "[fastopt] " : "[loop] ";18591860// Condition value can be missing in infinite loop constructs like "for (;;)"1861std::string condition_name = condition_value != 0 ? id_to_name(condition_value) : "true";18621863if (condition_block == 0)1864{1865// Convert the last SSA variable initializer to an assignment statement1866const size_t pos_assign = continue_data.rfind(condition_name);1867const size_t pos_prev_assign = continue_data.rfind('\t', pos_assign);1868continue_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1);18691870// We need to add the continue block to all "continue" statements as well1871const std::string continue_id = "__CONTINUE__" + std::to_string(continue_block);1872for (size_t offset = 0; (offset = loop_data.find(continue_id, offset)) != std::string::npos; offset += continue_data.size())1873loop_data.replace(offset, continue_id.size(), continue_data);18741875code += "\tbool " + condition_name + ";\n";18761877write_location(code, loc);18781879code += '\t' + attributes;1880code += "do\n\t{\n\t\t{\n";1881code += loop_data; // Encapsulate loop body into another scope, so not to confuse any local variables with the current iteration variable accessed in the continue block below1882code += "\t\t}\n";1883code += continue_data;1884code += "\t}\n\twhile (" + condition_name + ");\n";1885}1886else1887{1888std::string &condition_data = _blocks.at(condition_block);18891890// Work around D3DCompiler putting uniform variables that are used as the loop count register into integer registers (only in SM3)1891// Only applies to dynamic loops with uniform variables in the condition, where it generates a loop instruction like "rep i0", but then expects the "i0" register to be set externally1892// Moving the loop condition into the loop body forces it to move the uniform variable into a constant register instead and geneates a fixed number of loop iterations with "defi i0, 255, ..."1893// Check 'condition_name' instead of 'condition_value' here to also catch cases where a constant boolean expression was passed in as loop condition1894bool use_break_statement_for_condition = (_shader_model < 40 && condition_name != "true") &&1895std::find_if(_module.uniforms.begin(), _module.uniforms.end(),1896[&](const uniform &info) {1897return condition_data.find(info.name) != std::string::npos || condition_name.find(info.name) != std::string::npos;1898}) != _module.uniforms.end();18991900// If the condition data is just a single line, then it is a simple expression, which we can just put into the loop condition as-is1901if (!use_break_statement_for_condition && std::count(condition_data.begin(), condition_data.end(), '\n') == 1)1902{1903// Convert SSA variable initializer back to a condition expression1904const size_t pos_assign = condition_data.find('=');1905condition_data.erase(0, pos_assign + 2);1906const size_t pos_semicolon = condition_data.rfind(';');1907condition_data.erase(pos_semicolon);19081909condition_name = std::move(condition_data);1910assert(condition_data.empty());1911}1912else1913{1914code += condition_data;19151916increase_indentation_level(condition_data);19171918// Convert the last SSA variable initializer to an assignment statement1919const size_t pos_assign = condition_data.rfind(condition_name);1920const size_t pos_prev_assign = condition_data.rfind('\t', pos_assign);1921condition_data.erase(pos_prev_assign + 1, pos_assign - pos_prev_assign - 1);1922}19231924const std::string continue_id = "__CONTINUE__" + std::to_string(continue_block);1925for (size_t offset = 0; (offset = loop_data.find(continue_id, offset)) != std::string::npos; offset += continue_data.size())1926loop_data.replace(offset, continue_id.size(), continue_data + condition_data);19271928write_location(code, loc);19291930code += '\t' + attributes;1931if (use_break_statement_for_condition)1932code += "while (true)\n\t{\n\t\tif (" + condition_name + ")\n\t\t{\n";1933else1934code += "while (" + condition_name + ")\n\t{\n\t\t{\n";1935code += loop_data;1936code += "\t\t}\n";1937if (use_break_statement_for_condition)1938code += "\t\telse break;\n";1939code += continue_data;1940code += condition_data;1941code += "\t}\n";19421943_blocks.erase(condition_block);1944}19451946// Remove consumed blocks to save memory1947_blocks.erase(prev_block);1948_blocks.erase(header_block);1949_blocks.erase(loop_block);1950_blocks.erase(continue_block);1951}1952void emit_switch(const location &loc, id selector_value, id selector_block, id default_label, id default_block, const std::vector<id> &case_literal_and_labels, const std::vector<id> &case_blocks, unsigned int flags) override1953{1954assert(selector_value != 0 && selector_block != 0 && default_label != 0 && default_block != 0);1955assert(case_blocks.size() == case_literal_and_labels.size() / 2);19561957std::string &code = _blocks.at(_current_block);19581959code += _blocks.at(selector_block);19601961if (_shader_model >= 40)1962{1963write_location(code, loc);19641965code += '\t';19661967if (flags & 0x1) code += "[flatten] ";1968if (flags & 0x2) code += "[branch] ";1969if (flags & 0x4) code += "[forcecase] ";1970if (flags & 0x8) code += "[call] ";19711972code += "switch (" + id_to_name(selector_value) + ")\n\t{\n";19731974std::vector<id> labels = case_literal_and_labels;1975for (size_t i = 0; i < labels.size(); i += 2)1976{1977if (labels[i + 1] == 0)1978continue; // Happens if a case was already handled, see below19791980code += "\tcase " + std::to_string(labels[i]) + ": ";19811982if (labels[i + 1] == default_label)1983{1984code += "default: ";1985default_label = 0;1986}1987else1988{1989for (size_t k = i + 2; k < labels.size(); k += 2)1990{1991if (labels[k + 1] == 0 || labels[k + 1] != labels[i + 1])1992continue;19931994code += "case " + std::to_string(labels[k]) + ": ";1995labels[k + 1] = 0;1996}1997}19981999assert(case_blocks[i / 2] != 0);2000std::string &case_data = _blocks.at(case_blocks[i / 2]);20012002increase_indentation_level(case_data);20032004code += "{\n";2005code += case_data;2006code += "\t}\n";2007}20082009if (default_label != 0 && default_block != _current_block)2010{2011std::string &default_data = _blocks.at(default_block);20122013increase_indentation_level(default_data);20142015code += "\tdefault: {\n";2016code += default_data;2017code += "\t}\n";20182019_blocks.erase(default_block);2020}20212022code += "\t}\n";2023}2024else // Switch statements do not work correctly in SM3 if a constant is used as selector value (this is a D3DCompiler bug), so replace them with if statements2025{2026write_location(code, loc);20272028code += "\t[unroll] do { "; // This dummy loop makes "break" statements work20292030if (flags & 0x1) code += "[flatten] ";2031if (flags & 0x2) code += "[branch] ";20322033std::vector<id> labels = case_literal_and_labels;2034for (size_t i = 0; i < labels.size(); i += 2)2035{2036if (labels[i + 1] == 0)2037continue; // Happens if a case was already handled, see below20382039code += "if (" + id_to_name(selector_value) + " == " + std::to_string(labels[i]);20402041for (size_t k = i + 2; k < labels.size(); k += 2)2042{2043if (labels[k + 1] == 0 || labels[k + 1] != labels[i + 1])2044continue;20452046code += " || " + id_to_name(selector_value) + " == " + std::to_string(labels[k]);2047labels[k + 1] = 0;2048}20492050assert(case_blocks[i / 2] != 0);2051std::string &case_data = _blocks.at(case_blocks[i / 2]);20522053increase_indentation_level(case_data);20542055code += ")\n\t{\n";2056code += case_data;2057code += "\t}\n\telse\n\t";2058}20592060code += "{\n";20612062if (default_block != _current_block)2063{2064std::string &default_data = _blocks.at(default_block);20652066increase_indentation_level(default_data);20672068code += default_data;20692070_blocks.erase(default_block);2071}20722073code += "\t} } while (false);\n";2074}20752076// Remove consumed blocks to save memory2077_blocks.erase(selector_block);2078for (const id case_block : case_blocks)2079_blocks.erase(case_block);2080}20812082id create_block() override2083{2084const id res = make_id();20852086std::string &block = _blocks.emplace(res, std::string()).first->second;2087// Reserve a decently big enough memory block to avoid frequent reallocations2088block.reserve(4096);20892090return res;2091}2092id set_block(id id) override2093{2094_last_block = _current_block;2095_current_block = id;20962097return _last_block;2098}2099void enter_block(id id) override2100{2101_current_block = id;2102}2103id leave_block_and_kill() override2104{2105if (!is_in_block())2106return 0;21072108std::string &code = _blocks.at(_current_block);21092110code += "\tdiscard;\n";21112112const type &return_type = _current_function->return_type;2113if (!return_type.is_void())2114{2115// HLSL compiler doesn't handle discard like a shader kill2116// Add a return statement to exit functions in case discard is the last control flow statement2117// See https://docs.microsoft.com/windows/win32/direct3dhlsl/discard--sm4---asm-2118code += "\treturn ";2119write_constant(code, return_type, constant());2120code += ";\n";2121}21222123return set_block(0);2124}2125id leave_block_and_return(id value) override2126{2127if (!is_in_block())2128return 0;21292130// Skip implicit return statement2131if (!_current_function->return_type.is_void() && value == 0)2132return set_block(0);21332134std::string &code = _blocks.at(_current_block);21352136code += "\treturn";21372138if (value != 0)2139code += ' ' + id_to_name(value);21402141code += ";\n";21422143return set_block(0);2144}2145id leave_block_and_switch(id, id) override2146{2147if (!is_in_block())2148return _last_block;21492150return set_block(0);2151}2152id leave_block_and_branch(id target, unsigned int loop_flow) override2153{2154if (!is_in_block())2155return _last_block;21562157std::string &code = _blocks.at(_current_block);21582159switch (loop_flow)2160{2161case 1:2162code += "\tbreak;\n";2163break;2164case 2: // Keep track of continue target block, so we can insert its code here later2165code += "__CONTINUE__" + std::to_string(target) + "\tcontinue;\n";2166break;2167}21682169return set_block(0);2170}2171id leave_block_and_branch_conditional(id, id, id) override2172{2173if (!is_in_block())2174return _last_block;21752176return set_block(0);2177}2178void leave_function() override2179{2180assert(_current_function != nullptr && _last_block != 0);21812182_blocks.emplace(_current_function->id, _current_function_declaration + "{\n" + _blocks.at(_last_block) + "}\n");21832184_current_function = nullptr;2185_current_function_declaration.clear();2186}2187};21882189codegen *reshadefx::create_codegen_hlsl(unsigned int shader_model, bool debug_info, bool uniforms_to_spec_constants)2190{2191return new codegen_hlsl(shader_model, debug_info, uniforms_to_spec_constants);2192}219321942195