CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/GPU/D3D9/thin3d_d3d9.cpp
Views: 1401
#include <vector>1#include <cstdio>2#include <cstdint>34#include "ppsspp_config.h"56#ifdef _DEBUG7#define D3D_DEBUG_INFO8#endif910#include <d3d9.h>11#ifdef USE_CRT_DBG12#undef new13#endif1415#include <D3Dcompiler.h>16#include "Common/GPU/D3D9/D3DCompilerLoader.h"1718#ifndef D3DXERR_INVALIDDATA19#define D3DXERR_INVALIDDATA 0x88760b5920#endif2122#include "Common/Math/lin/matrix4x4.h"23#include "Common/GPU/thin3d.h"24#include "Common/GPU/D3D9/D3D9StateCache.h"25#include "Common/OSVersion.h"26#include "Common/StringUtils.h"27#include "Common/TimeUtil.h"2829#include "Common/Log.h"30#include <wrl/client.h>3132using namespace Microsoft::WRL;3334namespace Draw {3536static constexpr int MAX_BOUND_TEXTURES = 8;3738// Could be declared as u839static const D3DCMPFUNC compareToD3D9[] = {40D3DCMP_NEVER,41D3DCMP_LESS,42D3DCMP_EQUAL,43D3DCMP_LESSEQUAL,44D3DCMP_GREATER,45D3DCMP_NOTEQUAL,46D3DCMP_GREATEREQUAL,47D3DCMP_ALWAYS48};4950// Could be declared as u851static const D3DBLENDOP blendEqToD3D9[] = {52D3DBLENDOP_ADD,53D3DBLENDOP_SUBTRACT,54D3DBLENDOP_REVSUBTRACT,55D3DBLENDOP_MIN,56D3DBLENDOP_MAX,57};5859// Could be declared as u860static const D3DBLEND blendFactorToD3D9[] = {61D3DBLEND_ZERO,62D3DBLEND_ONE,63D3DBLEND_SRCCOLOR,64D3DBLEND_INVSRCCOLOR,65D3DBLEND_DESTCOLOR,66D3DBLEND_INVDESTCOLOR,67D3DBLEND_SRCALPHA,68D3DBLEND_INVSRCALPHA,69D3DBLEND_DESTALPHA,70D3DBLEND_INVDESTALPHA,71D3DBLEND_BLENDFACTOR,72D3DBLEND_INVBLENDFACTOR,73D3DBLEND_BLENDFACTOR,74D3DBLEND_INVBLENDFACTOR,75D3DBLEND_ZERO,76D3DBLEND_ZERO,77D3DBLEND_ZERO,78D3DBLEND_ZERO,79};8081static const D3DTEXTUREADDRESS texWrapToD3D9[] = {82D3DTADDRESS_WRAP,83D3DTADDRESS_MIRROR,84D3DTADDRESS_CLAMP,85D3DTADDRESS_BORDER,86};8788static const D3DTEXTUREFILTERTYPE texFilterToD3D9[] = {89D3DTEXF_POINT,90D3DTEXF_LINEAR,91};9293static const D3DPRIMITIVETYPE primToD3D9[] = {94D3DPT_POINTLIST,95D3DPT_LINELIST,96D3DPT_LINESTRIP,97D3DPT_TRIANGLELIST,98D3DPT_TRIANGLESTRIP,99D3DPT_TRIANGLEFAN,100// These aren't available.101D3DPT_POINTLIST, // tess102D3DPT_POINTLIST, // geom ...103D3DPT_POINTLIST,104D3DPT_POINTLIST,105D3DPT_POINTLIST,106};107108static const D3DSTENCILOP stencilOpToD3D9[] = {109D3DSTENCILOP_KEEP,110D3DSTENCILOP_ZERO,111D3DSTENCILOP_REPLACE,112D3DSTENCILOP_INCRSAT,113D3DSTENCILOP_DECRSAT,114D3DSTENCILOP_INVERT,115D3DSTENCILOP_INCR,116D3DSTENCILOP_DECR,117};118119static D3DFORMAT FormatToD3DFMT(DataFormat fmt) {120switch (fmt) {121case DataFormat::R16_UNORM: return D3DFMT_L16; // closest match, should be a fine substitution if we ignore channels except R.122case DataFormat::R8G8B8A8_UNORM: return D3DFMT_A8R8G8B8;123case DataFormat::B8G8R8A8_UNORM: return D3DFMT_A8R8G8B8;124case DataFormat::R4G4B4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // emulated125case DataFormat::B4G4R4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // native126case DataFormat::A4R4G4B4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // emulated127case DataFormat::R5G6B5_UNORM_PACK16: return D3DFMT_R5G6B5;128case DataFormat::A1R5G5B5_UNORM_PACK16: return D3DFMT_A1R5G5B5;129case DataFormat::D24_S8: return D3DFMT_D24S8;130case DataFormat::D16: return D3DFMT_D16;131case DataFormat::BC1_RGBA_UNORM_BLOCK: return D3DFMT_DXT1;132case DataFormat::BC2_UNORM_BLOCK: return D3DFMT_DXT3; // DXT3 is indeed BC2.133case DataFormat::BC3_UNORM_BLOCK: return D3DFMT_DXT5; // DXT5 is indeed BC3134default: return D3DFMT_UNKNOWN;135}136}137138static int FormatToD3DDeclType(DataFormat type) {139switch (type) {140case DataFormat::R32_FLOAT: return D3DDECLTYPE_FLOAT1;141case DataFormat::R32G32_FLOAT: return D3DDECLTYPE_FLOAT2;142case DataFormat::R32G32B32_FLOAT: return D3DDECLTYPE_FLOAT3;143case DataFormat::R32G32B32A32_FLOAT: return D3DDECLTYPE_FLOAT4;144case DataFormat::R8G8B8A8_UNORM: return D3DDECLTYPE_UBYTE4N; // D3DCOLOR has a different byte ordering.145default: return D3DDECLTYPE_UNUSED;146}147}148149class D3D9Buffer;150151class D3D9DepthStencilState : public DepthStencilState {152public:153BOOL depthTestEnabled;154BOOL depthWriteEnabled;155D3DCMPFUNC depthCompare;156BOOL stencilEnabled;157D3DSTENCILOP stencilFail;158D3DSTENCILOP stencilZFail;159D3DSTENCILOP stencilPass;160D3DCMPFUNC stencilCompareOp;161162void Apply(LPDIRECT3DDEVICE9 device, uint8_t stencilRef, uint8_t stencilWriteMask, uint8_t stencilCompareMask) {163dxstate.depthTest.set(depthTestEnabled);164if (depthTestEnabled) {165dxstate.depthWrite.set(depthWriteEnabled);166dxstate.depthFunc.set(depthCompare);167}168dxstate.stencilTest.set(stencilEnabled);169if (stencilEnabled) {170dxstate.stencilOp.set(stencilFail, stencilZFail, stencilPass);171dxstate.stencilFunc.set(stencilCompareOp);172dxstate.stencilRef.set(stencilRef);173dxstate.stencilCompareMask.set(stencilCompareMask);174dxstate.stencilWriteMask.set(stencilWriteMask);175}176}177};178179class D3D9RasterState : public RasterState {180public:181DWORD cullMode; // D3DCULL_*182183void Apply(LPDIRECT3DDEVICE9 device) {184dxstate.cullMode.set(cullMode);185dxstate.scissorTest.enable();186// Force user clipping off.187dxstate.clipPlaneEnable.set(0);188}189};190191class D3D9BlendState : public BlendState {192public:193bool enabled;194D3DBLENDOP eqCol, eqAlpha;195D3DBLEND srcCol, srcAlpha, dstCol, dstAlpha;196uint32_t colorMask;197198void Apply(LPDIRECT3DDEVICE9 device) {199dxstate.blend.set(enabled);200dxstate.blendFunc.set(srcCol, dstCol, srcAlpha, dstAlpha);201dxstate.blendEquation.set(eqCol, eqAlpha);202dxstate.colorMask.set(colorMask);203}204};205206class D3D9SamplerState : public SamplerState {207public:208D3DTEXTUREADDRESS wrapS, wrapT;209D3DTEXTUREFILTERTYPE magFilt, minFilt, mipFilt;210211void Apply(LPDIRECT3DDEVICE9 device, int index) {212if (index == 0) {213dxstate.texAddressU.set(wrapS);214dxstate.texAddressV.set(wrapT);215dxstate.texMagFilter.set(magFilt);216dxstate.texMinFilter.set(minFilt);217dxstate.texMipFilter.set(mipFilt);218} else {219pD3Ddevice9->SetSamplerState(index, D3DSAMP_ADDRESSU, wrapS);220pD3Ddevice9->SetSamplerState(index, D3DSAMP_ADDRESSV, wrapT);221pD3Ddevice9->SetSamplerState(index, D3DSAMP_MAGFILTER, magFilt);222pD3Ddevice9->SetSamplerState(index, D3DSAMP_MINFILTER, minFilt);223pD3Ddevice9->SetSamplerState(index, D3DSAMP_MIPFILTER, mipFilt);224}225}226};227228class D3D9InputLayout : public InputLayout {229public:230D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc);231~D3D9InputLayout() {232}233int GetStride() const { return stride_; }234void Apply(LPDIRECT3DDEVICE9 device) {235device->SetVertexDeclaration(decl_.Get());236}237238private:239ComPtr<IDirect3DVertexDeclaration9> decl_;240int stride_;241};242243class D3D9ShaderModule : public ShaderModule {244public:245D3D9ShaderModule(ShaderStage stage, const std::string &tag) : stage_(stage), tag_(tag) {}246~D3D9ShaderModule() {247}248bool Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size);249void Apply(LPDIRECT3DDEVICE9 device) {250if (stage_ == ShaderStage::Fragment) {251device->SetPixelShader(pshader_.Get());252} else {253device->SetVertexShader(vshader_.Get());254}255}256ShaderStage GetStage() const override { return stage_; }257258private:259ShaderStage stage_;260ComPtr<IDirect3DVertexShader9> vshader_;261ComPtr<IDirect3DPixelShader9> pshader_;262std::string tag_;263};264265class D3D9Pipeline : public Pipeline {266public:267D3D9Pipeline() {}268~D3D9Pipeline() {269}270271D3D9ShaderModule *vshader = nullptr;272D3D9ShaderModule *pshader = nullptr;273274D3DPRIMITIVETYPE prim{};275AutoRef<D3D9InputLayout> inputLayout;276AutoRef<D3D9DepthStencilState> depthStencil;277AutoRef<D3D9BlendState> blend;278AutoRef<D3D9RasterState> raster;279UniformBufferDesc dynamicUniforms{};280281void Apply(LPDIRECT3DDEVICE9 device, uint8_t stencilRef, uint8_t stencilWriteMask, uint8_t stencilCompareMask);282};283284class D3D9Texture : public Texture {285public:286D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc);287~D3D9Texture();288void SetToSampler(LPDIRECT3DDEVICE9 device, int sampler);289LPDIRECT3DBASETEXTURE9 TexturePtr() const {290// TODO: Cleanup291if (tex_) {292return tex_.Get();293} else if (volTex_) {294return volTex_.Get();295} else if (cubeTex_) {296return cubeTex_.Get();297} else {298return nullptr;299}300}301void UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback);302303private:304void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback initDataCallback);305bool Create(const TextureDesc &desc);306ComPtr<IDirect3DDevice9> device_;307ComPtr<IDirect3DDevice9Ex> deviceEx_;308TextureType type_;309D3DFORMAT d3dfmt_;310ComPtr<IDirect3DTexture9> tex_;311ComPtr<IDirect3DVolumeTexture9> volTex_;312ComPtr<IDirect3DCubeTexture9> cubeTex_;313};314315D3D9Texture::D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc)316: device_(device), deviceEx_(deviceEx), tex_(nullptr), volTex_(nullptr), cubeTex_(nullptr) {317Create(desc);318}319320D3D9Texture::~D3D9Texture() {321}322323bool D3D9Texture::Create(const TextureDesc &desc) {324width_ = desc.width;325height_ = desc.height;326depth_ = desc.depth;327type_ = desc.type;328format_ = desc.format;329tex_ = nullptr;330d3dfmt_ = FormatToD3DFMT(desc.format);331332if (d3dfmt_ == D3DFMT_UNKNOWN) {333return false;334}335HRESULT hr = E_FAIL;336337D3DPOOL pool = D3DPOOL_MANAGED;338int usage = 0;339if (deviceEx_ != nullptr) {340pool = D3DPOOL_DEFAULT;341usage = D3DUSAGE_DYNAMIC;342}343if (desc.generateMips)344usage |= D3DUSAGE_AUTOGENMIPMAP;345switch (type_) {346case TextureType::LINEAR1D:347case TextureType::LINEAR2D:348hr = device_->CreateTexture(desc.width, desc.height, desc.generateMips ? 0 : desc.mipLevels, usage, d3dfmt_, pool, &tex_, NULL);349break;350case TextureType::LINEAR3D:351hr = device_->CreateVolumeTexture(desc.width, desc.height, desc.depth, desc.mipLevels, usage, d3dfmt_, pool, &volTex_, NULL);352break;353case TextureType::CUBE:354hr = device_->CreateCubeTexture(desc.width, desc.mipLevels, usage, d3dfmt_, pool, &cubeTex_, NULL);355break;356}357if (FAILED(hr)) {358ERROR_LOG(Log::G3D, "D3D9 Texture creation failed");359return false;360}361362if (desc.initData.size()) {363// In D3D9, after setting D3DUSAGE_AUTOGENMIPS, we can only access the top layer. The rest will be364// automatically generated.365int numLevels = desc.generateMips ? 1 : (int)desc.initData.size();366UpdateTextureLevels(desc.initData.data(), numLevels, desc.initDataCallback);367}368return true;369}370371void D3D9Texture::UpdateTextureLevels(const uint8_t * const *data, int numLevels, TextureCallback initDataCallback) {372int w = width_;373int h = height_;374int d = depth_;375for (int i = 0; i < numLevels; i++) {376SetImageData(0, 0, 0, w, h, d, i, 0, data[i], initDataCallback);377w = (w + 1) / 2;378h = (h + 1) / 2;379d = (d + 1) / 2;380}381}382383// Just switches R and G.384inline uint32_t Shuffle8888(uint32_t x) {385return (x & 0xFF00FF00) | ((x >> 16) & 0xFF) | ((x << 16) & 0xFF0000);386}387388void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback) {389if (!tex_)390return;391392if (level == 0) {393width_ = width;394height_ = height;395depth_ = depth;396}397398if (!stride) {399stride = width * (int)DataFormatSizeInBytes(format_);400}401402switch (type_) {403case TextureType::LINEAR2D:404{405D3DLOCKED_RECT rect;406if (x == 0 && y == 0) {407tex_->LockRect(level, &rect, NULL, D3DLOCK_DISCARD);408409if (callback) {410if (callback((uint8_t *)rect.pBits, data, width, height, depth, rect.Pitch, height * rect.Pitch)) {411// Now this is the source. All conversions below support in-place.412data = (const uint8_t *)rect.pBits;413stride = rect.Pitch;414}415}416417for (int i = 0; i < height; i++) {418uint8_t *dest = (uint8_t *)rect.pBits + rect.Pitch * i;419const uint8_t *source = data + stride * i;420int j;421switch (format_) {422case DataFormat::B4G4R4A4_UNORM_PACK16: // We emulate support for this format.423for (j = 0; j < width; j++) {424uint16_t color = ((const uint16_t *)source)[j];425((uint16_t *)dest)[j] = (color << 12) | (color >> 4);426}427break;428case DataFormat::A4R4G4B4_UNORM_PACK16:429case DataFormat::A1R5G5B5_UNORM_PACK16:430case DataFormat::R5G6B5_UNORM_PACK16:431// Native432if (data != rect.pBits)433memcpy(dest, source, width * sizeof(uint16_t));434break;435436case DataFormat::R8G8B8A8_UNORM:437for (j = 0; j < width; j++) {438((uint32_t *)dest)[j] = Shuffle8888(((uint32_t *)source)[j]);439}440break;441442case DataFormat::B8G8R8A8_UNORM:443if (data != rect.pBits)444memcpy(dest, source, sizeof(uint32_t) * width);445break;446447case DataFormat::R8_UNORM:448if (data != rect.pBits)449memcpy(dest, source, width);450break;451452case DataFormat::R16_UNORM:453if (data != rect.pBits)454memcpy(dest, source, sizeof(uint16_t) * width);455break;456457default:458// Unhandled data format copy.459DebugBreak();460break;461}462}463tex_->UnlockRect(level);464}465break;466}467468default:469ERROR_LOG(Log::G3D, "Non-LINEAR2D textures not yet supported");470break;471}472}473474void D3D9Texture::SetToSampler(LPDIRECT3DDEVICE9 device, int sampler) {475switch (type_) {476case TextureType::LINEAR1D:477case TextureType::LINEAR2D:478device->SetTexture(sampler, tex_.Get());479break;480481case TextureType::LINEAR3D:482device->SetTexture(sampler, volTex_.Get());483break;484485case TextureType::CUBE:486device->SetTexture(sampler, cubeTex_.Get());487break;488}489}490491class D3D9Context : public DrawContext {492public:493D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx);494~D3D9Context();495496const DeviceCaps &GetDeviceCaps() const override {497return caps_;498}499uint32_t GetSupportedShaderLanguages() const override {500return (uint32_t)ShaderLanguage::HLSL_D3D9;501}502uint32_t GetDataFormatSupport(DataFormat fmt) const override;503504ShaderModule *CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize, const char *tag) override;505DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override;506BlendState *CreateBlendState(const BlendStateDesc &desc) override;507SamplerState *CreateSamplerState(const SamplerStateDesc &desc) override;508RasterState *CreateRasterState(const RasterStateDesc &desc) override;509Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override;510Pipeline *CreateGraphicsPipeline(const PipelineDesc &desc, const char *tag) override;511InputLayout *CreateInputLayout(const InputLayoutDesc &desc) override;512Texture *CreateTexture(const TextureDesc &desc) override;513514Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;515516void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;517void UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) override;518519void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override {520// Not implemented521}522bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;523bool CopyFramebufferToMemory(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) override;524525// These functions should be self explanatory.526void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;527void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) override;528529uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override;530531void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;532533void BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) override;534void BindNativeTexture(int index, void *nativeTexture) override;535536void BindSamplerStates(int start, int count, SamplerState **states) override {537_assert_(start + count <= MAX_BOUND_TEXTURES);538for (int i = 0; i < count; ++i) {539D3D9SamplerState *s = static_cast<D3D9SamplerState *>(states[i]);540if (s)541s->Apply(device_, start + i);542}543}544void BindVertexBuffer(Buffer *vertexBuffer, int offset) override {545curVBuffer_ = (D3D9Buffer *)vertexBuffer;546curVBufferOffset_ = offset;547}548void BindIndexBuffer(Buffer *indexBuffer, int offset) override {549curIBuffer_ = (D3D9Buffer *)indexBuffer;550curIBufferOffset_ = offset;551}552553void BindPipeline(Pipeline *pipeline) override {554curPipeline_ = (D3D9Pipeline *)pipeline;555}556557void BeginFrame(Draw::DebugFlags debugFlags) override;558void EndFrame() override;559void Present(PresentMode presentMode, int vblanks) override;560561int GetFrameCount() override { return frameCount_; }562563void UpdateDynamicUniformBuffer(const void *ub, size_t size) override;564565// Raster state566void SetScissorRect(int left, int top, int width, int height) override;567void SetViewport(const Viewport &viewport) override;568void SetBlendFactor(float color[4]) override;569void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override;570571void ApplyDynamicState();572void Draw(int vertexCount, int offset) override;573void DrawIndexed(int vertexCount, int offset) override;574void DrawUP(const void *vdata, int vertexCount) override;575void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override;576577uint64_t GetNativeObject(NativeObject obj, void *srcObject) override {578switch (obj) {579case NativeObject::CONTEXT:580return (uint64_t)(uintptr_t)d3d_;581case NativeObject::DEVICE:582return (uint64_t)(uintptr_t)device_;583case NativeObject::DEVICE_EX:584return (uint64_t)(uintptr_t)deviceEx_;585case NativeObject::TEXTURE_VIEW:586return (uint64_t)(((D3D9Texture *)srcObject)->TexturePtr());587default:588return 0;589}590}591592std::string GetInfoString(InfoField info) const override {593switch (info) {594case InfoField::APIVERSION: return "DirectX 9.0";595case InfoField::VENDORSTRING: return identifier_.Description;596case InfoField::VENDOR: return "";597case InfoField::DRIVER: return identifier_.Driver; // eh, sort of598case InfoField::SHADELANGVERSION: return shadeLangVersion_;599case InfoField::APINAME: return "Direct3D 9";600default: return "?";601}602}603604void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;605606void Invalidate(InvalidationFlags flags) override;607608void SetInvalidationCallback(InvalidationCallback callback) override {609invalidationCallback_ = callback;610}611612private:613LPDIRECT3D9 d3d_;614LPDIRECT3D9EX d3dEx_;615LPDIRECT3DDEVICE9 device_;616LPDIRECT3DDEVICE9EX deviceEx_;617int adapterId_ = -1;618D3DADAPTER_IDENTIFIER9 identifier_{};619D3DCAPS9 d3dCaps_;620char shadeLangVersion_[64]{};621DeviceCaps caps_{};622int frameCount_ = FRAME_TIME_HISTORY_LENGTH;623624// Bound state625AutoRef<D3D9Pipeline> curPipeline_;626AutoRef<D3D9Buffer> curVBuffer_;627int curVBufferOffset_ = 0;628AutoRef<D3D9Buffer> curIBuffer_;629int curIBufferOffset_ = 0;630AutoRef<Framebuffer> curRenderTarget_;631632u8 stencilRefValue_ = 0;633u8 stencilCompareMask_ = 0xFF;634u8 stencilWriteMask_ = 0xFF;635636// Framebuffer state637ComPtr<IDirect3DSurface9> deviceRTsurf;638ComPtr<IDirect3DSurface9> deviceDSsurf;639bool supportsINTZ = false;640641// Dynamic state642uint8_t stencilRef_ = 0;643644InvalidationCallback invalidationCallback_;645};646647void D3D9Context::Invalidate(InvalidationFlags flags) {648if (flags & InvalidationFlags::CACHED_RENDER_STATE) {649curPipeline_ = nullptr;650}651}652653// TODO: Move this detection elsewhere when it's needed elsewhere, not before. It's ugly.654// Source: https://envytools.readthedocs.io/en/latest/hw/pciid.html#gf100655enum NVIDIAGeneration {656NV_PRE_KEPLER,657NV_KEPLER,658NV_MAXWELL,659NV_PASCAL,660NV_VOLTA,661NV_TURING, // or later662};663664static NVIDIAGeneration NVIDIAGetDeviceGeneration(int deviceID) {665if (deviceID >= 0x1180 && deviceID <= 0x11bf)666return NV_KEPLER; // GK104667if (deviceID >= 0x11c0 && deviceID <= 0x11fa)668return NV_KEPLER; // GK106669if (deviceID >= 0x0fc0 && deviceID <= 0x0fff)670return NV_KEPLER; // GK107671if (deviceID >= 0x1003 && deviceID <= 0x1028)672return NV_KEPLER; // GK110(B)673if (deviceID >= 0x1280 && deviceID <= 0x12ba)674return NV_KEPLER; // GK208675if (deviceID >= 0x1381 && deviceID <= 0x13b0)676return NV_MAXWELL; // GM107677if (deviceID >= 0x1340 && deviceID <= 0x134d)678return NV_MAXWELL; // GM108679if (deviceID >= 0x13c0 && deviceID <= 0x13d9)680return NV_MAXWELL; // GM204681if (deviceID >= 0x1401 && deviceID <= 0x1427)682return NV_MAXWELL; // GM206683if (deviceID >= 0x15f7 && deviceID <= 0x15f9)684return NV_PASCAL; // GP100685if (deviceID >= 0x15f7 && deviceID <= 0x15f9)686return NV_PASCAL; // GP100687if (deviceID >= 0x1b00 && deviceID <= 0x1b38)688return NV_PASCAL; // GP102689if (deviceID >= 0x1b80 && deviceID <= 0x1be1)690return NV_PASCAL; // GP104691if (deviceID >= 0x1c02 && deviceID <= 0x1c62)692return NV_PASCAL; // GP106693if (deviceID >= 0x1c81 && deviceID <= 0x1c92)694return NV_PASCAL; // GP107695if (deviceID >= 0x1d01 && deviceID <= 0x1d12)696return NV_PASCAL; // GP108697if (deviceID >= 0x1d81 && deviceID <= 0x1dba)698return NV_VOLTA; // GV100699if (deviceID >= 0x1e02 && deviceID <= 0x1e3c)700return NV_TURING; // TU102701if (deviceID >= 0x1e82 && deviceID <= 0x1ed0)702return NV_TURING; // TU104703if (deviceID >= 0x1f02 && deviceID <= 0x1f51)704return NV_TURING; // TU104705if (deviceID >= 0x1e02)706return NV_TURING; // More TU models or later, probably.707return NV_PRE_KEPLER;708}709710#define FB_DIV 1711#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))712713D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx)714: d3d_(d3d), d3dEx_(d3dEx), device_(device), deviceEx_(deviceEx), adapterId_(adapterId), caps_{} {715if (FAILED(d3d->GetAdapterIdentifier(adapterId, 0, &identifier_))) {716ERROR_LOG(Log::G3D, "Failed to get adapter identifier: %d", adapterId);717}718switch (identifier_.VendorId) {719case 0x10DE: caps_.vendor = GPUVendor::VENDOR_NVIDIA; break;720case 0x1002:721case 0x1022: caps_.vendor = GPUVendor::VENDOR_AMD; break;722case 0x163C:723case 0x8086:724case 0x8087: caps_.vendor = GPUVendor::VENDOR_INTEL; break;725default:726caps_.vendor = GPUVendor::VENDOR_UNKNOWN;727}728729D3DCAPS9 caps;730ZeroMemory(&caps, sizeof(caps));731HRESULT result = 0;732if (deviceEx_) {733result = deviceEx_->GetDeviceCaps(&caps);734} else {735result = device_->GetDeviceCaps(&caps);736}737738if (SUCCEEDED(result)) {739snprintf(shadeLangVersion_, sizeof(shadeLangVersion_), "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);740} else {741WARN_LOG(Log::G3D, "Direct3D9: Failed to get the device caps!");742truncate_cpy(shadeLangVersion_, "N/A");743}744745caps_.deviceID = identifier_.DeviceId;746caps_.depthRangeMinusOneToOne = false;747caps_.preferredDepthBufferFormat = DataFormat::D24_S8;748caps_.dualSourceBlend = false;749caps_.tesselationShaderSupported = false;750caps_.framebufferBlitSupported = true;751caps_.framebufferCopySupported = false;752caps_.framebufferDepthBlitSupported = false;753caps_.framebufferStencilBlitSupported = false;754caps_.framebufferDepthCopySupported = false;755caps_.framebufferSeparateDepthCopySupported = false;756caps_.texture3DSupported = true;757caps_.fragmentShaderDepthWriteSupported = true;758caps_.requiresHalfPixelOffset = true;759caps_.fragmentShaderStencilWriteSupported = false;760caps_.blendMinMaxSupported = true;761caps_.isTilingGPU = false;762caps_.multiSampleLevelsMask = 1; // More could be supported with some work.763764caps_.clipPlanesSupported = caps.MaxUserClipPlanes;765caps_.presentInstantModeChange = false;766caps_.presentMaxInterval = 1;767caps_.presentModesSupported = PresentMode::FIFO;768769caps_.provokingVertexLast = false; // D3D has it first, unfortunately (and no way to change it).770771if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {772caps_.anisoSupported = true;773}774if ((caps.TextureCaps & (D3DPTEXTURECAPS_NONPOW2CONDITIONAL | D3DPTEXTURECAPS_POW2)) == 0) {775caps_.textureNPOTFullySupported = true;776}777778caps_.supportsD3D9 = true;779if (!strcmp(identifier_.Description, "Intel(R) Iris(R) Xe Graphics")) {780caps_.supportsD3D9 = false;781}782783// VS range culling (killing triangles in the vertex shader using NaN) causes problems on Intel.784// Also causes problems on old NVIDIA.785switch (caps_.vendor) {786case Draw::GPUVendor::VENDOR_INTEL:787bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);788break;789case Draw::GPUVendor::VENDOR_NVIDIA:790// Older NVIDIAs don't seem to like NaNs in their DX9 vertex shaders.791// No idea if KEPLER is the right cutoff, but let's go with it.792if (NVIDIAGetDeviceGeneration(caps_.deviceID) < NV_KEPLER) {793bugs_.Infest(Bugs::BROKEN_NAN_IN_CONDITIONAL);794}795break;796}797798if (d3d) {799D3DDISPLAYMODE displayMode;800d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &displayMode);801802// To be safe, make sure both the display format and the FBO format support INTZ.803HRESULT displayINTZ = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, displayMode.Format, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ);804HRESULT displayINTY = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, displayMode.Format, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Y'))));805// Try to prevent INTZ on older Intel drivers that claim support.806supportsINTZ = SUCCEEDED(displayINTZ) && !SUCCEEDED(displayINTY) && IsWin7OrHigher();807}808caps_.textureDepthSupported = supportsINTZ;809810shaderLanguageDesc_.Init(HLSL_D3D9);811812dxstate.Restore();813}814815D3D9Context::~D3D9Context() {816DestroyPresets();817}818819ShaderModule *D3D9Context::CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t size, const char *tag) {820D3D9ShaderModule *shader = new D3D9ShaderModule(stage, tag);821if (shader->Compile(device_, data, size)) {822return shader;823} else {824delete shader;825return NULL;826}827}828829Pipeline *D3D9Context::CreateGraphicsPipeline(const PipelineDesc &desc, const char *tag) {830if (!desc.shaders.size()) {831ERROR_LOG(Log::G3D, "Pipeline %s requires at least one shader", tag);832return NULL;833}834D3D9Pipeline *pipeline = new D3D9Pipeline();835for (auto iter : desc.shaders) {836if (!iter) {837ERROR_LOG(Log::G3D, "NULL shader passed to CreateGraphicsPipeline(%s)", tag);838delete pipeline;839return NULL;840}841if (iter->GetStage() == ShaderStage::Fragment) {842pipeline->pshader = static_cast<D3D9ShaderModule *>(iter);843pipeline->pshader->AddRef();844}845else if (iter->GetStage() == ShaderStage::Vertex) {846pipeline->vshader = static_cast<D3D9ShaderModule *>(iter);847pipeline->vshader->AddRef();848}849}850pipeline->prim = primToD3D9[(int)desc.prim];851pipeline->depthStencil = (D3D9DepthStencilState *)desc.depthStencil;852pipeline->blend = (D3D9BlendState *)desc.blend;853pipeline->raster = (D3D9RasterState *)desc.raster;854pipeline->inputLayout = (D3D9InputLayout *)desc.inputLayout;855if (desc.uniformDesc)856pipeline->dynamicUniforms = *desc.uniformDesc;857return pipeline;858}859860DepthStencilState *D3D9Context::CreateDepthStencilState(const DepthStencilStateDesc &desc) {861D3D9DepthStencilState *ds = new D3D9DepthStencilState();862ds->depthTestEnabled = desc.depthTestEnabled;863ds->depthWriteEnabled = desc.depthWriteEnabled;864ds->depthCompare = compareToD3D9[(int)desc.depthCompare];865ds->stencilEnabled = desc.stencilEnabled;866ds->stencilCompareOp = compareToD3D9[(int)desc.stencil.compareOp];867ds->stencilPass = stencilOpToD3D9[(int)desc.stencil.passOp];868ds->stencilFail = stencilOpToD3D9[(int)desc.stencil.failOp];869ds->stencilZFail = stencilOpToD3D9[(int)desc.stencil.depthFailOp];870return ds;871}872873InputLayout *D3D9Context::CreateInputLayout(const InputLayoutDesc &desc) {874D3D9InputLayout *fmt = new D3D9InputLayout(device_, desc);875return fmt;876}877878BlendState *D3D9Context::CreateBlendState(const BlendStateDesc &desc) {879D3D9BlendState *bs = new D3D9BlendState();880bs->enabled = desc.enabled;881bs->eqCol = blendEqToD3D9[(int)desc.eqCol];882bs->srcCol = blendFactorToD3D9[(int)desc.srcCol];883bs->dstCol = blendFactorToD3D9[(int)desc.dstCol];884bs->eqAlpha = blendEqToD3D9[(int)desc.eqAlpha];885bs->srcAlpha = blendFactorToD3D9[(int)desc.srcAlpha];886bs->dstAlpha = blendFactorToD3D9[(int)desc.dstAlpha];887bs->colorMask = desc.colorMask;888// Ignore logic ops, we don't support them in D3D9889return bs;890}891892SamplerState *D3D9Context::CreateSamplerState(const SamplerStateDesc &desc) {893D3D9SamplerState *samps = new D3D9SamplerState();894samps->wrapS = texWrapToD3D9[(int)desc.wrapU];895samps->wrapT = texWrapToD3D9[(int)desc.wrapV];896samps->magFilt = texFilterToD3D9[(int)desc.magFilter];897samps->minFilt = texFilterToD3D9[(int)desc.minFilter];898samps->mipFilt = texFilterToD3D9[(int)desc.mipFilter];899return samps;900}901902RasterState *D3D9Context::CreateRasterState(const RasterStateDesc &desc) {903D3D9RasterState *rs = new D3D9RasterState();904rs->cullMode = D3DCULL_NONE;905if (desc.cull == CullMode::NONE) {906return rs;907}908switch (desc.frontFace) {909case Facing::CW:910switch (desc.cull) {911case CullMode::FRONT: rs->cullMode = D3DCULL_CCW; break;912case CullMode::BACK: rs->cullMode = D3DCULL_CW; break;913}914case Facing::CCW:915switch (desc.cull) {916case CullMode::FRONT: rs->cullMode = D3DCULL_CW; break;917case CullMode::BACK: rs->cullMode = D3DCULL_CCW; break;918}919}920return rs;921}922923Texture *D3D9Context::CreateTexture(const TextureDesc &desc) {924D3D9Texture *tex = new D3D9Texture(device_, deviceEx_, desc);925return tex;926}927928void D3D9Context::UpdateTextureLevels(Texture *texture, const uint8_t **data, TextureCallback initDataCallback, int numLevels) {929D3D9Texture *tex = (D3D9Texture *)texture;930tex->UpdateTextureLevels(data, numLevels, initDataCallback);931}932933934void D3D9Context::BindTextures(int start, int count, Texture **textures, TextureBindFlags flags) {935_assert_(start + count <= MAX_BOUND_TEXTURES);936for (int i = start; i < start + count; i++) {937D3D9Texture *tex = static_cast<D3D9Texture *>(textures[i - start]);938if (tex) {939tex->SetToSampler(device_, i);940} else {941device_->SetTexture(i, nullptr);942}943}944}945946void D3D9Context::BindNativeTexture(int index, void *nativeTexture) {947LPDIRECT3DTEXTURE9 texture = (LPDIRECT3DTEXTURE9)nativeTexture;948device_->SetTexture(index, texture);949}950951void D3D9Context::BeginFrame(Draw::DebugFlags debugFlags) {952FrameTimeData frameTimeData = frameTimeHistory_.Add(frameCount_);953frameTimeData.frameBegin = time_now_d();954frameTimeData.afterFenceWait = frameTimeData.frameBegin; // no fence wait955}956957void D3D9Context::EndFrame() {958frameTimeHistory_[frameCount_].firstSubmit = time_now_d();959curPipeline_ = nullptr;960}961962void D3D9Context::Present(PresentMode presentMode, int vblanks) {963frameTimeHistory_[frameCount_].queuePresent = time_now_d();964if (deviceEx_) {965deviceEx_->EndScene();966deviceEx_->PresentEx(NULL, NULL, NULL, NULL, 0);967deviceEx_->BeginScene();968} else {969device_->EndScene();970device_->Present(NULL, NULL, NULL, NULL);971device_->BeginScene();972}973frameCount_++;974}975976static void SemanticToD3D9UsageAndIndex(int semantic, BYTE *usage, BYTE *index) {977*index = 0;978switch (semantic) {979case SEM_POSITION:980*usage = D3DDECLUSAGE_POSITION;981break;982case SEM_NORMAL:983*usage = D3DDECLUSAGE_NORMAL;984break;985case SEM_TANGENT:986*usage = D3DDECLUSAGE_TANGENT;987break;988case SEM_BINORMAL:989*usage = D3DDECLUSAGE_BINORMAL;990break;991case SEM_COLOR0:992*usage = D3DDECLUSAGE_COLOR;993break;994case SEM_COLOR1:995*usage = D3DDECLUSAGE_COLOR;996*index = 1;997break;998case SEM_TEXCOORD0:999*usage = D3DDECLUSAGE_TEXCOORD;1000break;1001case SEM_TEXCOORD1:1002*usage = D3DDECLUSAGE_TEXCOORD;1003*index = 1;1004break;1005}1006}10071008D3D9InputLayout::D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc) : decl_(NULL) {1009D3DVERTEXELEMENT9 *elements = new D3DVERTEXELEMENT9[desc.attributes.size() + 1];1010size_t i;1011for (i = 0; i < desc.attributes.size(); i++) {1012elements[i].Stream = 0;1013elements[i].Offset = desc.attributes[i].offset;1014elements[i].Method = D3DDECLMETHOD_DEFAULT;1015SemanticToD3D9UsageAndIndex(desc.attributes[i].location, &elements[i].Usage, &elements[i].UsageIndex);1016elements[i].Type = FormatToD3DDeclType(desc.attributes[i].format);1017}1018D3DVERTEXELEMENT9 end = D3DDECL_END();1019// Zero the last one.1020memcpy(&elements[i], &end, sizeof(elements[i]));10211022stride_ = desc.stride;10231024HRESULT hr = device->CreateVertexDeclaration(elements, &decl_);1025if (FAILED(hr)) {1026ERROR_LOG(Log::G3D, "Error creating vertex decl");1027}1028delete[] elements;1029}10301031// Simulate a simple buffer type like the other backends have, use the usage flags to create the right internal type.1032class D3D9Buffer : public Buffer {1033public:1034D3D9Buffer(LPDIRECT3DDEVICE9 device, size_t size, uint32_t flags) : vbuffer_(nullptr), ibuffer_(nullptr), maxSize_(size) {1035if (flags & BufferUsageFlag::INDEXDATA) {1036DWORD usage = D3DUSAGE_DYNAMIC;1037device->CreateIndexBuffer((UINT)size, usage, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &ibuffer_, NULL);1038} else {1039DWORD usage = D3DUSAGE_DYNAMIC;1040device->CreateVertexBuffer((UINT)size, usage, 0, D3DPOOL_DEFAULT, &vbuffer_, NULL);1041}1042}1043~D3D9Buffer() {1044}10451046ComPtr<IDirect3DVertexBuffer9> vbuffer_;1047ComPtr<IDirect3DIndexBuffer9> ibuffer_;1048size_t maxSize_;1049};10501051Buffer *D3D9Context::CreateBuffer(size_t size, uint32_t usageFlags) {1052return new D3D9Buffer(device_, size, usageFlags);1053}10541055inline void Transpose4x4(float out[16], const float in[16]) {1056for (int i = 0; i < 4; i++) {1057for (int j = 0; j < 4; j++) {1058out[i * 4 + j] = in[j * 4 + i];1059}1060}1061}10621063void D3D9Context::UpdateDynamicUniformBuffer(const void *ub, size_t size) {1064_assert_(size == curPipeline_->dynamicUniforms.uniformBufferSize);1065for (auto &uniform : curPipeline_->dynamicUniforms.uniforms) {1066int count = 0;1067switch (uniform.type) {1068case UniformType::FLOAT1:1069case UniformType::FLOAT2:1070case UniformType::FLOAT3:1071case UniformType::FLOAT4:1072count = 1;1073break;1074case UniformType::MATRIX4X4:1075count = 4;1076break;1077}1078const float *srcPtr = (const float *)((const uint8_t *)ub + uniform.offset);1079if (uniform.vertexReg != -1) {1080float transp[16];1081if (count == 4) {1082Transpose4x4(transp, srcPtr);1083srcPtr = transp;1084}1085device_->SetVertexShaderConstantF(uniform.vertexReg, srcPtr, count);1086}1087if (uniform.fragmentReg != -1) {1088device_->SetPixelShaderConstantF(uniform.fragmentReg, srcPtr, count);1089}1090}1091}10921093void D3D9Context::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) {1094D3D9Buffer *buf = (D3D9Buffer *)buffer;1095if (!size)1096return;1097if (offset + size > buf->maxSize_) {1098ERROR_LOG(Log::G3D, "Can't SubData with bigger size than buffer was created with");1099return;1100}1101if (buf->vbuffer_) {1102void *ptr = nullptr;1103HRESULT res = buf->vbuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0);1104if (!FAILED(res) && ptr) {1105memcpy(ptr, data, size);1106buf->vbuffer_->Unlock();1107}1108} else if (buf->ibuffer_) {1109void *ptr = nullptr;1110HRESULT res = buf->ibuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0);1111if (!FAILED(res) && ptr) {1112memcpy(ptr, data, size);1113buf->ibuffer_->Unlock();1114}1115}1116}11171118void D3D9Pipeline::Apply(LPDIRECT3DDEVICE9 device, uint8_t stencilRef, uint8_t stencilWriteMask, uint8_t stencilCompareMask) {1119vshader->Apply(device);1120pshader->Apply(device);1121blend->Apply(device);1122depthStencil->Apply(device, stencilRef, stencilWriteMask, stencilCompareMask);1123raster->Apply(device);1124}11251126void D3D9Context::ApplyDynamicState() {1127// Apply dynamic state.1128if (curPipeline_->depthStencil->stencilEnabled) {1129device_->SetRenderState(D3DRS_STENCILREF, (DWORD)stencilRefValue_);1130device_->SetRenderState(D3DRS_STENCILWRITEMASK, (DWORD)stencilWriteMask_);1131device_->SetRenderState(D3DRS_STENCILMASK, (DWORD)stencilCompareMask_);1132}1133}11341135static const int D3DPRIMITIVEVERTEXCOUNT[8][2] = {1136{0, 0}, // invalid1137{1, 0}, // 1 = D3DPT_POINTLIST,1138{2, 0}, // 2 = D3DPT_LINELIST,1139{2, 1}, // 3 = D3DPT_LINESTRIP,1140{3, 0}, // 4 = D3DPT_TRIANGLELIST,1141{1, 2}, // 5 = D3DPT_TRIANGLESTRIP,1142{1, 2}, // 6 = D3DPT_TRIANGLEFAN,1143};11441145inline int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) {1146return (size / D3DPRIMITIVEVERTEXCOUNT[prim][0]) - D3DPRIMITIVEVERTEXCOUNT[prim][1];1147}11481149void D3D9Context::Draw(int vertexCount, int offset) {1150device_->SetStreamSource(0, curVBuffer_->vbuffer_.Get(), curVBufferOffset_, curPipeline_->inputLayout->GetStride());1151curPipeline_->inputLayout->Apply(device_);1152curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);1153ApplyDynamicState();1154device_->DrawPrimitive(curPipeline_->prim, offset, D3DPrimCount(curPipeline_->prim, vertexCount));1155}11561157void D3D9Context::DrawIndexed(int vertexCount, int offset) {1158curPipeline_->inputLayout->Apply(device_);1159curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);1160ApplyDynamicState();1161device_->SetStreamSource(0, curVBuffer_->vbuffer_.Get(), curVBufferOffset_, curPipeline_->inputLayout->GetStride());1162device_->SetIndices(curIBuffer_->ibuffer_.Get());1163device_->DrawIndexedPrimitive(curPipeline_->prim, 0, 0, vertexCount, offset, D3DPrimCount(curPipeline_->prim, vertexCount));1164}11651166void D3D9Context::DrawUP(const void *vdata, int vertexCount) {1167curPipeline_->inputLayout->Apply(device_);1168curPipeline_->Apply(device_, stencilRef_, stencilWriteMask_, stencilCompareMask_);1169ApplyDynamicState();11701171device_->DrawPrimitiveUP(curPipeline_->prim, D3DPrimCount(curPipeline_->prim, vertexCount), vdata, curPipeline_->inputLayout->GetStride());1172}11731174static uint32_t SwapRB(uint32_t c) {1175return (c & 0xFF00FF00) | ((c >> 16) & 0xFF) | ((c << 16) & 0xFF0000);1176}11771178void D3D9Context::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) {1179UINT d3dMask = 0;1180if (mask & FBChannel::FB_COLOR_BIT) d3dMask |= D3DCLEAR_TARGET;1181if (mask & FBChannel::FB_DEPTH_BIT) d3dMask |= D3DCLEAR_ZBUFFER;1182if (mask & FBChannel::FB_STENCIL_BIT) d3dMask |= D3DCLEAR_STENCIL;1183if (d3dMask) {1184device_->Clear(0, NULL, d3dMask, (D3DCOLOR)SwapRB(colorval), depthVal, stencilVal);1185}1186}11871188void D3D9Context::SetScissorRect(int left, int top, int width, int height) {1189dxstate.scissorRect.set(left, top, left + width, top + height);1190dxstate.scissorTest.set(true);1191}11921193void D3D9Context::SetViewport(const Viewport &viewport) {1194int x = (int)viewport.TopLeftX;1195int y = (int)viewport.TopLeftY;1196int w = (int)viewport.Width;1197int h = (int)viewport.Height;1198dxstate.viewport.set(x, y, w, h, viewport.MinDepth, viewport.MaxDepth);1199}12001201void D3D9Context::SetBlendFactor(float color[4]) {1202uint32_t r = (uint32_t)(color[0] * 255.0f);1203uint32_t g = (uint32_t)(color[1] * 255.0f);1204uint32_t b = (uint32_t)(color[2] * 255.0f);1205uint32_t a = (uint32_t)(color[3] * 255.0f);1206dxstate.blendColor.set(color);1207}12081209void D3D9Context::SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) {1210stencilRefValue_ = refValue;1211stencilWriteMask_ = writeMask;1212stencilCompareMask_ = compareMask;1213}12141215bool D3D9ShaderModule::Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size) {1216LPD3D_SHADER_MACRO defines = nullptr;1217LPD3DINCLUDE includes = nullptr;1218ComPtr<ID3DBlob> codeBuffer;1219ComPtr<ID3DBlob> errorBuffer;1220const char *source = (const char *)data;1221auto compile = [&](const char *profile) -> HRESULT {1222return dyn_D3DCompile(source, (UINT)strlen(source), nullptr, defines, includes, "main", profile, 0, 0, &codeBuffer, &errorBuffer);1223};1224HRESULT hr = compile(stage_ == ShaderStage::Fragment ? "ps_3_0" : "vs_3_0");1225if (FAILED(hr)) {1226const char *error = errorBuffer ? (const char *)errorBuffer->GetBufferPointer() : "(no errorbuffer returned)";1227if (hr == ERROR_MOD_NOT_FOUND) {1228// No D3D9-compatible shader compiler installed.1229error = "D3D9 shader compiler not installed";1230}12311232ERROR_LOG(Log::G3D, "Compile error: %s", error);1233ERROR_LOG(Log::G3D, "%s", LineNumberString(std::string((const char *)data)).c_str());12341235OutputDebugStringA(source);1236OutputDebugStringA(error);1237return false;1238}12391240bool success = false;1241if (stage_ == ShaderStage::Fragment) {1242HRESULT result = device->CreatePixelShader((DWORD *)codeBuffer->GetBufferPointer(), &pshader_);1243success = SUCCEEDED(result);1244} else {1245HRESULT result = device->CreateVertexShader((DWORD *)codeBuffer->GetBufferPointer(), &vshader_);1246success = SUCCEEDED(result);1247}12481249return true;1250}12511252class D3D9Framebuffer : public Framebuffer {1253public:1254D3D9Framebuffer(int width, int height) {1255width_ = width;1256height_ = height;1257}1258~D3D9Framebuffer();12591260uint32_t id = 0;1261ComPtr<IDirect3DSurface9> surf;1262ComPtr<IDirect3DSurface9> depthstencil;1263ComPtr<IDirect3DTexture9> tex;1264ComPtr<IDirect3DTexture9> depthstenciltex;1265};12661267Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) {1268// Don't think D3D9 does array layers.1269_dbg_assert_(desc.numLayers == 1);12701271static uint32_t id = 0;12721273D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height);1274fbo->depthstenciltex = nullptr;12751276HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbo->tex, nullptr);1277if (FAILED(rtResult)) {1278ERROR_LOG(Log::G3D, "Failed to create render target");1279fbo->Release();1280return NULL;1281}1282fbo->tex->GetSurfaceLevel(0, &fbo->surf);12831284HRESULT dsResult;1285if (supportsINTZ) {1286dsResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_DEPTHSTENCIL, FOURCC_INTZ, D3DPOOL_DEFAULT, &fbo->depthstenciltex, NULL);1287if (SUCCEEDED(dsResult)) {1288dsResult = fbo->depthstenciltex->GetSurfaceLevel(0, &fbo->depthstencil);1289}1290} else {1291dsResult = device_->CreateDepthStencilSurface(desc.width, desc.height, D3DFMT_D24S8, D3DMULTISAMPLE_NONE, 0, FALSE, &fbo->depthstencil, NULL);1292}1293if (FAILED(dsResult)) {1294ERROR_LOG(Log::G3D, "Failed to create depth buffer");1295fbo->surf = nullptr;1296fbo->tex = nullptr;1297fbo->depthstenciltex = nullptr;1298delete fbo;1299return NULL;1300}1301fbo->id = id++;1302return fbo;1303}13041305D3D9Framebuffer::~D3D9Framebuffer() {1306}13071308void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) {1309if (fbo) {1310D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;1311device_->SetRenderTarget(0, fb->surf.Get());1312device_->SetDepthStencilSurface(fb->depthstencil.Get());1313curRenderTarget_ = fb;1314} else {1315device_->SetRenderTarget(0, deviceRTsurf.Get());1316device_->SetDepthStencilSurface(deviceDSsurf.Get());1317curRenderTarget_ = nullptr;1318}13191320int clearFlags = 0;1321if (rp.color == RPAction::CLEAR) {1322clearFlags |= D3DCLEAR_TARGET;1323}1324if (rp.depth == RPAction::CLEAR) {1325clearFlags |= D3DCLEAR_ZBUFFER;1326}1327if (rp.stencil == RPAction::CLEAR) {1328clearFlags |= D3DCLEAR_STENCIL;1329}1330if (clearFlags) {1331dxstate.scissorTest.force(false);1332device_->Clear(0, nullptr, clearFlags, (D3DCOLOR)SwapRB(rp.clearColor), rp.clearDepth, rp.clearStencil);1333dxstate.scissorRect.restore();1334}13351336dxstate.scissorRect.restore();1337dxstate.scissorTest.restore();1338dxstate.viewport.restore();13391340if (invalidationCallback_) {1341invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);1342}1343}13441345uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {1346D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;1347if (channelBits & FB_SURFACE_BIT) {1348switch (channelBits & 7) {1349case FB_DEPTH_BIT:1350return (uintptr_t)fb->depthstencil.Get();1351case FB_STENCIL_BIT:1352return (uintptr_t)fb->depthstencil.Get();1353case FB_COLOR_BIT:1354default:1355return (uintptr_t)fb->surf.Get();1356}1357} else {1358switch (channelBits & 7) {1359case FB_DEPTH_BIT:1360return (uintptr_t)fb->depthstenciltex.Get();1361case FB_STENCIL_BIT:1362return 0; // Can't texture from stencil1363case FB_COLOR_BIT:1364default:1365return (uintptr_t)fb->tex.Get();1366}1367}1368}13691370void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int layer) {1371_dbg_assert_(binding < MAX_BOUND_TEXTURES);1372_dbg_assert_(layer == ALL_LAYERS || layer == 0); // No stereo support1373D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;1374switch (channelBit) {1375case FB_DEPTH_BIT:1376if (fb->depthstenciltex) {1377device_->SetTexture(binding, fb->depthstenciltex.Get());1378}1379break;1380case FB_COLOR_BIT:1381default:1382if (fb->tex) {1383device_->SetTexture(binding, fb->tex.Get());1384}1385break;1386}1387}13881389void D3D9Context::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {1390D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;1391if (fb) {1392*w = fb->Width();1393*h = fb->Height();1394} else {1395*w = targetWidth_;1396*h = targetHeight_;1397}1398}13991400bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) {1401D3D9Framebuffer *src = (D3D9Framebuffer *)srcfb;1402D3D9Framebuffer *dst = (D3D9Framebuffer *)dstfb;14031404ComPtr<IDirect3DSurface9> srcSurf;1405ComPtr<IDirect3DSurface9> dstSurf;1406RECT srcRect{ (LONG)srcX1, (LONG)srcY1, (LONG)srcX2, (LONG)srcY2 };1407RECT dstRect{ (LONG)dstX1, (LONG)dstY1, (LONG)dstX2, (LONG)dstY2 };1408if (channelBits == FB_COLOR_BIT) {1409srcSurf = src ? src->surf : deviceRTsurf;1410dstSurf = dst ? dst->surf : deviceRTsurf;1411} else if (channelBits & FB_DEPTH_BIT) {1412if (!src || !dst) {1413// Might have implications for non-buffered rendering.1414return false;1415}1416srcSurf = src->depthstencil;1417dstSurf = dst->depthstencil;1418} else {1419return false;1420}1421return SUCCEEDED(device_->StretchRect(srcSurf.Get(), &srcRect, dstSurf.Get(), &dstRect, (filter == FB_BLIT_LINEAR && channelBits == FB_COLOR_BIT) ? D3DTEXF_LINEAR : D3DTEXF_POINT));1422}14231424bool D3D9Context::CopyFramebufferToMemory(Framebuffer *src, int channelBits, int bx, int by, int bw, int bh, Draw::DataFormat destFormat, void *pixels, int pixelStride, ReadbackMode mode, const char *tag) {1425D3D9Framebuffer *fb = (D3D9Framebuffer *)src;14261427if (fb) {1428if (bx + bw > fb->Width()) {1429bw -= (bx + bw) - fb->Width();1430}1431if (by + bh > fb->Height()) {1432bh -= (by + bh) - fb->Height();1433}1434}14351436if (bh <= 0 || bw <= 0)1437return true;14381439DataFormat srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;1440if (channelBits != FB_COLOR_BIT) {1441srcFormat = Draw::DataFormat::D24_S8;1442if (!supportsINTZ)1443return false;1444}14451446D3DSURFACE_DESC desc;1447D3DLOCKED_RECT locked;1448RECT rect = { (LONG)bx, (LONG)by, (LONG)bw, (LONG)bh };14491450ComPtr<IDirect3DSurface9> offscreen;1451HRESULT hr = E_UNEXPECTED;1452if (channelBits == FB_COLOR_BIT) {1453if (fb)1454fb->tex->GetLevelDesc(0, &desc);1455else1456deviceRTsurf->GetDesc(&desc);14571458hr = device_->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, nullptr);1459if (SUCCEEDED(hr)) {1460hr = device_->GetRenderTargetData(fb ? fb->surf.Get() : deviceRTsurf.Get(), offscreen.Get());1461if (SUCCEEDED(hr)) {1462hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);1463}1464}1465} else {1466_assert_(fb->depthstenciltex != nullptr);1467fb->depthstenciltex->GetLevelDesc(0, &desc);1468hr = fb->depthstenciltex->LockRect(0, &locked, &rect, D3DLOCK_READONLY);1469}14701471if (SUCCEEDED(hr)) {1472switch (channelBits) {1473case FB_COLOR_BIT:1474// Pixel size always 4 here because we always request BGRA8888.1475ConvertFromBGRA8888((uint8_t *)pixels, (const uint8_t *)locked.pBits, pixelStride, locked.Pitch / sizeof(uint32_t), bw, bh, destFormat);1476break;1477case FB_DEPTH_BIT:1478if (srcFormat == destFormat) {1479// Can just memcpy when it matches no matter the format!1480uint8_t *dst = (uint8_t *)pixels;1481const uint8_t *src = (const uint8_t *)locked.pBits;1482for (int y = 0; y < bh; ++y) {1483memcpy(dst, src, bw * DataFormatSizeInBytes(srcFormat));1484dst += pixelStride * DataFormatSizeInBytes(srcFormat);1485src += locked.Pitch;1486}1487} else if (destFormat == DataFormat::D32F) {1488ConvertToD32F((uint8_t *)pixels, (const uint8_t *)locked.pBits, pixelStride, locked.Pitch / sizeof(uint32_t), bw, bh, srcFormat);1489} else if (destFormat == DataFormat::D16) {1490ConvertToD16((uint8_t *)pixels, (const uint8_t *)locked.pBits, pixelStride, locked.Pitch / sizeof(uint32_t), bw, bh, srcFormat);1491} else {1492_assert_(false);1493}1494break;1495case FB_STENCIL_BIT:1496if (srcFormat == destFormat) {1497uint8_t *dst = (uint8_t *)pixels;1498const uint8_t *src = (const uint8_t *)locked.pBits;1499for (int y = 0; y < bh; ++y) {1500memcpy(dst, src, bw * DataFormatSizeInBytes(srcFormat));1501dst += pixelStride * DataFormatSizeInBytes(srcFormat);1502src += locked.Pitch;1503}1504} else if (destFormat == DataFormat::S8) {1505for (int y = 0; y < bh; y++) {1506uint8_t *destStencil = (uint8_t *)pixels + y * pixelStride;1507const uint32_t *src = (const uint32_t *)((const uint8_t *)locked.pBits + locked.Pitch * y);1508for (int x = 0; x < bw; x++) {1509destStencil[x] = src[x] >> 24;1510}1511}1512} else {1513_assert_(false);1514}1515break;1516}1517}15181519if (channelBits != FB_COLOR_BIT) {1520fb->depthstenciltex->UnlockRect(0);1521}15221523return SUCCEEDED(hr);1524}15251526void D3D9Context::HandleEvent(Event ev, int width, int height, void *param1, void *param2) {1527switch (ev) {1528case Event::LOST_BACKBUFFER:1529deviceRTsurf = nullptr;1530deviceDSsurf = nullptr;1531break;1532case Event::GOT_BACKBUFFER:1533device_->GetRenderTarget(0, &deviceRTsurf);1534device_->GetDepthStencilSurface(&deviceDSsurf);1535break;1536case Event::PRESENTED:1537break;1538}1539}15401541DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx) {1542bool result = LoadD3DCompilerDynamic();1543if (!result) {1544ERROR_LOG(Log::G3D, "Failed to load D3DCompiler!");1545return nullptr;1546}1547return new D3D9Context(d3d, d3dEx, adapterId, device, deviceEx);1548}15491550// Only partial implementation!1551uint32_t D3D9Context::GetDataFormatSupport(DataFormat fmt) const {1552switch (fmt) {1553case DataFormat::B8G8R8A8_UNORM:1554return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS;15551556case DataFormat::R4G4B4A4_UNORM_PACK16:1557return 0;1558case DataFormat::B4G4R4A4_UNORM_PACK16:1559return FMT_TEXTURE; // emulated support1560case DataFormat::R5G6B5_UNORM_PACK16:1561case DataFormat::A1R5G5B5_UNORM_PACK16:1562case DataFormat::A4R4G4B4_UNORM_PACK16:1563return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS; // native support15641565case DataFormat::R8G8B8A8_UNORM:1566return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS;15671568case DataFormat::R32_FLOAT:1569case DataFormat::R32G32_FLOAT:1570case DataFormat::R32G32B32_FLOAT:1571case DataFormat::R32G32B32A32_FLOAT:1572return FMT_INPUTLAYOUT;15731574case DataFormat::R8_UNORM:1575return 0;1576case DataFormat::BC1_RGBA_UNORM_BLOCK:1577case DataFormat::BC2_UNORM_BLOCK:1578case DataFormat::BC3_UNORM_BLOCK:1579// DXT1, DXT3, DXT5.1580return FMT_TEXTURE;1581default:1582return 0;1583}1584}158515861587} // namespace Draw158815891590