CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Software/RasterizerRectangle.cpp
Views: 1401
// See comment in header for the purpose of the code in this file.12#include "ppsspp_config.h"3#include <algorithm>4#include <cmath>56#include "Common/Common.h"7#include "Common/Data/Convert/ColorConv.h"8#include "Common/Profiler/Profiler.h"9#include "Common/StringUtils.h"1011#include "Core/Config.h"12#include "Core/Debugger/MemBlockInfo.h"13#include "Core/MemMap.h"14#include "Core/System.h"15#include "GPU/GPUState.h"1617#include "GPU/Common/TextureCacheCommon.h"18#include "GPU/Software/BinManager.h"19#include "GPU/Software/DrawPixel.h"20#include "GPU/Software/Rasterizer.h"21#include "GPU/Software/Sampler.h"22#include "GPU/Software/SoftGpu.h"2324#if defined(_M_SSE)25#include <emmintrin.h>26#endif2728#if PPSSPP_ARCH(ARM_NEON)29#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)30#include <arm64_neon.h>31#else32#include <arm_neon.h>33#endif34#endif3536extern DSStretch g_DarkStalkerStretch;37// For Darkstalkers hack. Ugh.38extern bool currentDialogActive;3940namespace Rasterizer {4142// This essentially AlphaBlendingResult() with fixed src.a / 1 - src.a factors and ADD equation.43// It allows us to skip round trips between 32-bit and 16-bit color values.44static uint32_t StandardAlphaBlend(uint32_t source, uint32_t dst) {45#if defined(_M_SSE)46const __m128i alpha = _mm_cvtsi32_si128(source >> 24);47// Keep the alpha lane of the srcfactor zero, so we keep dest alpha.48const __m128i srcfactor = _mm_shufflelo_epi16(alpha, _MM_SHUFFLE(1, 0, 0, 0));49const __m128i dstfactor = _mm_sub_epi16(_mm_set1_epi16(255), srcfactor);5051const __m128i z = _mm_setzero_si128();52const __m128i sourcevec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(source), z);53const __m128i dstvec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dst), z);5455// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.56const __m128i half = _mm_set1_epi16(1 << 3);5758const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(sourcevec, 4), half);59const __m128i sf = _mm_add_epi16(_mm_slli_epi16(srcfactor, 4), half);60const __m128i s = _mm_mulhi_epi16(srgb, sf);6162const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(dstvec, 4), half);63const __m128i df = _mm_add_epi16(_mm_slli_epi16(dstfactor, 4), half);64const __m128i d = _mm_mulhi_epi16(drgb, df);6566const __m128i blended16 = _mm_adds_epi16(s, d);67return _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));68#elif PPSSPP_ARCH(ARM64_NEON)69uint16x4_t sf = vdup_n_u16((source >> 24) * 2 + 1);70uint16x4_t df = vdup_n_u16((255 - (source >> 24)) * 2 + 1);7172// Convert both to 16-bit, double, and add the half before even going to 32 bit.73uint16x8_t sd_c16 = vmovl_u8(vcreate_u8((uint64_t)source | ((uint64_t)dst << 32)));74sd_c16 = vaddq_u16(vshlq_n_u16(sd_c16, 1), vdupq_n_u16(1));7576uint16x4_t srgb = vget_low_u16(sd_c16);77uint16x4_t drgb = vget_high_u16(sd_c16);7879uint16x4_t s = vshrn_n_u32(vmull_u16(srgb, sf), 10);80uint16x4_t d = vshrn_n_u32(vmull_u16(drgb, df), 10);8182uint16x4_t blended = vset_lane_u16(0, vadd_u16(s, d), 3);83uint8x8_t blended8 = vqmovn_u16(vcombine_u16(blended, blended));84return vget_lane_u32(vreinterpret_u32_u8(blended8), 0);85#else86Vec3<int> srcfactor = Vec3<int>::AssignToAll(source >> 24);87Vec3<int> dstfactor = Vec3<int>::AssignToAll(255 - (source >> 24));8889static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);90Vec3<int> lhs = ((Vec3<int>::FromRGB(source) * 2 + half) * (srcfactor * 2 + half)) / 1024;91Vec3<int> rhs = ((Vec3<int>::FromRGB(dst) * 2 + half) * (dstfactor * 2 + half)) / 1024;92Vec3<int> blended = lhs + rhs;9394return clamp_u8(blended.r()) | (clamp_u8(blended.g()) << 8) | (clamp_u8(blended.b()) << 16);95#endif96}9798// Through mode, with the specific Darkstalker settings.99template <GEBufferFormat fmt, bool alphaBlend>100static inline void DrawSinglePixel(u16 *pixel, const u32 color_in) {101u32 new_color;102// Because of this check, we only support src.a / 1-src.a blending.103if (!alphaBlend || (color_in >> 24) == 255) {104new_color = color_in & 0xFFFFFF;105} else {106u32 old_color;107switch (fmt) {108case GE_FORMAT_565:109old_color = RGB565ToRGBA8888(*pixel);110break;111case GE_FORMAT_5551:112old_color = RGBA5551ToRGBA8888(*pixel);113break;114case GE_FORMAT_4444:115old_color = RGBA4444ToRGBA8888(*pixel);116break;117default:118break;119}120121new_color = StandardAlphaBlend(color_in, old_color);122}123124switch (fmt) {125case GE_FORMAT_565:126*pixel = RGBA8888ToRGB565(new_color);127break;128case GE_FORMAT_5551:129*pixel = RGBA8888ToRGBA555X(new_color) | (*pixel & 0x8000);130break;131case GE_FORMAT_4444:132*pixel = RGBA8888ToRGBA444X(new_color) | (*pixel & 0xF000);133break;134default:135break;136}137}138139template <bool alphaBlend>140static inline void DrawSinglePixel32(u32 *pixel, const u32 color_in) {141u32 new_color;142// Because of this check, we only support src.a / 1-src.a blending.143if (!alphaBlend || (color_in >> 24) == 255) {144new_color = color_in & 0xFFFFFF;145} else {146const u32 old_color = *pixel;147new_color = StandardAlphaBlend(color_in, old_color);148}149new_color |= *pixel & 0xFF000000;150*pixel = new_color;151}152153// Check if we can safely ignore the alpha test, assuming standard alpha blending.154static inline bool AlphaTestIsNeedless(const PixelFuncID &pixelID) {155switch (pixelID.AlphaTestFunc()) {156case GE_COMP_NEVER:157case GE_COMP_EQUAL:158case GE_COMP_LESS:159case GE_COMP_LEQUAL:160return false;161162case GE_COMP_ALWAYS:163return true;164165case GE_COMP_NOTEQUAL:166case GE_COMP_GREATER:167case GE_COMP_GEQUAL:168if (pixelID.alphaTestRef != 0 || pixelID.hasAlphaTestMask)169return false;170return true;171}172173return false;174}175176static bool UseDrawSinglePixel(const PixelFuncID &pixelID) {177if (pixelID.clearMode || pixelID.colorTest || pixelID.stencilTest)178return false;179if (!AlphaTestIsNeedless(pixelID) || pixelID.DepthTestFunc() != GE_COMP_ALWAYS)180return false;181// We skip blending when alpha = FF, so we can't allow other blend modes.182if (pixelID.alphaBlend) {183if (pixelID.AlphaBlendEq() != GE_BLENDMODE_MUL_AND_ADD || pixelID.AlphaBlendSrc() != PixelBlendFactor::SRCALPHA)184return false;185if (pixelID.AlphaBlendDst() != PixelBlendFactor::INVSRCALPHA)186return false;187}188if (pixelID.dithering || pixelID.applyLogicOp || pixelID.applyColorWriteMask)189return false;190191return true;192}193194static inline Vec4IntResult SOFTRAST_CALL ModulateRGBA(Vec4IntArg prim_in, Vec4IntArg texcolor_in, const SamplerID &samplerID) {195Vec4<int> out;196Vec4<int> prim_color = prim_in;197Vec4<int> texcolor = texcolor_in;198199#if defined(_M_SSE)200// Modulate weights slightly on the tex color, by adding one to prim and dividing by 256.201const __m128i p = _mm_slli_epi16(_mm_packs_epi32(prim_color.ivec, prim_color.ivec), 4);202const __m128i pboost = _mm_add_epi16(p, _mm_set1_epi16(1 << 4));203__m128i t = _mm_slli_epi16(_mm_packs_epi32(texcolor.ivec, texcolor.ivec), 4);204if (samplerID.useColorDoubling) {205const __m128i amask = _mm_set_epi16(-1, 0, 0, 0, -1, 0, 0, 0);206const __m128i a = _mm_and_si128(t, amask);207const __m128i rgb = _mm_andnot_si128(amask, t);208t = _mm_or_si128(_mm_slli_epi16(rgb, 1), a);209}210const __m128i b = _mm_mulhi_epi16(pboost, t);211out.ivec = _mm_unpacklo_epi16(b, _mm_setzero_si128());212#elif PPSSPP_ARCH(ARM64_NEON)213int32x4_t pboost = vaddq_s32(prim_color.ivec, vdupq_n_s32(1));214int32x4_t t = texcolor.ivec;215if (samplerID.useColorDoubling) {216static const int32_t rgbDouble[4] = {1, 1, 1, 0};217t = vshlq_s32(t, vld1q_s32(rgbDouble));218}219out.ivec = vshrq_n_s32(vmulq_s32(pboost, t), 8);220#else221if (samplerID.useColorDoubling) {222Vec4<int> tex = texcolor * Vec4<int>(2, 2, 2, 1);223out = ((prim_color + Vec4<int>::AssignToAll(1)) * tex) / 256;224} else {225out = (prim_color + Vec4<int>::AssignToAll(1)) * texcolor / 256;226}227#endif228229return ToVec4IntResult(out);230}231232template <GEBufferFormat fmt, bool isWhite, bool alphaBlend, bool alphaTestZero>233static void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {234const u8 *texptr = state.texptr[0];235uint16_t texbufw = state.texbufw[0];236237int t = t_start;238const Vec4<int> c0 = Vec4<int>::FromRGBA(color0);239for (int y = pos0.y; y < pos1.y; y++) {240int s = s_start;241u16 *pixel16 = fb.Get16Ptr(pos0.x, y, state.pixelID.cached.framebufStride);242u32 *pixel32 = fb.Get32Ptr(pos0.x, y, state.pixelID.cached.framebufStride);243for (int x = pos0.x; x < pos1.x; x++) {244Vec4<int> tex_color = fetchFunc(s, t, texptr, texbufw, 0, state.samplerID);245if (isWhite) {246if (!alphaTestZero || tex_color.a() != 0) {247u32 tex_color32 = tex_color.ToRGBA();248if (fmt == GE_FORMAT_8888)249DrawSinglePixel32<alphaBlend>(pixel32, tex_color32);250else251DrawSinglePixel<fmt, alphaBlend>(pixel16, tex_color32);252}253} else {254Vec4<int> prim_color = c0;255prim_color = Vec4<int>(ModulateRGBA(ToVec4IntArg(prim_color), ToVec4IntArg(tex_color), state.samplerID));256if (!alphaTestZero || prim_color.a() > 0) {257if (fmt == GE_FORMAT_8888)258DrawSinglePixel32<alphaBlend>(pixel32, prim_color.ToRGBA());259else260DrawSinglePixel<fmt, alphaBlend>(pixel16, prim_color.ToRGBA());261}262}263s += ds;264if (fmt == GE_FORMAT_8888)265pixel32++;266else267pixel16++;268}269t += dt;270}271}272273template <bool isWhite, bool alphaBlend, bool alphaTestZero>274static void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {275switch (state.pixelID.FBFormat()) {276case GE_FORMAT_565:277DrawSpriteTex<GE_FORMAT_565, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);278break;279case GE_FORMAT_5551:280DrawSpriteTex<GE_FORMAT_5551, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);281break;282case GE_FORMAT_4444:283DrawSpriteTex<GE_FORMAT_4444, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);284break;285case GE_FORMAT_8888:286DrawSpriteTex<GE_FORMAT_8888, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);287break;288default:289// Invalid, don't draw anything...290break;291}292}293294template <bool isWhite>295static inline void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {296// Standard alpha blending implies skipping alpha zero.297if (state.pixelID.alphaBlend)298DrawSpriteTex<isWhite, true, true>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);299else if (state.pixelID.AlphaTestFunc() != GE_COMP_ALWAYS)300DrawSpriteTex<isWhite, false, true>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);301else302DrawSpriteTex<isWhite, false, false>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);303}304305template <GEBufferFormat fmt, bool alphaBlend>306static void DrawSpriteNoTex(const DrawingCoords &pos0, const DrawingCoords &pos1, u32 color0, const RasterizerState &state) {307if constexpr (alphaBlend)308if (Vec4<int>::FromRGBA(color0).a() == 0)309return;310311for (int y = pos0.y; y < pos1.y; y++) {312if (fmt == GE_FORMAT_8888) {313u32 *pixel = fb.Get32Ptr(pos0.x, y, state.pixelID.cached.framebufStride);314for (int x = pos0.x; x < pos1.x; x++) {315DrawSinglePixel32<alphaBlend>(pixel, color0);316pixel++;317}318} else {319u16 *pixel = fb.Get16Ptr(pos0.x, y, state.pixelID.cached.framebufStride);320for (int x = pos0.x; x < pos1.x; x++) {321DrawSinglePixel<fmt, alphaBlend>(pixel, color0);322pixel++;323}324}325}326}327328template <bool alphaBlend>329static void DrawSpriteNoTex(const DrawingCoords &pos0, const DrawingCoords &pos1, u32 color0, const RasterizerState &state) {330switch (state.pixelID.FBFormat()) {331case GE_FORMAT_565:332DrawSpriteNoTex<GE_FORMAT_565, alphaBlend>(pos0, pos1, color0, state);333break;334case GE_FORMAT_5551:335DrawSpriteNoTex<GE_FORMAT_5551, alphaBlend>(pos0, pos1, color0, state);336break;337case GE_FORMAT_4444:338DrawSpriteNoTex<GE_FORMAT_4444, alphaBlend>(pos0, pos1, color0, state);339break;340case GE_FORMAT_8888:341DrawSpriteNoTex<GE_FORMAT_8888, alphaBlend>(pos0, pos1, color0, state);342break;343default:344// Invalid, don't draw anything...345break;346}347}348349void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) {350const u8 *texptr = state.texptr[0];351352GETextureFormat texfmt = state.samplerID.TexFmt();353uint16_t texbufw = state.texbufw[0];354355// We won't flush, since we compile all samplers together.356Sampler::FetchFunc fetchFunc = Sampler::GetFetchFunc(state.samplerID, nullptr);357_dbg_assert_msg_(fetchFunc != nullptr, "Failed to get precompiled fetch func");358auto &pixelID = state.pixelID;359auto &samplerID = state.samplerID;360361DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos);362// Include the ending pixel based on its center, not start.363DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos + ScreenCoords(7, 7, 0));364365DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1);366DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2);367368const int z = v1.screenpos.z;369constexpr int fog = 255;370371// Since it's flat, we can check depth range early. Matters for earlyZChecks.372if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz))373return;374375bool isWhite = v1.color0 == 0xFFFFFFFF;376377if (state.enableTextures) {378// 1:1 (but with mirror support) texture mapping!379int s_start = v0.texturecoords.x;380int t_start = v0.texturecoords.y;381int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1;382int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1;383384if (ds < 0) {385s_start += ds;386}387if (dt < 0) {388t_start += dt;389}390391// First clip the right and bottom sides, since we don't need to adjust the deltas.392if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;393if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;394// Now clip the other sides.395if (pos0.x < scissorTL.x) {396s_start += (scissorTL.x - pos0.x) * ds;397pos0.x = scissorTL.x;398}399if (pos0.y < scissorTL.y) {400t_start += (scissorTL.y - pos0.y) * dt;401pos0.y = scissorTL.y;402}403404if (UseDrawSinglePixel(pixelID) && (samplerID.TexFunc() == GE_TEXFUNC_MODULATE || samplerID.TexFunc() == GE_TEXFUNC_REPLACE) && samplerID.useTextureAlpha) {405if (isWhite || samplerID.TexFunc() == GE_TEXFUNC_REPLACE) {406DrawSpriteTex<true>(pos0, pos1, s_start, t_start, ds, dt, v1.color0, state, fetchFunc);407} else {408DrawSpriteTex<false>(pos0, pos1, s_start, t_start, ds, dt, v1.color0, state, fetchFunc);409}410} else {411float dsf = ds * (1.0f / (float)(1 << state.samplerID.width0Shift));412float dtf = dt * (1.0f / (float)(1 << state.samplerID.height0Shift));413float sf_start = s_start * (1.0f / (float)(1 << state.samplerID.width0Shift));414float tf_start = t_start * (1.0f / (float)(1 << state.samplerID.height0Shift));415416float t = tf_start;417const Vec4<int> c0 = Vec4<int>::FromRGBA(v1.color0);418if (pixelID.earlyZChecks) {419for (int y = pos0.y; y < pos1.y; y++) {420float s = sf_start;421// Not really that fast but faster than triangle.422for (int x = pos0.x; x < pos1.x; x++) {423if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {424Vec4<int> prim_color = state.nearest(s, t, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);425state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);426}427428s += dsf;429}430t += dtf;431}432} else {433for (int y = pos0.y; y < pos1.y; y++) {434float s = sf_start;435// Not really that fast but faster than triangle.436for (int x = pos0.x; x < pos1.x; x++) {437Vec4<int> prim_color = state.nearest(s, t, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);438state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);439s += dsf;440}441t += dtf;442}443}444}445} else {446if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;447if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;448if (pos0.x < scissorTL.x) pos0.x = scissorTL.x;449if (pos0.y < scissorTL.y) pos0.y = scissorTL.y;450if (UseDrawSinglePixel(pixelID)) {451if (pixelID.alphaBlend)452DrawSpriteNoTex<true>(pos0, pos1, v1.color0, state);453else454DrawSpriteNoTex<false>(pos0, pos1, v1.color0, state);455} else if (pixelID.earlyZChecks) {456const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);457for (int y = pos0.y; y < pos1.y; y++) {458for (int x = pos0.x; x < pos1.x; x++) {459if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))460continue;461462state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);463}464}465} else {466const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);467for (int y = pos0.y; y < pos1.y; y++) {468for (int x = pos0.x; x < pos1.x; x++) {469state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);470}471}472}473}474475#if defined(SOFTGPU_MEMORY_TAGGING_BASIC) || defined(SOFTGPU_MEMORY_TAGGING_DETAILED)476uint32_t bpp = pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;477char tag[64]{};478// char ztag[64]{};479int tagLen = snprintf(tag, sizeof(tag), "DisplayListR_%08x", state.listPC);480// int ztagLen = snprintf(ztag, sizeof(ztag), "DisplayListRZ_%08x", state.listPC);481482for (int y = pos0.y; y < pos1.y; y++) {483uint32_t row = gstate.getFrameBufAddress() + y * pixelID.cached.framebufStride * bpp;484NotifyMemInfo(MemBlockFlags::WRITE, row + pos0.x * bpp, (pos1.x - pos0.x) * bpp, tag, tagLen);485}486#endif487}488489bool g_needsClearAfterDialog = false;490491static inline bool NoClampOrWrap(const RasterizerState &state, const Vec2f &tc) {492if (tc.x < 0 || tc.y < 0)493return false;494if (state.samplerID.cached.sizes[0].w > 512 || state.samplerID.cached.sizes[0].h > 512)495return false;496return tc.x <= state.samplerID.cached.sizes[0].w && tc.y <= state.samplerID.cached.sizes[0].h;497}498499// Returns true if the normal path should be skipped.500bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &binner) {501const RasterizerState &state = binner.State();502503g_DarkStalkerStretch = DSStretch::Off;504505// Eliminate the stretch blit in DarkStalkers.506// We compensate for that when blitting the framebuffer in SoftGpu.cpp.507if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {508// check for save/load dialog.509if (!currentDialogActive) {510if (v0.screenpos.x + gstate.getOffsetX16() == 0x7100 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8f00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {511g_DarkStalkerStretch = DSStretch::Wide;512} else if (v0.screenpos.x + gstate.getOffsetX16() == 0x7400 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8C00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {513g_DarkStalkerStretch = DSStretch::Normal;514} else {515return false;516}517if (g_needsClearAfterDialog) {518g_needsClearAfterDialog = false;519// Afterwards, we also need to clear the actual destination. Can do a fast rectfill.520gstate.textureMapEnable &= ~1;521VertexData newV1 = v1;522newV1.color0 = 0xFF000000;523binner.AddSprite(v0, newV1);524gstate.textureMapEnable |= 1;525}526return true;527} else {528g_needsClearAfterDialog = true;529}530}531532// Check for 1:1 texture mapping. In that case we can call DrawSprite.533int xdiff = v1.screenpos.x - v0.screenpos.x;534int ydiff = v1.screenpos.y - v0.screenpos.y;535int udiff = (v1.texturecoords.x - v0.texturecoords.x) * (float)SCREEN_SCALE_FACTOR;536int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * (float)SCREEN_SCALE_FACTOR;537538// Currently only works for TL/BR, which is the most common but not required.539bool orient_check = xdiff >= 0 && ydiff >= 0;540// We already have a fast path for clear in ClearRectangle.541bool state_check = state.throughMode && !state.pixelID.clearMode && !state.samplerID.hasAnyMips && !state.textureProj;542bool coord_check = true;543if (state.enableTextures) {544state_check = state_check && NoClampOrWrap(state, v0.texturecoords.uv()) && NoClampOrWrap(state, v1.texturecoords.uv());545coord_check = (xdiff == udiff || xdiff == -udiff) && (ydiff == vdiff || ydiff == -vdiff);546}547// This doesn't work well with offset drawing, see #15876. Through never has a subpixel offset.548bool subpixel_check = ((v0.screenpos.x | v0.screenpos.y | v1.screenpos.x | v1.screenpos.y) & 0xF) == 0;549if (coord_check && orient_check && state_check && subpixel_check) {550binner.AddSprite(v0, v1);551return true;552}553return false;554}555556static bool IsCoordRectangleCompatible(const RasterizerState &state, const ClipVertexData &data) {557if (!state.throughMode) {558// See AreCoordsRectangleCompatible() for most of these, this just checks the main vert.559if (data.OutsideRange())560return false;561if (data.clippos.w < 0.0f)562return false;563if (data.clippos.z < -data.clippos.w)564return false;565}566return true;567}568569static bool AreCoordsRectangleCompatible(const RasterizerState &state, const ClipVertexData &data0, const ClipVertexData &data1) {570if (data1.v.color0 != data0.v.color0)571return false;572if (data1.v.screenpos.z != data0.v.screenpos.z) {573// Sometimes, we don't actually care about z.574if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)575return false;576}577if (!state.throughMode) {578if (data1.v.color1 != data0.v.color1)579return false;580// This means it should be culled, outside range.581if (data1.OutsideRange())582return false;583// Do we have to think about perspective correction or slope mip level?584if (state.enableTextures && data1.clippos.w != data0.clippos.w) {585// If the w is off by less than a factor of 1/512, it should be safe to treat as a rectangle.586static constexpr float halftexel = 0.5f / 512.0f;587if (data1.clippos.w - halftexel > data0.clippos.w || data1.clippos.w + halftexel < data0.clippos.w)588return false;589}590// We might need to cull this if all verts have negative w, which doesn't seem to happen for rectangles.591if (data1.clippos.w < 0.0f)592return false;593// And we also may need to clip, even if flat.594if (data1.clippos.z < -data1.clippos.w)595return false;596// If we're projecting textures, only allow an exact match for simplicity.597if (state.enableTextures && data1.v.texturecoords.q() != data0.v.texturecoords.q())598return false;599if (state.pixelID.applyFog && data1.v.fogdepth != data0.v.fogdepth) {600// Similar to w, this only matters if they're farther apart than 1/255.601static constexpr float foghalfstep = 0.5f / 255.0f;602if (data1.v.fogdepth - foghalfstep > data0.v.fogdepth || data1.v.fogdepth + foghalfstep < data0.v.fogdepth)603return false;604}605}606return true;607}608609bool DetectRectangleFromStrip(const RasterizerState &state, const ClipVertexData data[4], int *tlIndex, int *brIndex) {610if (!IsCoordRectangleCompatible(state, data[0]))611return false;612613// Color and Z must be flat. Also find the TL and BR meanwhile.614int tl = 0, br = 0;615for (int i = 1; i < 4; ++i) {616if (!AreCoordsRectangleCompatible(state, data[0], data[i]))617return false;618619if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)620tl = i;621if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)622br = i;623}624625*tlIndex = tl;626*brIndex = br;627628// OK, now let's look at data to detect rectangles. There are a few possibilities629// but we focus on Darkstalkers for now.630if (data[0].v.screenpos.x == data[1].v.screenpos.x &&631data[0].v.screenpos.y == data[2].v.screenpos.y &&632data[2].v.screenpos.x == data[3].v.screenpos.x &&633data[1].v.screenpos.y == data[3].v.screenpos.y) {634// Okay, this is in the shape of a rectangle, but what about texture?635if (!state.enableTextures)636return true;637638if (data[0].v.texturecoords.x == data[1].v.texturecoords.x &&639data[0].v.texturecoords.y == data[2].v.texturecoords.y &&640data[2].v.texturecoords.x == data[3].v.texturecoords.x &&641data[1].v.texturecoords.y == data[3].v.texturecoords.y) {642// It's a rectangle!643return true;644}645return false;646}647// There's the other vertex order too...648if (data[0].v.screenpos.x == data[2].v.screenpos.x &&649data[0].v.screenpos.y == data[1].v.screenpos.y &&650data[1].v.screenpos.x == data[3].v.screenpos.x &&651data[2].v.screenpos.y == data[3].v.screenpos.y) {652// Okay, this is in the shape of a rectangle, but what about texture?653if (!state.enableTextures)654return true;655656if (data[0].v.texturecoords.x == data[2].v.texturecoords.x &&657data[0].v.texturecoords.y == data[1].v.texturecoords.y &&658data[1].v.texturecoords.x == data[3].v.texturecoords.x &&659data[2].v.texturecoords.y == data[3].v.texturecoords.y) {660// It's a rectangle!661return true;662}663return false;664}665return false;666}667668bool DetectRectangleFromFan(const RasterizerState &state, const ClipVertexData *data, int *tlIndex, int *brIndex) {669if (!IsCoordRectangleCompatible(state, data[0]))670return false;671672// Color and Z must be flat.673int tl = 0, br = 0;674for (int i = 1; i < 4; ++i) {675if (!AreCoordsRectangleCompatible(state, data[0], data[i]))676return false;677678if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)679tl = i;680if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)681br = i;682}683684*tlIndex = tl;685*brIndex = br;686687int tr = 1, bl = 1;688for (int i = 0; i < 4; ++i) {689if (i == tl || i == br)690continue;691692if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y >= data[tl].v.screenpos.y)693bl = i;694if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y <= data[br].v.screenpos.y)695tr = i;696}697698// Must have found each of the coordinates.699if (tl + tr + bl + br != 6)700return false;701702// Note the common case is a single TL-TR-BR-BL.703const auto &postl = data[tl].v.screenpos, &postr = data[tr].v.screenpos;704const auto &posbr = data[br].v.screenpos, &posbl = data[bl].v.screenpos;705if (postl.x == posbl.x && postr.x == posbr.x && postl.y == postr.y && posbl.y == posbr.y) {706// Do we need to think about rotation?707if (!state.enableTextures)708return true;709710const auto &textl = data[tl].v.texturecoords, &textr = data[tr].v.texturecoords;711const auto &texbl = data[bl].v.texturecoords, &texbr = data[br].v.texturecoords;712713if (textl.x == texbl.x && textr.x == texbr.x && textl.y == textr.y && texbl.y == texbr.y) {714// Okay, the texture is also good, but let's avoid rotation issues.715return textl.y < texbr.y && postl.y < posbr.y && textl.x < texbr.x && postl.x < posbr.x;716}717}718719return false;720}721722bool DetectRectangleFromPair(const RasterizerState &state, const ClipVertexData data[6], int *tlIndex, int *brIndex) {723if (!IsCoordRectangleCompatible(state, data[0]))724return false;725726// Color and Z must be flat. Also find the TL and BR meanwhile.727int tl = 0, br = 0;728for (int i = 1; i < 6; ++i) {729if (!AreCoordsRectangleCompatible(state, data[0], data[i]))730return false;731732if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)733tl = i;734if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)735br = i;736}737738*tlIndex = tl;739*brIndex = br;740741auto xat = [&](int i) { return data[i].v.screenpos.x; };742auto yat = [&](int i) { return data[i].v.screenpos.y; };743auto uat = [&](int i) { return data[i].v.texturecoords.x; };744auto vat = [&](int i) { return data[i].v.texturecoords.y; };745746// A likely order would be: TL, TR, BR, TL, BR, BL. We'd have the last index of each.747// TODO: Make more generic.748if (tl == 3 && br == 4) {749bool x1_match = xat(0) == xat(3) && xat(0) == xat(5);750bool x2_match = xat(1) == xat(2) && xat(1) == xat(4);751bool y1_match = yat(0) == yat(1) && yat(0) == yat(3);752bool y2_match = yat(2) == yat(4) && yat(2) == yat(5);753if (x1_match && y1_match && x2_match && y2_match) {754// Do we need to think about rotation or UVs?755if (!state.enableTextures)756return true;757758x1_match = uat(0) == uat(3) && uat(0) == uat(5);759x2_match = uat(1) == uat(2) && uat(1) == uat(4);760y1_match = vat(0) == vat(1) && vat(0) == vat(3);761y2_match = vat(2) == vat(4) && vat(2) == vat(5);762if (x1_match && y1_match && x2_match && y2_match) {763// Double check rotation direction.764return vat(tl) < vat(br) && yat(tl) < yat(br) && uat(tl) < uat(br) && xat(tl) < xat(br);765}766}767}768769return false;770}771772bool DetectRectangleThroughModeSlices(const RasterizerState &state, const ClipVertexData data[4]) {773// Color and Z must be flat.774for (int i = 1; i < 4; ++i) {775if (!(data[i].v.color0 == data[0].v.color0))776return false;777if (!(data[i].v.screenpos.z == data[0].v.screenpos.z)) {778// Sometimes, we don't actually care about z.779if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)780return false;781}782}783784// Games very commonly use vertical strips of rectangles. Detect and combine.785const auto &tl1 = data[0].v.screenpos, &br1 = data[1].v.screenpos;786const auto &tl2 = data[2].v.screenpos, &br2 = data[3].v.screenpos;787if (tl1.y == tl2.y && br1.y == br2.y && br1.y > tl1.y) {788if (br1.x == tl2.x && tl1.x < br1.x && tl2.x < br2.x) {789if (!state.enableTextures)790return true;791792const auto &textl1 = data[0].v.texturecoords, &texbr1 = data[1].v.texturecoords;793const auto &textl2 = data[2].v.texturecoords, &texbr2 = data[3].v.texturecoords;794if (textl1.y != textl2.y || texbr1.y != texbr2.y || textl1.y > texbr1.y)795return false;796if (texbr1.x != textl2.x || textl1.x > texbr1.x || textl2.x > texbr2.x)797return false;798799// We might be able to compare ratios, but let's expect 1:1.800int texdiff1 = (texbr1.x - textl1.x) * (float)SCREEN_SCALE_FACTOR;801int texdiff2 = (texbr2.x - textl2.x) * (float)SCREEN_SCALE_FACTOR;802int posdiff1 = br1.x - tl1.x;803int posdiff2 = br2.x - tl2.x;804return texdiff1 == posdiff1 && texdiff2 == posdiff2;805}806}807808return false;809}810811} // namespace Rasterizer812813814815