CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/RasterizerRectangle.cpp
Views: 1401
1
// See comment in header for the purpose of the code in this file.
2
3
#include "ppsspp_config.h"
4
#include <algorithm>
5
#include <cmath>
6
7
#include "Common/Common.h"
8
#include "Common/Data/Convert/ColorConv.h"
9
#include "Common/Profiler/Profiler.h"
10
#include "Common/StringUtils.h"
11
12
#include "Core/Config.h"
13
#include "Core/Debugger/MemBlockInfo.h"
14
#include "Core/MemMap.h"
15
#include "Core/System.h"
16
#include "GPU/GPUState.h"
17
18
#include "GPU/Common/TextureCacheCommon.h"
19
#include "GPU/Software/BinManager.h"
20
#include "GPU/Software/DrawPixel.h"
21
#include "GPU/Software/Rasterizer.h"
22
#include "GPU/Software/Sampler.h"
23
#include "GPU/Software/SoftGpu.h"
24
25
#if defined(_M_SSE)
26
#include <emmintrin.h>
27
#endif
28
29
#if PPSSPP_ARCH(ARM_NEON)
30
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
31
#include <arm64_neon.h>
32
#else
33
#include <arm_neon.h>
34
#endif
35
#endif
36
37
extern DSStretch g_DarkStalkerStretch;
38
// For Darkstalkers hack. Ugh.
39
extern bool currentDialogActive;
40
41
namespace Rasterizer {
42
43
// This essentially AlphaBlendingResult() with fixed src.a / 1 - src.a factors and ADD equation.
44
// It allows us to skip round trips between 32-bit and 16-bit color values.
45
static uint32_t StandardAlphaBlend(uint32_t source, uint32_t dst) {
46
#if defined(_M_SSE)
47
const __m128i alpha = _mm_cvtsi32_si128(source >> 24);
48
// Keep the alpha lane of the srcfactor zero, so we keep dest alpha.
49
const __m128i srcfactor = _mm_shufflelo_epi16(alpha, _MM_SHUFFLE(1, 0, 0, 0));
50
const __m128i dstfactor = _mm_sub_epi16(_mm_set1_epi16(255), srcfactor);
51
52
const __m128i z = _mm_setzero_si128();
53
const __m128i sourcevec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(source), z);
54
const __m128i dstvec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dst), z);
55
56
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
57
const __m128i half = _mm_set1_epi16(1 << 3);
58
59
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(sourcevec, 4), half);
60
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(srcfactor, 4), half);
61
const __m128i s = _mm_mulhi_epi16(srgb, sf);
62
63
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(dstvec, 4), half);
64
const __m128i df = _mm_add_epi16(_mm_slli_epi16(dstfactor, 4), half);
65
const __m128i d = _mm_mulhi_epi16(drgb, df);
66
67
const __m128i blended16 = _mm_adds_epi16(s, d);
68
return _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));
69
#elif PPSSPP_ARCH(ARM64_NEON)
70
uint16x4_t sf = vdup_n_u16((source >> 24) * 2 + 1);
71
uint16x4_t df = vdup_n_u16((255 - (source >> 24)) * 2 + 1);
72
73
// Convert both to 16-bit, double, and add the half before even going to 32 bit.
74
uint16x8_t sd_c16 = vmovl_u8(vcreate_u8((uint64_t)source | ((uint64_t)dst << 32)));
75
sd_c16 = vaddq_u16(vshlq_n_u16(sd_c16, 1), vdupq_n_u16(1));
76
77
uint16x4_t srgb = vget_low_u16(sd_c16);
78
uint16x4_t drgb = vget_high_u16(sd_c16);
79
80
uint16x4_t s = vshrn_n_u32(vmull_u16(srgb, sf), 10);
81
uint16x4_t d = vshrn_n_u32(vmull_u16(drgb, df), 10);
82
83
uint16x4_t blended = vset_lane_u16(0, vadd_u16(s, d), 3);
84
uint8x8_t blended8 = vqmovn_u16(vcombine_u16(blended, blended));
85
return vget_lane_u32(vreinterpret_u32_u8(blended8), 0);
86
#else
87
Vec3<int> srcfactor = Vec3<int>::AssignToAll(source >> 24);
88
Vec3<int> dstfactor = Vec3<int>::AssignToAll(255 - (source >> 24));
89
90
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
91
Vec3<int> lhs = ((Vec3<int>::FromRGB(source) * 2 + half) * (srcfactor * 2 + half)) / 1024;
92
Vec3<int> rhs = ((Vec3<int>::FromRGB(dst) * 2 + half) * (dstfactor * 2 + half)) / 1024;
93
Vec3<int> blended = lhs + rhs;
94
95
return clamp_u8(blended.r()) | (clamp_u8(blended.g()) << 8) | (clamp_u8(blended.b()) << 16);
96
#endif
97
}
98
99
// Through mode, with the specific Darkstalker settings.
100
template <GEBufferFormat fmt, bool alphaBlend>
101
static inline void DrawSinglePixel(u16 *pixel, const u32 color_in) {
102
u32 new_color;
103
// Because of this check, we only support src.a / 1-src.a blending.
104
if (!alphaBlend || (color_in >> 24) == 255) {
105
new_color = color_in & 0xFFFFFF;
106
} else {
107
u32 old_color;
108
switch (fmt) {
109
case GE_FORMAT_565:
110
old_color = RGB565ToRGBA8888(*pixel);
111
break;
112
case GE_FORMAT_5551:
113
old_color = RGBA5551ToRGBA8888(*pixel);
114
break;
115
case GE_FORMAT_4444:
116
old_color = RGBA4444ToRGBA8888(*pixel);
117
break;
118
default:
119
break;
120
}
121
122
new_color = StandardAlphaBlend(color_in, old_color);
123
}
124
125
switch (fmt) {
126
case GE_FORMAT_565:
127
*pixel = RGBA8888ToRGB565(new_color);
128
break;
129
case GE_FORMAT_5551:
130
*pixel = RGBA8888ToRGBA555X(new_color) | (*pixel & 0x8000);
131
break;
132
case GE_FORMAT_4444:
133
*pixel = RGBA8888ToRGBA444X(new_color) | (*pixel & 0xF000);
134
break;
135
default:
136
break;
137
}
138
}
139
140
template <bool alphaBlend>
141
static inline void DrawSinglePixel32(u32 *pixel, const u32 color_in) {
142
u32 new_color;
143
// Because of this check, we only support src.a / 1-src.a blending.
144
if (!alphaBlend || (color_in >> 24) == 255) {
145
new_color = color_in & 0xFFFFFF;
146
} else {
147
const u32 old_color = *pixel;
148
new_color = StandardAlphaBlend(color_in, old_color);
149
}
150
new_color |= *pixel & 0xFF000000;
151
*pixel = new_color;
152
}
153
154
// Check if we can safely ignore the alpha test, assuming standard alpha blending.
155
static inline bool AlphaTestIsNeedless(const PixelFuncID &pixelID) {
156
switch (pixelID.AlphaTestFunc()) {
157
case GE_COMP_NEVER:
158
case GE_COMP_EQUAL:
159
case GE_COMP_LESS:
160
case GE_COMP_LEQUAL:
161
return false;
162
163
case GE_COMP_ALWAYS:
164
return true;
165
166
case GE_COMP_NOTEQUAL:
167
case GE_COMP_GREATER:
168
case GE_COMP_GEQUAL:
169
if (pixelID.alphaTestRef != 0 || pixelID.hasAlphaTestMask)
170
return false;
171
return true;
172
}
173
174
return false;
175
}
176
177
static bool UseDrawSinglePixel(const PixelFuncID &pixelID) {
178
if (pixelID.clearMode || pixelID.colorTest || pixelID.stencilTest)
179
return false;
180
if (!AlphaTestIsNeedless(pixelID) || pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
181
return false;
182
// We skip blending when alpha = FF, so we can't allow other blend modes.
183
if (pixelID.alphaBlend) {
184
if (pixelID.AlphaBlendEq() != GE_BLENDMODE_MUL_AND_ADD || pixelID.AlphaBlendSrc() != PixelBlendFactor::SRCALPHA)
185
return false;
186
if (pixelID.AlphaBlendDst() != PixelBlendFactor::INVSRCALPHA)
187
return false;
188
}
189
if (pixelID.dithering || pixelID.applyLogicOp || pixelID.applyColorWriteMask)
190
return false;
191
192
return true;
193
}
194
195
// Multiplies the primitive color by the texture color per channel, computed as
// (prim + 1) * tex / 256.  With samplerID.useColorDoubling the texture RGB (not alpha)
// is doubled first.
static inline Vec4IntResult SOFTRAST_CALL ModulateRGBA(Vec4IntArg prim_in, Vec4IntArg texcolor_in, const SamplerID &samplerID) {
	Vec4<int> result;
	Vec4<int> prim = prim_in;
	Vec4<int> tex = texcolor_in;

#if defined(_M_SSE)
	// Modulate weights slightly on the tex color, by adding one to prim and dividing by 256.
	// Both sides carry 4 extra bits so that mulhi's >> 16 performs the / 256.
	const __m128i prim16 = _mm_slli_epi16(_mm_packs_epi32(prim.ivec, prim.ivec), 4);
	const __m128i primPlusOne = _mm_add_epi16(prim16, _mm_set1_epi16(1 << 4));
	__m128i tex16 = _mm_slli_epi16(_mm_packs_epi32(tex.ivec, tex.ivec), 4);
	if (samplerID.useColorDoubling) {
		// Shift only the RGB lanes; the alpha lanes pass through unchanged.
		const __m128i alphaLanes = _mm_set_epi16(-1, 0, 0, 0, -1, 0, 0, 0);
		const __m128i alphaOnly = _mm_and_si128(tex16, alphaLanes);
		const __m128i rgbOnly = _mm_andnot_si128(alphaLanes, tex16);
		tex16 = _mm_or_si128(_mm_slli_epi16(rgbOnly, 1), alphaOnly);
	}
	const __m128i product = _mm_mulhi_epi16(primPlusOne, tex16);
	result.ivec = _mm_unpacklo_epi16(product, _mm_setzero_si128());
#elif PPSSPP_ARCH(ARM64_NEON)
	const int32x4_t primPlusOne = vaddq_s32(prim.ivec, vdupq_n_s32(1));
	int32x4_t tex32 = tex.ivec;
	if (samplerID.useColorDoubling) {
		// Per-lane shift counts: double R, G and B, leave alpha alone.
		static const int32_t rgbDouble[4] = {1, 1, 1, 0};
		tex32 = vshlq_s32(tex32, vld1q_s32(rgbDouble));
	}
	result.ivec = vshrq_n_s32(vmulq_s32(primPlusOne, tex32), 8);
#else
	const Vec4<int> primPlusOne = prim + Vec4<int>::AssignToAll(1);
	if (samplerID.useColorDoubling)
		result = (primPlusOne * (tex * Vec4<int>(2, 2, 2, 1))) / 256;
	else
		result = primPlusOne * tex / 256;
#endif

	return ToVec4IntResult(result);
}
232
233
template <GEBufferFormat fmt, bool isWhite, bool alphaBlend, bool alphaTestZero>
234
static void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {
235
const u8 *texptr = state.texptr[0];
236
uint16_t texbufw = state.texbufw[0];
237
238
int t = t_start;
239
const Vec4<int> c0 = Vec4<int>::FromRGBA(color0);
240
for (int y = pos0.y; y < pos1.y; y++) {
241
int s = s_start;
242
u16 *pixel16 = fb.Get16Ptr(pos0.x, y, state.pixelID.cached.framebufStride);
243
u32 *pixel32 = fb.Get32Ptr(pos0.x, y, state.pixelID.cached.framebufStride);
244
for (int x = pos0.x; x < pos1.x; x++) {
245
Vec4<int> tex_color = fetchFunc(s, t, texptr, texbufw, 0, state.samplerID);
246
if (isWhite) {
247
if (!alphaTestZero || tex_color.a() != 0) {
248
u32 tex_color32 = tex_color.ToRGBA();
249
if (fmt == GE_FORMAT_8888)
250
DrawSinglePixel32<alphaBlend>(pixel32, tex_color32);
251
else
252
DrawSinglePixel<fmt, alphaBlend>(pixel16, tex_color32);
253
}
254
} else {
255
Vec4<int> prim_color = c0;
256
prim_color = Vec4<int>(ModulateRGBA(ToVec4IntArg(prim_color), ToVec4IntArg(tex_color), state.samplerID));
257
if (!alphaTestZero || prim_color.a() > 0) {
258
if (fmt == GE_FORMAT_8888)
259
DrawSinglePixel32<alphaBlend>(pixel32, prim_color.ToRGBA());
260
else
261
DrawSinglePixel<fmt, alphaBlend>(pixel16, prim_color.ToRGBA());
262
}
263
}
264
s += ds;
265
if (fmt == GE_FORMAT_8888)
266
pixel32++;
267
else
268
pixel16++;
269
}
270
t += dt;
271
}
272
}
273
274
template <bool isWhite, bool alphaBlend, bool alphaTestZero>
275
static void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {
276
switch (state.pixelID.FBFormat()) {
277
case GE_FORMAT_565:
278
DrawSpriteTex<GE_FORMAT_565, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
279
break;
280
case GE_FORMAT_5551:
281
DrawSpriteTex<GE_FORMAT_5551, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
282
break;
283
case GE_FORMAT_4444:
284
DrawSpriteTex<GE_FORMAT_4444, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
285
break;
286
case GE_FORMAT_8888:
287
DrawSpriteTex<GE_FORMAT_8888, isWhite, alphaBlend, alphaTestZero>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
288
break;
289
default:
290
// Invalid, don't draw anything...
291
break;
292
}
293
}
294
295
template <bool isWhite>
296
static inline void DrawSpriteTex(const DrawingCoords &pos0, const DrawingCoords &pos1, int s_start, int t_start, int ds, int dt, u32 color0, const RasterizerState &state, Sampler::FetchFunc fetchFunc) {
297
// Standard alpha blending implies skipping alpha zero.
298
if (state.pixelID.alphaBlend)
299
DrawSpriteTex<isWhite, true, true>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
300
else if (state.pixelID.AlphaTestFunc() != GE_COMP_ALWAYS)
301
DrawSpriteTex<isWhite, false, true>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
302
else
303
DrawSpriteTex<isWhite, false, false>(pos0, pos1, s_start, t_start, ds, dt, color0, state, fetchFunc);
304
}
305
306
template <GEBufferFormat fmt, bool alphaBlend>
307
static void DrawSpriteNoTex(const DrawingCoords &pos0, const DrawingCoords &pos1, u32 color0, const RasterizerState &state) {
308
if constexpr (alphaBlend)
309
if (Vec4<int>::FromRGBA(color0).a() == 0)
310
return;
311
312
for (int y = pos0.y; y < pos1.y; y++) {
313
if (fmt == GE_FORMAT_8888) {
314
u32 *pixel = fb.Get32Ptr(pos0.x, y, state.pixelID.cached.framebufStride);
315
for (int x = pos0.x; x < pos1.x; x++) {
316
DrawSinglePixel32<alphaBlend>(pixel, color0);
317
pixel++;
318
}
319
} else {
320
u16 *pixel = fb.Get16Ptr(pos0.x, y, state.pixelID.cached.framebufStride);
321
for (int x = pos0.x; x < pos1.x; x++) {
322
DrawSinglePixel<fmt, alphaBlend>(pixel, color0);
323
pixel++;
324
}
325
}
326
}
327
}
328
329
template <bool alphaBlend>
330
static void DrawSpriteNoTex(const DrawingCoords &pos0, const DrawingCoords &pos1, u32 color0, const RasterizerState &state) {
331
switch (state.pixelID.FBFormat()) {
332
case GE_FORMAT_565:
333
DrawSpriteNoTex<GE_FORMAT_565, alphaBlend>(pos0, pos1, color0, state);
334
break;
335
case GE_FORMAT_5551:
336
DrawSpriteNoTex<GE_FORMAT_5551, alphaBlend>(pos0, pos1, color0, state);
337
break;
338
case GE_FORMAT_4444:
339
DrawSpriteNoTex<GE_FORMAT_4444, alphaBlend>(pos0, pos1, color0, state);
340
break;
341
case GE_FORMAT_8888:
342
DrawSpriteNoTex<GE_FORMAT_8888, alphaBlend>(pos0, pos1, color0, state);
343
break;
344
default:
345
// Invalid, don't draw anything...
346
break;
347
}
348
}
349
350
void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &range, const RasterizerState &state) {
351
const u8 *texptr = state.texptr[0];
352
353
GETextureFormat texfmt = state.samplerID.TexFmt();
354
uint16_t texbufw = state.texbufw[0];
355
356
// We won't flush, since we compile all samplers together.
357
Sampler::FetchFunc fetchFunc = Sampler::GetFetchFunc(state.samplerID, nullptr);
358
_dbg_assert_msg_(fetchFunc != nullptr, "Failed to get precompiled fetch func");
359
auto &pixelID = state.pixelID;
360
auto &samplerID = state.samplerID;
361
362
DrawingCoords pos0 = TransformUnit::ScreenToDrawing(v0.screenpos);
363
// Include the ending pixel based on its center, not start.
364
DrawingCoords pos1 = TransformUnit::ScreenToDrawing(v1.screenpos + ScreenCoords(7, 7, 0));
365
366
DrawingCoords scissorTL = TransformUnit::ScreenToDrawing(range.x1, range.y1);
367
DrawingCoords scissorBR = TransformUnit::ScreenToDrawing(range.x2, range.y2);
368
369
const int z = v1.screenpos.z;
370
constexpr int fog = 255;
371
372
// Since it's flat, we can check depth range early. Matters for earlyZChecks.
373
if (pixelID.applyDepthRange && (z < pixelID.cached.minz || z > pixelID.cached.maxz))
374
return;
375
376
bool isWhite = v1.color0 == 0xFFFFFFFF;
377
378
if (state.enableTextures) {
379
// 1:1 (but with mirror support) texture mapping!
380
int s_start = v0.texturecoords.x;
381
int t_start = v0.texturecoords.y;
382
int ds = v1.texturecoords.x > v0.texturecoords.x ? 1 : -1;
383
int dt = v1.texturecoords.y > v0.texturecoords.y ? 1 : -1;
384
385
if (ds < 0) {
386
s_start += ds;
387
}
388
if (dt < 0) {
389
t_start += dt;
390
}
391
392
// First clip the right and bottom sides, since we don't need to adjust the deltas.
393
if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
394
if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
395
// Now clip the other sides.
396
if (pos0.x < scissorTL.x) {
397
s_start += (scissorTL.x - pos0.x) * ds;
398
pos0.x = scissorTL.x;
399
}
400
if (pos0.y < scissorTL.y) {
401
t_start += (scissorTL.y - pos0.y) * dt;
402
pos0.y = scissorTL.y;
403
}
404
405
if (UseDrawSinglePixel(pixelID) && (samplerID.TexFunc() == GE_TEXFUNC_MODULATE || samplerID.TexFunc() == GE_TEXFUNC_REPLACE) && samplerID.useTextureAlpha) {
406
if (isWhite || samplerID.TexFunc() == GE_TEXFUNC_REPLACE) {
407
DrawSpriteTex<true>(pos0, pos1, s_start, t_start, ds, dt, v1.color0, state, fetchFunc);
408
} else {
409
DrawSpriteTex<false>(pos0, pos1, s_start, t_start, ds, dt, v1.color0, state, fetchFunc);
410
}
411
} else {
412
float dsf = ds * (1.0f / (float)(1 << state.samplerID.width0Shift));
413
float dtf = dt * (1.0f / (float)(1 << state.samplerID.height0Shift));
414
float sf_start = s_start * (1.0f / (float)(1 << state.samplerID.width0Shift));
415
float tf_start = t_start * (1.0f / (float)(1 << state.samplerID.height0Shift));
416
417
float t = tf_start;
418
const Vec4<int> c0 = Vec4<int>::FromRGBA(v1.color0);
419
if (pixelID.earlyZChecks) {
420
for (int y = pos0.y; y < pos1.y; y++) {
421
float s = sf_start;
422
// Not really that fast but faster than triangle.
423
for (int x = pos0.x; x < pos1.x; x++) {
424
if (CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
425
Vec4<int> prim_color = state.nearest(s, t, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
426
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
427
}
428
429
s += dsf;
430
}
431
t += dtf;
432
}
433
} else {
434
for (int y = pos0.y; y < pos1.y; y++) {
435
float s = sf_start;
436
// Not really that fast but faster than triangle.
437
for (int x = pos0.x; x < pos1.x; x++) {
438
Vec4<int> prim_color = state.nearest(s, t, ToVec4IntArg(c0), &texptr, &texbufw, 0, 0, state.samplerID);
439
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
440
s += dsf;
441
}
442
t += dtf;
443
}
444
}
445
}
446
} else {
447
if (pos1.x > scissorBR.x) pos1.x = scissorBR.x + 1;
448
if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1;
449
if (pos0.x < scissorTL.x) pos0.x = scissorTL.x;
450
if (pos0.y < scissorTL.y) pos0.y = scissorTL.y;
451
if (UseDrawSinglePixel(pixelID)) {
452
if (pixelID.alphaBlend)
453
DrawSpriteNoTex<true>(pos0, pos1, v1.color0, state);
454
else
455
DrawSpriteNoTex<false>(pos0, pos1, v1.color0, state);
456
} else if (pixelID.earlyZChecks) {
457
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
458
for (int y = pos0.y; y < pos1.y; y++) {
459
for (int x = pos0.x; x < pos1.x; x++) {
460
if (!CheckDepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z))
461
continue;
462
463
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
464
}
465
}
466
} else {
467
const Vec4<int> prim_color = Vec4<int>::FromRGBA(v1.color0);
468
for (int y = pos0.y; y < pos1.y; y++) {
469
for (int x = pos0.x; x < pos1.x; x++) {
470
state.drawPixel(x, y, z, fog, ToVec4IntArg(prim_color), pixelID);
471
}
472
}
473
}
474
}
475
476
#if defined(SOFTGPU_MEMORY_TAGGING_BASIC) || defined(SOFTGPU_MEMORY_TAGGING_DETAILED)
477
uint32_t bpp = pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
478
char tag[64]{};
479
// char ztag[64]{};
480
int tagLen = snprintf(tag, sizeof(tag), "DisplayListR_%08x", state.listPC);
481
// int ztagLen = snprintf(ztag, sizeof(ztag), "DisplayListRZ_%08x", state.listPC);
482
483
for (int y = pos0.y; y < pos1.y; y++) {
484
uint32_t row = gstate.getFrameBufAddress() + y * pixelID.cached.framebufStride * bpp;
485
NotifyMemInfo(MemBlockFlags::WRITE, row + pos0.x * bpp, (pos1.x - pos0.x) * bpp, tag, tagLen);
486
}
487
#endif
488
}
489
490
// Set by RectangleFastPath() when the DarkStalkers present rectangle is seen while a dialog
// is active.  The next time that rectangle is handled without a dialog, RectangleFastPath()
// queues an untextured 0xFF000000 fill over it and resets this flag.
bool g_needsClearAfterDialog = false;
491
492
static inline bool NoClampOrWrap(const RasterizerState &state, const Vec2f &tc) {
493
if (tc.x < 0 || tc.y < 0)
494
return false;
495
if (state.samplerID.cached.sizes[0].w > 512 || state.samplerID.cached.sizes[0].h > 512)
496
return false;
497
return tc.x <= state.samplerID.cached.sizes[0].w && tc.y <= state.samplerID.cached.sizes[0].h;
498
}
499
500
// Returns true if the normal path should be skipped.
501
bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &binner) {
502
const RasterizerState &state = binner.State();
503
504
g_DarkStalkerStretch = DSStretch::Off;
505
506
// Eliminate the stretch blit in DarkStalkers.
507
// We compensate for that when blitting the framebuffer in SoftGpu.cpp.
508
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {
509
// check for save/load dialog.
510
if (!currentDialogActive) {
511
if (v0.screenpos.x + gstate.getOffsetX16() == 0x7100 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8f00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
512
g_DarkStalkerStretch = DSStretch::Wide;
513
} else if (v0.screenpos.x + gstate.getOffsetX16() == 0x7400 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8C00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
514
g_DarkStalkerStretch = DSStretch::Normal;
515
} else {
516
return false;
517
}
518
if (g_needsClearAfterDialog) {
519
g_needsClearAfterDialog = false;
520
// Afterwards, we also need to clear the actual destination. Can do a fast rectfill.
521
gstate.textureMapEnable &= ~1;
522
VertexData newV1 = v1;
523
newV1.color0 = 0xFF000000;
524
binner.AddSprite(v0, newV1);
525
gstate.textureMapEnable |= 1;
526
}
527
return true;
528
} else {
529
g_needsClearAfterDialog = true;
530
}
531
}
532
533
// Check for 1:1 texture mapping. In that case we can call DrawSprite.
534
int xdiff = v1.screenpos.x - v0.screenpos.x;
535
int ydiff = v1.screenpos.y - v0.screenpos.y;
536
int udiff = (v1.texturecoords.x - v0.texturecoords.x) * (float)SCREEN_SCALE_FACTOR;
537
int vdiff = (v1.texturecoords.y - v0.texturecoords.y) * (float)SCREEN_SCALE_FACTOR;
538
539
// Currently only works for TL/BR, which is the most common but not required.
540
bool orient_check = xdiff >= 0 && ydiff >= 0;
541
// We already have a fast path for clear in ClearRectangle.
542
bool state_check = state.throughMode && !state.pixelID.clearMode && !state.samplerID.hasAnyMips && !state.textureProj;
543
bool coord_check = true;
544
if (state.enableTextures) {
545
state_check = state_check && NoClampOrWrap(state, v0.texturecoords.uv()) && NoClampOrWrap(state, v1.texturecoords.uv());
546
coord_check = (xdiff == udiff || xdiff == -udiff) && (ydiff == vdiff || ydiff == -vdiff);
547
}
548
// This doesn't work well with offset drawing, see #15876. Through never has a subpixel offset.
549
bool subpixel_check = ((v0.screenpos.x | v0.screenpos.y | v1.screenpos.x | v1.screenpos.y) & 0xF) == 0;
550
if (coord_check && orient_check && state_check && subpixel_check) {
551
binner.AddSprite(v0, v1);
552
return true;
553
}
554
return false;
555
}
556
557
static bool IsCoordRectangleCompatible(const RasterizerState &state, const ClipVertexData &data) {
558
if (!state.throughMode) {
559
// See AreCoordsRectangleCompatible() for most of these, this just checks the main vert.
560
if (data.OutsideRange())
561
return false;
562
if (data.clippos.w < 0.0f)
563
return false;
564
if (data.clippos.z < -data.clippos.w)
565
return false;
566
}
567
return true;
568
}
569
570
static bool AreCoordsRectangleCompatible(const RasterizerState &state, const ClipVertexData &data0, const ClipVertexData &data1) {
571
if (data1.v.color0 != data0.v.color0)
572
return false;
573
if (data1.v.screenpos.z != data0.v.screenpos.z) {
574
// Sometimes, we don't actually care about z.
575
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
576
return false;
577
}
578
if (!state.throughMode) {
579
if (data1.v.color1 != data0.v.color1)
580
return false;
581
// This means it should be culled, outside range.
582
if (data1.OutsideRange())
583
return false;
584
// Do we have to think about perspective correction or slope mip level?
585
if (state.enableTextures && data1.clippos.w != data0.clippos.w) {
586
// If the w is off by less than a factor of 1/512, it should be safe to treat as a rectangle.
587
static constexpr float halftexel = 0.5f / 512.0f;
588
if (data1.clippos.w - halftexel > data0.clippos.w || data1.clippos.w + halftexel < data0.clippos.w)
589
return false;
590
}
591
// We might need to cull this if all verts have negative w, which doesn't seem to happen for rectangles.
592
if (data1.clippos.w < 0.0f)
593
return false;
594
// And we also may need to clip, even if flat.
595
if (data1.clippos.z < -data1.clippos.w)
596
return false;
597
// If we're projecting textures, only allow an exact match for simplicity.
598
if (state.enableTextures && data1.v.texturecoords.q() != data0.v.texturecoords.q())
599
return false;
600
if (state.pixelID.applyFog && data1.v.fogdepth != data0.v.fogdepth) {
601
// Similar to w, this only matters if they're farther apart than 1/255.
602
static constexpr float foghalfstep = 0.5f / 255.0f;
603
if (data1.v.fogdepth - foghalfstep > data0.v.fogdepth || data1.v.fogdepth + foghalfstep < data0.v.fogdepth)
604
return false;
605
}
606
}
607
return true;
608
}
609
610
bool DetectRectangleFromStrip(const RasterizerState &state, const ClipVertexData data[4], int *tlIndex, int *brIndex) {
611
if (!IsCoordRectangleCompatible(state, data[0]))
612
return false;
613
614
// Color and Z must be flat. Also find the TL and BR meanwhile.
615
int tl = 0, br = 0;
616
for (int i = 1; i < 4; ++i) {
617
if (!AreCoordsRectangleCompatible(state, data[0], data[i]))
618
return false;
619
620
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)
621
tl = i;
622
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)
623
br = i;
624
}
625
626
*tlIndex = tl;
627
*brIndex = br;
628
629
// OK, now let's look at data to detect rectangles. There are a few possibilities
630
// but we focus on Darkstalkers for now.
631
if (data[0].v.screenpos.x == data[1].v.screenpos.x &&
632
data[0].v.screenpos.y == data[2].v.screenpos.y &&
633
data[2].v.screenpos.x == data[3].v.screenpos.x &&
634
data[1].v.screenpos.y == data[3].v.screenpos.y) {
635
// Okay, this is in the shape of a rectangle, but what about texture?
636
if (!state.enableTextures)
637
return true;
638
639
if (data[0].v.texturecoords.x == data[1].v.texturecoords.x &&
640
data[0].v.texturecoords.y == data[2].v.texturecoords.y &&
641
data[2].v.texturecoords.x == data[3].v.texturecoords.x &&
642
data[1].v.texturecoords.y == data[3].v.texturecoords.y) {
643
// It's a rectangle!
644
return true;
645
}
646
return false;
647
}
648
// There's the other vertex order too...
649
if (data[0].v.screenpos.x == data[2].v.screenpos.x &&
650
data[0].v.screenpos.y == data[1].v.screenpos.y &&
651
data[1].v.screenpos.x == data[3].v.screenpos.x &&
652
data[2].v.screenpos.y == data[3].v.screenpos.y) {
653
// Okay, this is in the shape of a rectangle, but what about texture?
654
if (!state.enableTextures)
655
return true;
656
657
if (data[0].v.texturecoords.x == data[2].v.texturecoords.x &&
658
data[0].v.texturecoords.y == data[1].v.texturecoords.y &&
659
data[1].v.texturecoords.x == data[3].v.texturecoords.x &&
660
data[2].v.texturecoords.y == data[3].v.texturecoords.y) {
661
// It's a rectangle!
662
return true;
663
}
664
return false;
665
}
666
return false;
667
}
668
669
bool DetectRectangleFromFan(const RasterizerState &state, const ClipVertexData *data, int *tlIndex, int *brIndex) {
670
if (!IsCoordRectangleCompatible(state, data[0]))
671
return false;
672
673
// Color and Z must be flat.
674
int tl = 0, br = 0;
675
for (int i = 1; i < 4; ++i) {
676
if (!AreCoordsRectangleCompatible(state, data[0], data[i]))
677
return false;
678
679
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)
680
tl = i;
681
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)
682
br = i;
683
}
684
685
*tlIndex = tl;
686
*brIndex = br;
687
688
int tr = 1, bl = 1;
689
for (int i = 0; i < 4; ++i) {
690
if (i == tl || i == br)
691
continue;
692
693
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y >= data[tl].v.screenpos.y)
694
bl = i;
695
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y <= data[br].v.screenpos.y)
696
tr = i;
697
}
698
699
// Must have found each of the coordinates.
700
if (tl + tr + bl + br != 6)
701
return false;
702
703
// Note the common case is a single TL-TR-BR-BL.
704
const auto &postl = data[tl].v.screenpos, &postr = data[tr].v.screenpos;
705
const auto &posbr = data[br].v.screenpos, &posbl = data[bl].v.screenpos;
706
if (postl.x == posbl.x && postr.x == posbr.x && postl.y == postr.y && posbl.y == posbr.y) {
707
// Do we need to think about rotation?
708
if (!state.enableTextures)
709
return true;
710
711
const auto &textl = data[tl].v.texturecoords, &textr = data[tr].v.texturecoords;
712
const auto &texbl = data[bl].v.texturecoords, &texbr = data[br].v.texturecoords;
713
714
if (textl.x == texbl.x && textr.x == texbr.x && textl.y == textr.y && texbl.y == texbr.y) {
715
// Okay, the texture is also good, but let's avoid rotation issues.
716
return textl.y < texbr.y && postl.y < posbr.y && textl.x < texbr.x && postl.x < posbr.x;
717
}
718
}
719
720
return false;
721
}
722
723
bool DetectRectangleFromPair(const RasterizerState &state, const ClipVertexData data[6], int *tlIndex, int *brIndex) {
724
if (!IsCoordRectangleCompatible(state, data[0]))
725
return false;
726
727
// Color and Z must be flat. Also find the TL and BR meanwhile.
728
int tl = 0, br = 0;
729
for (int i = 1; i < 6; ++i) {
730
if (!AreCoordsRectangleCompatible(state, data[0], data[i]))
731
return false;
732
733
if (data[i].v.screenpos.x <= data[tl].v.screenpos.x && data[i].v.screenpos.y <= data[tl].v.screenpos.y)
734
tl = i;
735
if (data[i].v.screenpos.x >= data[br].v.screenpos.x && data[i].v.screenpos.y >= data[br].v.screenpos.y)
736
br = i;
737
}
738
739
*tlIndex = tl;
740
*brIndex = br;
741
742
auto xat = [&](int i) { return data[i].v.screenpos.x; };
743
auto yat = [&](int i) { return data[i].v.screenpos.y; };
744
auto uat = [&](int i) { return data[i].v.texturecoords.x; };
745
auto vat = [&](int i) { return data[i].v.texturecoords.y; };
746
747
// A likely order would be: TL, TR, BR, TL, BR, BL. We'd have the last index of each.
748
// TODO: Make more generic.
749
if (tl == 3 && br == 4) {
750
bool x1_match = xat(0) == xat(3) && xat(0) == xat(5);
751
bool x2_match = xat(1) == xat(2) && xat(1) == xat(4);
752
bool y1_match = yat(0) == yat(1) && yat(0) == yat(3);
753
bool y2_match = yat(2) == yat(4) && yat(2) == yat(5);
754
if (x1_match && y1_match && x2_match && y2_match) {
755
// Do we need to think about rotation or UVs?
756
if (!state.enableTextures)
757
return true;
758
759
x1_match = uat(0) == uat(3) && uat(0) == uat(5);
760
x2_match = uat(1) == uat(2) && uat(1) == uat(4);
761
y1_match = vat(0) == vat(1) && vat(0) == vat(3);
762
y2_match = vat(2) == vat(4) && vat(2) == vat(5);
763
if (x1_match && y1_match && x2_match && y2_match) {
764
// Double check rotation direction.
765
return vat(tl) < vat(br) && yat(tl) < yat(br) && uat(tl) < uat(br) && xat(tl) < xat(br);
766
}
767
}
768
}
769
770
return false;
771
}
772
773
bool DetectRectangleThroughModeSlices(const RasterizerState &state, const ClipVertexData data[4]) {
774
// Color and Z must be flat.
775
for (int i = 1; i < 4; ++i) {
776
if (!(data[i].v.color0 == data[0].v.color0))
777
return false;
778
if (!(data[i].v.screenpos.z == data[0].v.screenpos.z)) {
779
// Sometimes, we don't actually care about z.
780
if (state.pixelID.depthWrite || state.pixelID.DepthTestFunc() != GE_COMP_ALWAYS)
781
return false;
782
}
783
}
784
785
// Games very commonly use vertical strips of rectangles. Detect and combine.
786
const auto &tl1 = data[0].v.screenpos, &br1 = data[1].v.screenpos;
787
const auto &tl2 = data[2].v.screenpos, &br2 = data[3].v.screenpos;
788
if (tl1.y == tl2.y && br1.y == br2.y && br1.y > tl1.y) {
789
if (br1.x == tl2.x && tl1.x < br1.x && tl2.x < br2.x) {
790
if (!state.enableTextures)
791
return true;
792
793
const auto &textl1 = data[0].v.texturecoords, &texbr1 = data[1].v.texturecoords;
794
const auto &textl2 = data[2].v.texturecoords, &texbr2 = data[3].v.texturecoords;
795
if (textl1.y != textl2.y || texbr1.y != texbr2.y || textl1.y > texbr1.y)
796
return false;
797
if (texbr1.x != textl2.x || textl1.x > texbr1.x || textl2.x > texbr2.x)
798
return false;
799
800
// We might be able to compare ratios, but let's expect 1:1.
801
int texdiff1 = (texbr1.x - textl1.x) * (float)SCREEN_SCALE_FACTOR;
802
int texdiff2 = (texbr2.x - textl2.x) * (float)SCREEN_SCALE_FACTOR;
803
int posdiff1 = br1.x - tl1.x;
804
int posdiff2 = br2.x - tl2.x;
805
return texdiff1 == posdiff1 && texdiff2 == posdiff2;
806
}
807
}
808
809
return false;
810
}
811
812
} // namespace Rasterizer
813
814
815