CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/DrawEngineCommon.h
Views: 1401
1
// Copyright (c) 2013- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include <vector>
21
22
#include "Common/CommonTypes.h"
23
#include "Common/Data/Collections/Hashmaps.h"
24
25
#include "GPU/Math3D.h"
26
#include "GPU/GPUState.h"
27
#include "GPU/Common/GPUStateUtils.h"
28
#include "GPU/Common/GPUDebugInterface.h"
29
#include "GPU/Common/IndexGenerator.h"
30
#include "GPU/Common/VertexDecoderCommon.h"
31
32
class VertexDecoder;
33
34
// Size limits for the decoded vertex/index buffers.
enum {
	VERTEX_BUFFER_MAX = 65536,
	DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 2 * 36,  // 36 == sizeof(SimpleVertex)
	DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 6 * 6 * 2,  // * 6 for spline tessellation, then * 6 again for converting into points/lines, and * 2 for 2 bytes per index
};
39
40
// Named texture slot indices. The values are consecutive starting at 0;
// keep them stable, since code elsewhere presumably relies on the exact numbers (TODO confirm).
enum {
	TEX_SLOT_PSP_TEXTURE = 0,
	TEX_SLOT_SHADERBLEND_SRC = 1,
	TEX_SLOT_ALPHATEST = 2,
	TEX_SLOT_CLUT = 3,
	TEX_SLOT_SPLINE_POINTS = 4,
	TEX_SLOT_SPLINE_WEIGHTS_U = 5,
	TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};
49
50
// State value filled in by DrawEngineCommon::ApplyFramebufferRead().
// NOTE(review): the names suggest "no framebuffer read", "copy then bind as texture" and
// "read the framebuffer directly" - confirm against the implementation.
enum FBOTexState {
	FBO_TEX_NONE,
	FBO_TEX_COPY_BIND_TEX,
	FBO_TEX_READ_FRAMEBUFFER,
};
55
56
// Builds the vertex type ID used to look up a vertex decoder.
//
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits.
//
// Layout of the result:
//   bits 0-23 : the low 24 bits of vertType
//   bits 24+  : uvGenMode
//   bit  26   : skinInDecode
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode, bool skinInDecode) {
	// Do the shifts in unsigned arithmetic so the packing never depends on signed-shift behavior.
	const uint32_t uvGenBits = static_cast<uint32_t>(uvGenMode) << 24;
	const uint32_t skinBit = static_cast<uint32_t>(skinInDecode) << 26;
	return (vertType & 0xFFFFFF) | uvGenBits | skinBit;
}
61
62
struct SimpleVertex;
63
namespace Spline { struct Weight2D; }
64
65
class TessellationDataTransfer {
66
public:
67
virtual ~TessellationDataTransfer() {}
68
static void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
69
virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
70
};
71
72
// Culling plane, group of 8.
// The coefficients are stored per component (all x, then all y, ...) rather than per plane.
struct alignas(16) Plane8 {
	float x[8], y[8], z[8], w[8];

	// Stores the coefficients (_x, _y, _z, _w) of plane number i. No bounds checking is done on i.
	void Set(int i, float _x, float _y, float _z, float _w) {
		x[i] = _x;
		y[i] = _y;
		z[i] = _z;
		w[i] = _w;
	}

	// Evaluates plane i at the point f: x*f[0] + y*f[1] + z*f[2] + w.
	float Test(int i, const float f[3]) const {
		return x[i] * f[0] + y[i] * f[1] + z[i] * f[2] + w[i];
	}
};
78
79
class DrawEngineCommon {
80
public:
81
DrawEngineCommon();
82
virtual ~DrawEngineCommon();
83
84
void Init();
85
virtual void DeviceLost() = 0;
86
virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
87
88
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
89
90
static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
91
92
// Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway.
93
// Not really sure if these wrappers are worth it...
94
virtual void DispatchFlush() = 0;
95
96
// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
97
// is different. Should probably refactor that.
98
// Note that vertTypeID should be computed using GetVertTypeID().
99
virtual void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) {
100
SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, clockwise, bytesRead);
101
}
102
103
virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);
104
105
bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType);
106
107
// This is a less accurate version of TestBoundingBox, but faster. Can have more false positives.
108
// Doesn't support indexing.
109
bool TestBoundingBoxFast(const void *control_points, int vertexCount, u32 vertType);
110
111
void FlushSkin() {
112
bool applySkin = (lastVType_ & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
113
if (applySkin) {
114
DecodeVerts(decoded_);
115
}
116
}
117
118
int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle);
119
bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead);
120
void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
121
122
template<class Surface>
123
void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
124
static void ClearSplineBezierWeights();
125
126
bool CanUseHardwareTransform(int prim) const;
127
bool CanUseHardwareTessellation(GEPatchPrimType prim) const;
128
129
std::vector<std::string> DebugGetVertexLoaderIDs();
130
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
131
132
virtual void NotifyConfigChanged();
133
134
bool EverUsedExactEqualDepth() const {
135
return everUsedExactEqualDepth_;
136
}
137
void SetEverUsedExactEqualDepth(bool v) {
138
everUsedExactEqualDepth_ = v;
139
}
140
141
bool DescribeCodePtr(const u8 *ptr, std::string &name) const;
142
int GetNumDrawCalls() const {
143
return numDrawVerts_;
144
}
145
146
VertexDecoder *GetVertexDecoder(u32 vtype);
147
148
virtual void ClearTrackedVertexArrays() {}
149
150
protected:
151
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
152
void UpdatePlanes();
153
154
void DecodeVerts(u8 *dest);
155
int DecodeInds();
156
157
// Preprocessing for spline/bezier
158
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
159
160
int ComputeNumVertsToDecode() const;
161
162
void ApplyFramebufferRead(FBOTexState *fboTexState);
163
164
static inline int IndexSize(u32 vtype) {
165
const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);
166
if (indexType == GE_VTYPE_IDX_16BIT) {
167
return 2;
168
} else if (indexType == GE_VTYPE_IDX_32BIT) {
169
return 4;
170
}
171
return 1;
172
}
173
174
inline void UpdateEverUsedEqualDepth(GEComparison comp) {
175
switch (comp) {
176
case GE_COMP_EQUAL:
177
everUsedExactEqualDepth_ = true;
178
everUsedEqualDepth_ = true;
179
break;
180
181
case GE_COMP_NOTEQUAL:
182
case GE_COMP_LEQUAL:
183
case GE_COMP_GEQUAL:
184
everUsedEqualDepth_ = true;
185
break;
186
187
default:
188
break;
189
}
190
}
191
192
inline void ResetAfterDrawInline() {
193
gpuStats.numFlushes++;
194
gpuStats.numDrawCalls += numDrawInds_;
195
gpuStats.numVertexDecodes += numDrawVerts_;
196
gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
197
gpuStats.numVertsDecoded += numDecodedVerts_;
198
199
indexGen.Reset();
200
numDecodedVerts_ = 0;
201
numDrawVerts_ = 0;
202
numDrawInds_ = 0;
203
vertexCountInDrawCalls_ = 0;
204
decodeIndsCounter_ = 0;
205
decodeVertsCounter_ = 0;
206
seenPrims_ = 0;
207
anyCCWOrIndexed_ = false;
208
gstate_c.vertexFullAlpha = true;
209
210
// Now seems as good a time as any to reset the min/max coords, which we may examine later.
211
gstate_c.vertBounds.minU = 512;
212
gstate_c.vertBounds.minV = 512;
213
gstate_c.vertBounds.maxU = 0;
214
gstate_c.vertBounds.maxV = 0;
215
}
216
217
inline bool CollectedPureDraw() const {
218
switch (seenPrims_) {
219
case 1 << GE_PRIM_TRIANGLE_STRIP:
220
return !anyCCWOrIndexed_ && numDrawInds_ == 1;
221
case 1 << GE_PRIM_LINES:
222
case 1 << GE_PRIM_POINTS:
223
case 1 << GE_PRIM_TRIANGLES:
224
return !anyCCWOrIndexed_;
225
default:
226
return false;
227
}
228
}
229
230
inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {
231
if (!forceIndexed && CollectedPureDraw()) {
232
*prim = drawInds_[0].prim;
233
*numVerts = numDecodedVerts_;
234
*maxIndex = numDecodedVerts_;
235
*useElements = false;
236
} else {
237
int vertexCount = DecodeInds();
238
*numVerts = vertexCount;
239
*maxIndex = numDecodedVerts_;
240
*prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
241
*useElements = true;
242
}
243
}
244
245
inline int RemainingIndices(const uint16_t *inds) const {
246
return DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t) - (inds - decIndex_);
247
}
248
249
bool useHWTransform_ = false;
250
bool useHWTessellation_ = false;
251
// Used to prevent unnecessary flushing in softgpu.
252
bool flushOnParams_ = true;
253
254
// Set once a equal depth test is encountered.
255
bool everUsedEqualDepth_ = false;
256
bool everUsedExactEqualDepth_ = false;
257
258
// Vertex collector buffers
259
u8 *decoded_ = nullptr;
260
u16 *decIndex_ = nullptr;
261
262
// Cached vertex decoders
263
DenseHashMap<u32, VertexDecoder *> decoderMap_;
264
VertexDecoderJitCache *decJitCache_ = nullptr;
265
VertexDecoderOptions decOptions_{};
266
267
TransformedVertex *transformed_ = nullptr;
268
TransformedVertex *transformedExpanded_ = nullptr;
269
270
// Defer all vertex decoding to a "Flush" (except when software skinning)
271
struct DeferredVerts {
272
const void *verts;
273
UVScale uvScale;
274
u32 vertexCount;
275
u16 indexLowerBound;
276
u16 indexUpperBound;
277
};
278
279
struct DeferredInds {
280
const void *inds;
281
u32 vertexCount;
282
u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.
283
u8 indexType;
284
GEPrimitiveType prim;
285
bool clockwise;
286
u16 offset;
287
};
288
289
enum { MAX_DEFERRED_DRAW_VERTS = 128 }; // If you change this to more than 256, change type of DeferredInds::vertDecodeIndex.
290
enum { MAX_DEFERRED_DRAW_INDS = 512 }; // Monster Hunter spams indexed calls that we end up merging.
291
DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_VERTS];
292
uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
293
DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];
294
295
VertexDecoder *dec_ = nullptr;
296
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
297
int numDrawVerts_ = 0;
298
int numDrawInds_ = 0;
299
int vertexCountInDrawCalls_ = 0;
300
301
int decodeVertsCounter_ = 0;
302
int decodeIndsCounter_ = 0;
303
304
int seenPrims_ = 0;
305
bool anyCCWOrIndexed_ = 0;
306
bool anyIndexed_ = 0;
307
308
// Vertex collector state
309
IndexGenerator indexGen;
310
int numDecodedVerts_ = 0;
311
GEPrimitiveType prevPrim_ = GE_PRIM_INVALID;
312
313
// Shader blending state
314
bool fboTexBound_ = false;
315
316
// Sometimes, unusual situations mean we need to reset dirty flags after state calc finishes.
317
uint64_t dirtyRequiresRecheck_ = 0;
318
319
ComputedPipelineState pipelineState_;
320
321
// Hardware tessellation
322
TessellationDataTransfer *tessDataTransfer;
323
324
// Culling
325
Plane8 planes_;
326
Vec2f minOffset_;
327
Vec2f maxOffset_;
328
bool offsetOutsideEdge_;
329
};
330
331