CoCalc -- DrawEngineCommon.h

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/DrawEngineCommon.h
Views: 1401
1
// Copyright (c) 2013- PPSSPP Project.
2

3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6

7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
// GNU General Public License 2.0 for more details.
11

12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14

15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17

18
#pragma once
19

20
#include <vector>
21

22
#include "Common/CommonTypes.h"
23
#include "Common/Data/Collections/Hashmaps.h"
24

25
#include "GPU/Math3D.h"
26
#include "GPU/GPUState.h"
27
#include "GPU/Common/GPUStateUtils.h"
28
#include "GPU/Common/GPUDebugInterface.h"
29
#include "GPU/Common/IndexGenerator.h"
30
#include "GPU/Common/VertexDecoderCommon.h"
31

32
// Forward declaration: the declarations in this header only refer to VertexDecoder through pointers.
class VertexDecoder;
33

34
// Capacity constants for the decoded vertex and index buffers.
enum {
	VERTEX_BUFFER_MAX = 65536,

	// 36 == sizeof(SimpleVertex)
	DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 2 * 36,

	// * 6 for spline tessellation, then * 6 again for converting into points/lines,
	// and * 2 for 2 bytes per index
	DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 6 * 6 * 2,
};
39

40
// Texture slot indices. The values are consecutive, starting at 0.
enum {
	TEX_SLOT_PSP_TEXTURE = 0,
	TEX_SLOT_SHADERBLEND_SRC = 1,
	TEX_SLOT_ALPHATEST = 2,
	TEX_SLOT_CLUT = 3,
	TEX_SLOT_SPLINE_POINTS = 4,
	TEX_SLOT_SPLINE_WEIGHTS_U = 5,
	TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};
49

50
// State value passed through DrawEngineCommon::ApplyFramebufferRead().
// The explicit values match the implicit numbering of the original declaration order.
enum FBOTexState {
	FBO_TEX_NONE = 0,
	FBO_TEX_COPY_BIND_TEX = 1,
	FBO_TEX_READ_FRAMEBUFFER = 2,
};
55

56
// Builds the vertex type ID used to look up a decoder.
//
// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it
// into the top of the verttype where there are unused bits:
//   bits 0-23: the low 24 bits of vertType
//   bits 24+:  uvGenMode
//   bit 26:    skinInDecode
// The shifts are done on uint32_t so no signed value is ever shifted.
inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode, bool skinInDecode) {
	const uint32_t vertexFormatBits = vertType & 0xFFFFFF;
	const uint32_t uvGenBits = static_cast<uint32_t>(uvGenMode) << 24;
	const uint32_t skinBit = static_cast<uint32_t>(skinInDecode) << 26;
	return vertexFormatBits | uvGenBits | skinBit;
}
61

62
// Forward declarations for types that appear only as pointers or references
// in TessellationDataTransfer's member signatures.
struct SimpleVertex;
namespace Spline { struct Weight2D; }
64

65
class TessellationDataTransfer {
66
public:
67
	virtual ~TessellationDataTransfer() {}
68
	static void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
69
	virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
70
};
71

72
// Culling plane, group of 8.
// The coefficients are stored per component (all x, then all y, ...) rather than per plane.
struct alignas(16) Plane8 {
	float x[8], y[8], z[8], w[8];

	// Stores the four coefficients of plane `i`. No bounds check is done on `i`.
	void Set(int i, float px, float py, float pz, float pw) {
		x[i] = px;
		y[i] = py;
		z[i] = pz;
		w[i] = pw;
	}

	// Evaluates plane `i` at point `f`: x*f[0] + y*f[1] + z*f[2] + w.
	float Test(int i, const float f[3]) const {
		return x[i] * f[0] + y[i] * f[1] + z[i] * f[2] + w[i];
	}
};
78

79
class DrawEngineCommon {
80
public:
81
	DrawEngineCommon();
82
	virtual ~DrawEngineCommon();
83

84
	void Init();
85
	virtual void DeviceLost() = 0;
86
	virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
87

88
	bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
89

90
	static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);
91

92
	// Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway.
93
	// Not really sure if these wrappers are worth it...
94
	virtual void DispatchFlush() = 0;
95

96
	// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
97
	// is different. Should probably refactor that.
98
	// Note that vertTypeID should be computed using GetVertTypeID().
99
	virtual void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) {
100
		SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, clockwise, bytesRead);
101
	}
102

103
	virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);
104

105
	bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType);
106

107
	// This is a less accurate version of TestBoundingBox, but faster. Can have more false positives.
108
	// Doesn't support indexing.
109
	bool TestBoundingBoxFast(const void *control_points, int vertexCount, u32 vertType);
110

111
	void FlushSkin() {
112
		bool applySkin = (lastVType_ & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
113
		if (applySkin) {
114
			DecodeVerts(decoded_);
115
		}
116
	}
117

118
	int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle);
119
	bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead);
120
	void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);
121

122
	template<class Surface>
123
	void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);
124
	static void ClearSplineBezierWeights();
125

126
	bool CanUseHardwareTransform(int prim) const;
127
	bool CanUseHardwareTessellation(GEPatchPrimType prim) const;
128

129
	std::vector<std::string> DebugGetVertexLoaderIDs();
130
	std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
131

132
	virtual void NotifyConfigChanged();
133

134
	bool EverUsedExactEqualDepth() const {
135
		return everUsedExactEqualDepth_;
136
	}
137
	void SetEverUsedExactEqualDepth(bool v) {
138
		everUsedExactEqualDepth_ = v;
139
	}
140

141
	bool DescribeCodePtr(const u8 *ptr, std::string &name) const;
142
	int GetNumDrawCalls() const {
143
		return numDrawVerts_;
144
	}
145

146
	VertexDecoder *GetVertexDecoder(u32 vtype);
147

148
	virtual void ClearTrackedVertexArrays() {}
149

150
protected:
151
	virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
152
	void UpdatePlanes();
153

154
	void DecodeVerts(u8 *dest);
155
	int DecodeInds();
156

157
	// Preprocessing for spline/bezier
158
	u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);
159

160
	int ComputeNumVertsToDecode() const;
161

162
	void ApplyFramebufferRead(FBOTexState *fboTexState);
163

164
	static inline int IndexSize(u32 vtype) {
165
		const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);
166
		if (indexType == GE_VTYPE_IDX_16BIT) {
167
			return 2;
168
		} else if (indexType == GE_VTYPE_IDX_32BIT) {
169
			return 4;
170
		}
171
		return 1;
172
	}
173

174
	inline void UpdateEverUsedEqualDepth(GEComparison comp) {
175
		switch (comp) {
176
		case GE_COMP_EQUAL:
177
			everUsedExactEqualDepth_ = true;
178
			everUsedEqualDepth_ = true;
179
			break;
180

181
		case GE_COMP_NOTEQUAL:
182
		case GE_COMP_LEQUAL:
183
		case GE_COMP_GEQUAL:
184
			everUsedEqualDepth_ = true;
185
			break;
186

187
		default:
188
			break;
189
		}
190
	}
191

192
	inline void ResetAfterDrawInline() {
193
		gpuStats.numFlushes++;
194
		gpuStats.numDrawCalls += numDrawInds_;
195
		gpuStats.numVertexDecodes += numDrawVerts_;
196
		gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;
197
		gpuStats.numVertsDecoded += numDecodedVerts_;
198

199
		indexGen.Reset();
200
		numDecodedVerts_ = 0;
201
		numDrawVerts_ = 0;
202
		numDrawInds_ = 0;
203
		vertexCountInDrawCalls_ = 0;
204
		decodeIndsCounter_ = 0;
205
		decodeVertsCounter_ = 0;
206
		seenPrims_ = 0;
207
		anyCCWOrIndexed_ = false;
208
		gstate_c.vertexFullAlpha = true;
209

210
		// Now seems as good a time as any to reset the min/max coords, which we may examine later.
211
		gstate_c.vertBounds.minU = 512;
212
		gstate_c.vertBounds.minV = 512;
213
		gstate_c.vertBounds.maxU = 0;
214
		gstate_c.vertBounds.maxV = 0;
215
	}
216

217
	inline bool CollectedPureDraw() const {
218
		switch (seenPrims_) {
219
		case 1 << GE_PRIM_TRIANGLE_STRIP:
220
			return !anyCCWOrIndexed_ && numDrawInds_ == 1;
221
		case 1 << GE_PRIM_LINES:
222
		case 1 << GE_PRIM_POINTS:
223
		case 1 << GE_PRIM_TRIANGLES:
224
			return !anyCCWOrIndexed_;
225
		default:
226
			return false;
227
		}
228
	}
229

230
	inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {
231
		if (!forceIndexed && CollectedPureDraw()) {
232
			*prim = drawInds_[0].prim;
233
			*numVerts = numDecodedVerts_;
234
			*maxIndex = numDecodedVerts_;
235
			*useElements = false;
236
		} else {
237
			int vertexCount = DecodeInds();
238
			*numVerts = vertexCount;
239
			*maxIndex = numDecodedVerts_;
240
			*prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
241
			*useElements = true;
242
		}
243
	}
244

245
	inline int RemainingIndices(const uint16_t *inds) const {
246
		return DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t) - (inds - decIndex_);
247
	}
248

249
	bool useHWTransform_ = false;
250
	bool useHWTessellation_ = false;
251
	// Used to prevent unnecessary flushing in softgpu.
252
	bool flushOnParams_ = true;
253

254
	// Set once a equal depth test is encountered.
255
	bool everUsedEqualDepth_ = false;
256
	bool everUsedExactEqualDepth_ = false;
257

258
	// Vertex collector buffers
259
	u8 *decoded_ = nullptr;
260
	u16 *decIndex_ = nullptr;
261

262
	// Cached vertex decoders
263
	DenseHashMap<u32, VertexDecoder *> decoderMap_;
264
	VertexDecoderJitCache *decJitCache_ = nullptr;
265
	VertexDecoderOptions decOptions_{};
266

267
	TransformedVertex *transformed_ = nullptr;
268
	TransformedVertex *transformedExpanded_ = nullptr;
269

270
	// Defer all vertex decoding to a "Flush" (except when software skinning)
271
	struct DeferredVerts {
272
		const void *verts;
273
		UVScale uvScale;
274
		u32 vertexCount;
275
		u16 indexLowerBound;
276
		u16 indexUpperBound;
277
	};
278

279
	struct DeferredInds {
280
		const void *inds;
281
		u32 vertexCount;
282
		u8 vertDecodeIndex;  // index into the drawVerts_ array to look up the vertexOffset.
283
		u8 indexType;
284
		GEPrimitiveType prim;
285
		bool clockwise;
286
		u16 offset;
287
	};
288

289
	enum { MAX_DEFERRED_DRAW_VERTS = 128 };  // If you change this to more than 256, change type of DeferredInds::vertDecodeIndex.
290
	enum { MAX_DEFERRED_DRAW_INDS = 512 };  // Monster Hunter spams indexed calls that we end up merging.
291
	DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_VERTS];
292
	uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];
293
	DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];
294

295
	VertexDecoder *dec_ = nullptr;
296
	u32 lastVType_ = -1;  // corresponds to dec_.  Could really just pick it out of dec_...
297
	int numDrawVerts_ = 0;
298
	int numDrawInds_ = 0;
299
	int vertexCountInDrawCalls_ = 0;
300

301
	int decodeVertsCounter_ = 0;
302
	int decodeIndsCounter_ = 0;
303

304
	int seenPrims_ = 0;
305
	bool anyCCWOrIndexed_ = 0;
306
	bool anyIndexed_ = 0;
307

308
	// Vertex collector state
309
	IndexGenerator indexGen;
310
	int numDecodedVerts_ = 0;
311
	GEPrimitiveType prevPrim_ = GE_PRIM_INVALID;
312

313
	// Shader blending state
314
	bool fboTexBound_ = false;
315

316
	// Sometimes, unusual situations mean we need to reset dirty flags after state calc finishes.
317
	uint64_t dirtyRequiresRecheck_ = 0;
318

319
	ComputedPipelineState pipelineState_;
320

321
	// Hardware tessellation
322
	TessellationDataTransfer *tessDataTransfer;
323

324
	// Culling
325
	Plane8 planes_;
326
	Vec2f minOffset_;
327
	Vec2f maxOffset_;
328
	bool offsetOutsideEdge_;
329
};
330

331
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company