CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/DrawEngineCommon.h
Views: 1401
// Copyright (c) 2013- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include <vector>2021#include "Common/CommonTypes.h"22#include "Common/Data/Collections/Hashmaps.h"2324#include "GPU/Math3D.h"25#include "GPU/GPUState.h"26#include "GPU/Common/GPUStateUtils.h"27#include "GPU/Common/GPUDebugInterface.h"28#include "GPU/Common/IndexGenerator.h"29#include "GPU/Common/VertexDecoderCommon.h"3031class VertexDecoder;3233enum {34VERTEX_BUFFER_MAX = 65536,35DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 2 * 36, // 36 == sizeof(SimpleVertex)36DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 6 * 6 * 2, // * 6 for spline tessellation, then * 6 again for converting into points/lines, and * 2 for 2 bytes per index37};3839enum {40TEX_SLOT_PSP_TEXTURE = 0,41TEX_SLOT_SHADERBLEND_SRC = 1,42TEX_SLOT_ALPHATEST = 2,43TEX_SLOT_CLUT = 3,44TEX_SLOT_SPLINE_POINTS = 4,45TEX_SLOT_SPLINE_WEIGHTS_U = 5,46TEX_SLOT_SPLINE_WEIGHTS_V = 6,47};4849enum FBOTexState {50FBO_TEX_NONE,51FBO_TEX_COPY_BIND_TEX,52FBO_TEX_READ_FRAMEBUFFER,53};5455inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode, bool skinInDecode) {56// As the decoder depends on the UVGenMode when we use UV prescale, we simply mash it57// into the top of the verttype where there are unused bits.58return (vertType & 0xFFFFFF) | (uvGenMode 
<< 24) | (skinInDecode << 26);59}6061struct SimpleVertex;62namespace Spline { struct Weight2D; }6364class TessellationDataTransfer {65public:66virtual ~TessellationDataTransfer() {}67static void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);68virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;69};7071// Culling plane, group of 8.72struct alignas(16) Plane8 {73float x[8], y[8], z[8], w[8];74void Set(int i, float _x, float _y, float _z, float _w) { x[i] = _x; y[i] = _y; z[i] = _z; w[i] = _w; }75float Test(int i, const float f[3]) const { return x[i] * f[0] + y[i] * f[1] + z[i] * f[2] + w[i]; }76};7778class DrawEngineCommon {79public:80DrawEngineCommon();81virtual ~DrawEngineCommon();8283void Init();84virtual void DeviceLost() = 0;85virtual void DeviceRestore(Draw::DrawContext *draw) = 0;8687bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);8889static u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoder *dec, int lowerBound, int upperBound, u32 vertType);9091// Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway.92// Not really sure if these wrappers are worth it...93virtual void DispatchFlush() = 0;9495// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim96// is different. 
Should probably refactor that.97// Note that vertTypeID should be computed using GetVertTypeID().98virtual void DispatchSubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead) {99SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, clockwise, bytesRead);100}101102virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);103104bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType);105106// This is a less accurate version of TestBoundingBox, but faster. Can have more false positives.107// Doesn't support indexing.108bool TestBoundingBoxFast(const void *control_points, int vertexCount, u32 vertType);109110void FlushSkin() {111bool applySkin = (lastVType_ & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;112if (applySkin) {113DecodeVerts(decoded_);114}115}116117int ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t *stall, u32 vertTypeID, bool clockwise, int *bytesRead, bool isTriangle);118bool SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, bool clockwise, int *bytesRead);119void SkipPrim(GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead);120121template<class Surface>122void SubmitCurve(const void *control_points, const void *indices, Surface &surface, u32 vertType, int *bytesRead, const char *scope);123static void ClearSplineBezierWeights();124125bool CanUseHardwareTransform(int prim) const;126bool CanUseHardwareTessellation(GEPatchPrimType prim) const;127128std::vector<std::string> DebugGetVertexLoaderIDs();129std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);130131virtual void NotifyConfigChanged();132133bool EverUsedExactEqualDepth() const {134return everUsedExactEqualDepth_;135}136void SetEverUsedExactEqualDepth(bool v) 
{137everUsedExactEqualDepth_ = v;138}139140bool DescribeCodePtr(const u8 *ptr, std::string &name) const;141int GetNumDrawCalls() const {142return numDrawVerts_;143}144145VertexDecoder *GetVertexDecoder(u32 vtype);146147virtual void ClearTrackedVertexArrays() {}148149protected:150virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }151void UpdatePlanes();152153void DecodeVerts(u8 *dest);154int DecodeInds();155156// Preprocessing for spline/bezier157u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize = nullptr);158159int ComputeNumVertsToDecode() const;160161void ApplyFramebufferRead(FBOTexState *fboTexState);162163static inline int IndexSize(u32 vtype) {164const u32 indexType = (vtype & GE_VTYPE_IDX_MASK);165if (indexType == GE_VTYPE_IDX_16BIT) {166return 2;167} else if (indexType == GE_VTYPE_IDX_32BIT) {168return 4;169}170return 1;171}172173inline void UpdateEverUsedEqualDepth(GEComparison comp) {174switch (comp) {175case GE_COMP_EQUAL:176everUsedExactEqualDepth_ = true;177everUsedEqualDepth_ = true;178break;179180case GE_COMP_NOTEQUAL:181case GE_COMP_LEQUAL:182case GE_COMP_GEQUAL:183everUsedEqualDepth_ = true;184break;185186default:187break;188}189}190191inline void ResetAfterDrawInline() {192gpuStats.numFlushes++;193gpuStats.numDrawCalls += numDrawInds_;194gpuStats.numVertexDecodes += numDrawVerts_;195gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;196gpuStats.numVertsDecoded += numDecodedVerts_;197198indexGen.Reset();199numDecodedVerts_ = 0;200numDrawVerts_ = 0;201numDrawInds_ = 0;202vertexCountInDrawCalls_ = 0;203decodeIndsCounter_ = 0;204decodeVertsCounter_ = 0;205seenPrims_ = 0;206anyCCWOrIndexed_ = false;207gstate_c.vertexFullAlpha = true;208209// Now seems as good a time as any to reset the min/max coords, which we may examine later.210gstate_c.vertBounds.minU = 512;211gstate_c.vertBounds.minV = 512;212gstate_c.vertBounds.maxU = 
0;213gstate_c.vertBounds.maxV = 0;214}215216inline bool CollectedPureDraw() const {217switch (seenPrims_) {218case 1 << GE_PRIM_TRIANGLE_STRIP:219return !anyCCWOrIndexed_ && numDrawInds_ == 1;220case 1 << GE_PRIM_LINES:221case 1 << GE_PRIM_POINTS:222case 1 << GE_PRIM_TRIANGLES:223return !anyCCWOrIndexed_;224default:225return false;226}227}228229inline void DecodeIndsAndGetData(GEPrimitiveType *prim, int *numVerts, int *maxIndex, bool *useElements, bool forceIndexed) {230if (!forceIndexed && CollectedPureDraw()) {231*prim = drawInds_[0].prim;232*numVerts = numDecodedVerts_;233*maxIndex = numDecodedVerts_;234*useElements = false;235} else {236int vertexCount = DecodeInds();237*numVerts = vertexCount;238*maxIndex = numDecodedVerts_;239*prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);240*useElements = true;241}242}243244inline int RemainingIndices(const uint16_t *inds) const {245return DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t) - (inds - decIndex_);246}247248bool useHWTransform_ = false;249bool useHWTessellation_ = false;250// Used to prevent unnecessary flushing in softgpu.251bool flushOnParams_ = true;252253// Set once a equal depth test is encountered.254bool everUsedEqualDepth_ = false;255bool everUsedExactEqualDepth_ = false;256257// Vertex collector buffers258u8 *decoded_ = nullptr;259u16 *decIndex_ = nullptr;260261// Cached vertex decoders262DenseHashMap<u32, VertexDecoder *> decoderMap_;263VertexDecoderJitCache *decJitCache_ = nullptr;264VertexDecoderOptions decOptions_{};265266TransformedVertex *transformed_ = nullptr;267TransformedVertex *transformedExpanded_ = nullptr;268269// Defer all vertex decoding to a "Flush" (except when software skinning)270struct DeferredVerts {271const void *verts;272UVScale uvScale;273u32 vertexCount;274u16 indexLowerBound;275u16 indexUpperBound;276};277278struct DeferredInds {279const void *inds;280u32 vertexCount;281u8 vertDecodeIndex; // index into the drawVerts_ array to look up the vertexOffset.282u8 
indexType;283GEPrimitiveType prim;284bool clockwise;285u16 offset;286};287288enum { MAX_DEFERRED_DRAW_VERTS = 128 }; // If you change this to more than 256, change type of DeferredInds::vertDecodeIndex.289enum { MAX_DEFERRED_DRAW_INDS = 512 }; // Monster Hunter spams indexed calls that we end up merging.290DeferredVerts drawVerts_[MAX_DEFERRED_DRAW_VERTS];291uint32_t drawVertexOffsets_[MAX_DEFERRED_DRAW_VERTS];292DeferredInds drawInds_[MAX_DEFERRED_DRAW_INDS];293294VertexDecoder *dec_ = nullptr;295u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...296int numDrawVerts_ = 0;297int numDrawInds_ = 0;298int vertexCountInDrawCalls_ = 0;299300int decodeVertsCounter_ = 0;301int decodeIndsCounter_ = 0;302303int seenPrims_ = 0;304bool anyCCWOrIndexed_ = 0;305bool anyIndexed_ = 0;306307// Vertex collector state308IndexGenerator indexGen;309int numDecodedVerts_ = 0;310GEPrimitiveType prevPrim_ = GE_PRIM_INVALID;311312// Shader blending state313bool fboTexBound_ = false;314315// Sometimes, unusual situations mean we need to reset dirty flags after state calc finishes.316uint64_t dirtyRequiresRecheck_ = 0;317318ComputedPipelineState pipelineState_;319320// Hardware tessellation321TessellationDataTransfer *tessDataTransfer;322323// Culling324Plane8 planes_;325Vec2f minOffset_;326Vec2f maxOffset_;327bool offsetOutsideEdge_;328};329330331