// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/GPU/Software/BinManager.cpp
// Views: 1401
// Copyright (c) 2022- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <atomic>18#include <condition_variable>19#include <mutex>20#include "Common/Profiler/Profiler.h"21#include "Common/Thread/ThreadManager.h"22#include "Common/TimeUtil.h"23#include "Core/System.h"24#include "GPU/Common/TextureDecoder.h"25#include "GPU/Software/BinManager.h"26#include "GPU/Software/Rasterizer.h"27#include "GPU/Software/RasterizerRectangle.h"2829// Sometimes useful for debugging.30static constexpr bool FORCE_SINGLE_THREAD = false;3132using namespace Rasterizer;3334struct BinWaitable : public Waitable {35public:36BinWaitable() {37count_ = 0;38}3940void Fill() {41count_++;42}4344bool Empty() {45return count_ == 0;46}4748void Drain() {49int result = --count_;50if (result == 0) {51// We were the last one to increment.52std::unique_lock<std::mutex> lock(mutex_);53cond_.notify_all();54}55}5657void Wait() override {58std::unique_lock<std::mutex> lock(mutex_);59while (count_ != 0) {60cond_.wait(lock);61}62}6364std::atomic<int> count_;65std::mutex mutex_;66std::condition_variable cond_;67};6869static inline void DrawBinItem(const BinItem &item, const RasterizerState &state) {70switch (item.type) {71case BinItemType::TRIANGLE:72DrawTriangle(item.v0, item.v1, item.v2, item.range, state);73break;7475case 
BinItemType::CLEAR_RECT:76ClearRectangle(item.v0, item.v1, item.range, state);77break;7879case BinItemType::RECT:80DrawRectangle(item.v0, item.v1, item.range, state);81break;8283case BinItemType::SPRITE:84DrawSprite(item.v0, item.v1, item.range, state);85break;8687case BinItemType::LINE:88DrawLine(item.v0, item.v1, item.range, state);89break;9091case BinItemType::POINT:92DrawPoint(item.v0, item.range, state);93break;94}95}9697class DrawBinItemsTask : public Task {98public:99DrawBinItemsTask(BinWaitable *notify, BinManager::BinItemQueue &items, std::atomic<bool> &status, const BinManager::BinStateQueue &states)100: notify_(notify), items_(items), status_(status), states_(states) {101}102103TaskType Type() const override {104return TaskType::CPU_COMPUTE;105}106107TaskPriority Priority() const override {108// Let priority emulation tasks win over this.109return TaskPriority::NORMAL;110}111112void Run() override {113ProcessItems();114status_ = false;115// In case of any atomic issues, do another pass.116ProcessItems();117notify_->Drain();118}119120void Release() override {121// Don't delete, this is statically allocated.122}123124private:125void ProcessItems() {126while (!items_.Empty()) {127const BinItem &item = items_.PeekNext();128DrawBinItem(item, states_[item.stateIndex]);129items_.SkipNext();130}131}132133BinWaitable *notify_;134BinManager::BinItemQueue &items_;135std::atomic<bool> &status_;136const BinManager::BinStateQueue &states_;137};138139constexpr int BinManager::MAX_POSSIBLE_TASKS;140141BinManager::BinManager() {142queueRange_.x1 = 0x7FFFFFFF;143queueRange_.y1 = 0x7FFFFFFF;144queueRange_.x2 = 0;145queueRange_.y2 = 0;146147waitable_ = new BinWaitable();148for (auto &s : taskStatus_)149s = false;150151int maxInitTasks = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);152for (int i = 0; i < maxInitTasks; ++i) {153taskQueues_[i].Setup();154for (DrawBinItemsTask *&task : taskLists_[i].tasks)155task = new DrawBinItemsTask(waitable_, 
taskQueues_[i], taskStatus_[i], states_);156}157states_.Setup();158cluts_.Setup();159queue_.Setup();160}161162BinManager::~BinManager() {163delete waitable_;164165for (int i = 0; i < MAX_POSSIBLE_TASKS; ++i) {166for (DrawBinItemsTask *task : taskLists_[i].tasks)167delete task;168}169}170171void BinManager::UpdateState() {172PROFILE_THIS_SCOPE("bin_state");173if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) {174if (states_.Full())175Flush("states");176creatingState_ = true;177stateIndex_ = (uint16_t)states_.Push(RasterizerState());178// When new funcs are compiled, we need to flush if WX exclusive.179ComputeRasterizerState(&states_[stateIndex_], this);180states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;181creatingState_ = false;182183ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);184}185186if (lastFlipstats_ != gpuStats.numFlips) {187lastFlipstats_ = gpuStats.numFlips;188ResetStats();189}190191const auto &state = State();192const bool hadDepth = pendingWrites_[1].base != 0;193194if (HasDirty(SoftDirty::BINNER_RANGE)) {195DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());196DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));197ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0);198ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0);199200scissor_.x1 = screenScissorTL.x;201scissor_.y1 = screenScissorTL.y;202scissor_.x2 = screenScissorBR.x + SCREEN_SCALE_FACTOR - 1;203scissor_.y2 = screenScissorBR.y + SCREEN_SCALE_FACTOR - 1;204205// If we're about to texture from something still pending (i.e. 
depth), flush.206if (HasTextureWrite(state))207Flush("tex");208209// Okay, now update what's pending.210MarkPendingWrites(state);211212ClearDirty(SoftDirty::BINNER_RANGE);213} else if (pendingOverlap_) {214if (HasTextureWrite(state)) {215Flush("tex");216217// We need the pending writes set, which flushing cleared. Set them again.218MarkPendingWrites(state);219}220}221222if (HasDirty(SoftDirty::BINNER_OVERLAP)) {223// This is a good place to record any dependencies for block transfer overlap.224MarkPendingReads(state);225226// Disallow threads when rendering to the target, even offset.227bool selfRender = HasTextureWrite(state);228int newMaxTasks = selfRender || FORCE_SINGLE_THREAD ? 1 : g_threadManager.GetNumLooperThreads();229if (newMaxTasks > MAX_POSSIBLE_TASKS)230newMaxTasks = MAX_POSSIBLE_TASKS;231// We don't want to overlap wrong, so flush any pending.232if (maxTasks_ != newMaxTasks) {233maxTasks_ = newMaxTasks;234Flush("selfrender");235}236pendingOverlap_ = pendingOverlap_ || selfRender;237238// Lastly, we have to check if we're newly writing depth we were texturing before.239// This happens in Call of Duty (depth clear after depth texture), for example.240if (!hadDepth && state.pixelID.depthWrite) {241for (size_t i = 0; i < states_.Size(); ++i) {242if (HasTextureWrite(states_.Peek(i))) {243Flush("selfdepth");244}245}246}247ClearDirty(SoftDirty::BINNER_OVERLAP);248}249}250251bool BinManager::HasTextureWrite(const RasterizerState &state) {252if (!state.enableTextures)253return false;254255const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];256for (int i = 0; i <= state.maxTexLevel; ++i) {257int byteStride = (state.texbufw[i] * textureBits) / 8;258int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;259int h = state.samplerID.cached.sizes[i].h;260if (HasPendingWrite(state.texaddr[i], byteStride, byteWidth, h))261return true;262}263264return false;265}266267bool BinManager::IsExactSelfRender(const Rasterizer::RasterizerState 
&state, const BinItem &item) {268if (item.type != BinItemType::SPRITE && item.type != BinItemType::RECT)269return false;270if (state.textureProj || state.maxTexLevel > 0)271return false;272273// Only possible if the texture is 1:1.274if ((state.texaddr[0] & 0x0F1FFFFF) != (gstate.getFrameBufAddress() & 0x0F1FFFFF))275return false;276int bufferPixelWidth = BufferFormatBytesPerPixel(state.pixelID.FBFormat());277int texturePixelWidth = textureBitsPerPixel[state.samplerID.texfmt] / 8;278if (bufferPixelWidth != texturePixelWidth)279return false;280281Vec4f tc = Vec4f(item.v0.texturecoords.x, item.v0.texturecoords.y, item.v1.texturecoords.x, item.v1.texturecoords.y);282if (state.throughMode) {283// Already at texels, convert to screen.284tc = tc * SCREEN_SCALE_FACTOR;285} else {286// Need to also multiply by width/height in transform mode.287int w = state.samplerID.cached.sizes[0].w * SCREEN_SCALE_FACTOR;288int h = state.samplerID.cached.sizes[0].h * SCREEN_SCALE_FACTOR;289tc = tc * Vec4f(w, h, w, h);290}291292Vec4<int> tci = tc.Cast<int>();293if (tci.x != item.v0.screenpos.x || tci.y != item.v0.screenpos.y)294return false;295if (tci.z != item.v1.screenpos.x || tci.w != item.v1.screenpos.y)296return false;297298return true;299}300301void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {302if (!state.enableTextures)303return;304305const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];306for (int i = 0; i <= state.maxTexLevel; ++i) {307uint32_t byteStride = (state.texbufw[i] * textureBits) / 8;308uint32_t byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;309uint32_t h = state.samplerID.cached.sizes[i].h;310auto it = pendingReads_.find(state.texaddr[i]);311if (it != pendingReads_.end()) {312uint32_t total = byteStride * (h - 1) + byteWidth;313uint32_t existing = it->second.strideBytes * (it->second.height - 1) + it->second.widthBytes;314if (existing < total) {315it->second.strideBytes = 
std::max(it->second.strideBytes, byteStride);316it->second.widthBytes = std::max(it->second.widthBytes, byteWidth);317it->second.height = std::max(it->second.height, h);318}319} else {320auto &range = pendingReads_[state.texaddr[i]];321range.base = state.texaddr[i];322range.strideBytes = byteStride;323range.widthBytes = byteWidth;324range.height = h;325}326}327}328329void BinManager::MarkPendingWrites(const Rasterizer::RasterizerState &state) {330DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());331DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));332333constexpr uint32_t mirrorMask = 0x041FFFFF;334const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;335pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);336if (state.pixelID.depthWrite)337pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);338}339340inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, const DrawingCoords &tl, const DrawingCoords &br) {341const uint32_t w = br.x - tl.x + 1;342const uint32_t h = br.y - tl.y + 1;343344newBase += tl.y * stride * bpp + tl.x * bpp;345if (base == 0) {346base = newBase;347strideBytes = stride * bpp;348widthBytes = w * bpp;349height = h;350return;351}352353height = std::max(height, h);354if (base == newBase && strideBytes == stride * bpp) {355widthBytes = std::max(widthBytes, w * bpp);356return;357}358359if (stride != 0)360height += ((int)base - (int)newBase) / (stride * bpp);361base = std::min(base, newBase);362strideBytes = std::max(strideBytes, stride * bpp);363widthBytes = strideBytes;364}365366void BinManager::UpdateClut(const void *src) {367PROFILE_THIS_SCOPE("bin_clut");368if (cluts_.Full())369Flush("cluts");370BinClut &clut = cluts_.PeekPush();371memcpy(clut.readable, src, 
sizeof(BinClut));372clutIndex_ = (uint16_t)cluts_.PushPeeked();373}374375void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2) {376Vec2<int> d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y);377Vec2<int> d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y);378Vec2<int> d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y);379380// Drop primitives which are not in CCW order by checking the cross product.381static_assert(SCREEN_SCALE_FACTOR <= 16, "Fails if scale factor is too high");382if (d01.x * d02.y - d01.y * d02.x < 0)383return;384// If all points have identical coords, we'll have 0 weights and not skip properly, so skip here.385if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))386return;387388// Was it fully outside the scissor?389const BinCoords range = Range(v0, v1, v2);390if (range.Invalid())391return;392393if (queue_.Full())394Drain();395queue_.Push(BinItem{ BinItemType::TRIANGLE, stateIndex_, range, v0, v1, v2 });396CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, v2);397Expand(range);398}399400void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) {401const BinCoords range = Range(v0, v1);402if (range.Invalid())403return;404405if (queue_.Full())406Drain();407queue_.Push(BinItem{ BinItemType::CLEAR_RECT, stateIndex_, range, v0, v1 });408CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);409Expand(range);410}411412void BinManager::AddRect(const VertexData &v0, const VertexData &v1) {413const BinCoords range = Range(v0, v1);414if (range.Invalid())415return;416417if (queue_.Full())418Drain();419queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 });420CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);421Expand(range);422}423424void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) {425const BinCoords range = 
Range(v0, v1);426if (range.Invalid())427return;428429if (queue_.Full())430Drain();431queue_.Push(BinItem{ BinItemType::SPRITE, stateIndex_, range, v0, v1 });432CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);433Expand(range);434}435436void BinManager::AddLine(const VertexData &v0, const VertexData &v1) {437const BinCoords range = Range(v0, v1);438if (range.Invalid())439return;440441if (queue_.Full())442Drain();443queue_.Push(BinItem{ BinItemType::LINE, stateIndex_, range, v0, v1 });444CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, false);445Expand(range);446}447448void BinManager::AddPoint(const VertexData &v0) {449const BinCoords range = Range(v0);450if (range.Invalid())451return;452453if (queue_.Full())454Drain();455queue_.Push(BinItem{ BinItemType::POINT, stateIndex_, range, v0 });456CalculateRasterStateFlags(&states_[stateIndex_], v0);457Expand(range);458}459460void BinManager::Drain(bool flushing) {461PROFILE_THIS_SCOPE("bin_drain");462463// If the waitable has fully drained, we can update our binning decisions.464if (!tasksSplit_ || waitable_->Empty()) {465int w2 = (queueRange_.x2 - queueRange_.x1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);466int h2 = (queueRange_.y2 - queueRange_.y1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);467468// Always bin the entire possible range, but focus on the drawn area.469ScreenCoords tl(0, 0, 0);470ScreenCoords br(1024 * SCREEN_SCALE_FACTOR, 1024 * SCREEN_SCALE_FACTOR, 0);471472if (pendingOverlap_ && maxTasks_ == 1 && flushing && queue_.Size() == 1 && !FORCE_SINGLE_THREAD) {473// If the drawing is 1:1, we can potentially use threads. 
It's worth checking.474const auto &item = queue_.PeekNext();475const auto &state = states_[item.stateIndex];476if (IsExactSelfRender(state, item))477maxTasks_ = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);478}479480taskRanges_.clear();481if (h2 >= 18 && w2 >= h2 * 4) {482int bin_w = std::max(4, (w2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;483taskRanges_.push_back(BinCoords{ tl.x, tl.y, queueRange_.x1 + bin_w - 1, br.y - 1 });484for (int x = queueRange_.x1 + bin_w; x <= queueRange_.x2; x += bin_w) {485int x2 = x + bin_w > queueRange_.x2 ? br.x : x + bin_w;486taskRanges_.push_back(BinCoords{ x, tl.y, x2 - 1, br.y - 1 });487}488} else if (h2 >= 18 && w2 >= 18) {489int bin_h = std::max(4, (h2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;490taskRanges_.push_back(BinCoords{ tl.x, tl.y, br.x - 1, queueRange_.y1 + bin_h - 1 });491for (int y = queueRange_.y1 + bin_h; y <= queueRange_.y2; y += bin_h) {492int y2 = y + bin_h > queueRange_.y2 ? br.y : y + bin_h;493taskRanges_.push_back(BinCoords{ tl.x, y, br.x - 1, y2 - 1 });494}495}496497tasksSplit_ = true;498}499500// Let's try to optimize states, if we can.501OptimizePendingStates(pendingStateIndex_, stateIndex_);502pendingStateIndex_ = stateIndex_;503504if (taskRanges_.size() <= 1) {505PROFILE_THIS_SCOPE("bin_drain_single");506while (!queue_.Empty()) {507const BinItem &item = queue_.PeekNext();508DrawBinItem(item, states_[item.stateIndex]);509queue_.SkipNext();510}511} else {512int max = flushing ? 
QUEUED_PRIMS : QUEUED_PRIMS / 2;513while (!queue_.Empty()) {514const BinItem &item = queue_.PeekNext();515for (int i = 0; i < (int)taskRanges_.size(); ++i) {516const BinCoords range = taskRanges_[i].Intersect(item.range);517if (range.Invalid())518continue;519520if (taskQueues_[i].NearFull()) {521// This shouldn't often happen, but if it does, wait for space.522if (taskQueues_[i].Full())523waitable_->Wait();524// If we're not flushing and not near full, let's just continue later.525// Near full means we'd drain on next prim, so better to finish it now.526else if (!flushing && !queue_.NearFull())527max = 0;528}529530BinItem &taskItem = taskQueues_[i].PeekPush();531taskItem = item;532taskItem.range = range;533taskQueues_[i].PushPeeked();534}535queue_.SkipNext();536if (--max <= 0)537break;538}539540int threads = 0;541for (int i = 0; i < (int)taskRanges_.size(); ++i) {542if (taskQueues_[i].Empty())543continue;544threads++;545if (taskStatus_[i])546continue;547548waitable_->Fill();549taskStatus_[i] = true;550g_threadManager.EnqueueTaskOnThread(i, taskLists_[i].Next());551enqueues_++;552}553554mostThreads_ = std::max(mostThreads_, threads);555}556}557558void BinManager::Flush(const char *reason) {559if (queueRange_.x1 == 0x7FFFFFFF)560return;561562double st;563if (coreCollectDebugStats)564st = time_now_d();565Drain(true);566waitable_->Wait();567taskRanges_.clear();568tasksSplit_ = false;569570queue_.Reset();571while (states_.Size() > 1)572states_.SkipNext();573while (cluts_.Size() > 1)574cluts_.SkipNext();575576Rasterizer::FlushJit();577Sampler::FlushJit();578579queueRange_.x1 = 0x7FFFFFFF;580queueRange_.y1 = 0x7FFFFFFF;581queueRange_.x2 = 0;582queueRange_.y2 = 0;583584for (auto &pending : pendingWrites_)585pending.base = 0;586pendingOverlap_ = false;587pendingReads_.clear();588589// We'll need to set the pending writes and reads again, since we just flushed it.590dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;591592if (coreCollectDebugStats) {593double et = 
time_now_d();594flushReasonTimes_[reason] += et - st;595if (et - st > slowestFlushTime_) {596slowestFlushTime_ = et - st;597slowestFlushReason_ = reason;598}599}600}601602void BinManager::OptimizePendingStates(uint16_t first, uint16_t last) {603// We can sometimes hit this when compiling new funcs while creating a state.604// At that point, the state isn't loaded fully yet, so don't touch it.605if (creatingState_ && last == stateIndex_) {606if (first == last)607return;608last--;609}610611int count = (QUEUED_STATES + last - first) % QUEUED_STATES + 1;612for (int i = 0; i < count; ++i) {613size_t pos = (first + i) % QUEUED_STATES;614OptimizeRasterState(&states_[pos]);615}616}617618bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {619// We can only write to VRAM.620if (!Memory::IsVRAMAddress(start))621return false;622// Ignore mirrors for overlap detection.623start &= 0x041FFFFF;624625uint32_t size = stride * (h - 1) + w;626for (const auto &range : pendingWrites_) {627if (range.base == 0 || range.strideBytes == 0)628continue;629if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)630continue;631632// Let's simply go through each line. 
Might be in the stride gap.633uint32_t row = start;634for (uint32_t y = 0; y < h; ++y) {635int32_t offset = row - range.base;636int32_t rangeY = offset / (int32_t)range.strideBytes;637uint32_t rangeX = offset % (int32_t)range.strideBytes;638if (rangeY >= 0 && (uint32_t)rangeY < range.height) {639// If this row is either within width, or extends beyond stride, overlap.640if (rangeX < range.widthBytes || rangeX + w >= range.strideBytes)641return true;642}643644row += stride;645}646}647648return false;649}650651bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {652if (Memory::IsVRAMAddress(start)) {653// Ignore VRAM mirrors.654start &= 0x041FFFFF;655} else {656// Ignore only regular RAM mirrors.657start &= 0x3FFFFFFF;658}659660uint32_t size = stride * (h - 1) + w;661for (const auto &pair : pendingReads_) {662const auto &range = pair.second;663if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)664continue;665666// Stride gaps are uncommon with reads, so don't bother.667return true;668}669670return false;671}672673void BinManager::GetStats(char *buffer, size_t bufsize) {674double allTotal = 0.0;675double slowestTotalTime = 0.0;676const char *slowestTotalReason = nullptr;677for (auto &it : flushReasonTimes_) {678if (it.second > slowestTotalTime) {679slowestTotalTime = it.second;680slowestTotalReason = it.first;681}682allTotal += it.second;683}684685// Many games are 30 FPS, so check last frame too for better stats.686double recentTotal = allTotal;687double slowestRecentTime = slowestTotalTime;688const char *slowestRecentReason = slowestTotalReason;689for (auto &it : lastFlushReasonTimes_) {690if (it.second > slowestRecentTime) {691slowestRecentTime = it.second;692slowestRecentReason = it.first;693}694recentTotal += it.second;695}696697snprintf(buffer, bufsize,698"Slowest individual flush: %s (%0.4f)\n"699"Slowest frame flush: %s (%0.4f)\n"700"Slowest recent flush: %s (%0.4f)\n"701"Total flush 
time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n"702"Thread enqueues: %d, count %d",703slowestFlushReason_, slowestFlushTime_,704slowestTotalReason, slowestTotalTime,705slowestRecentReason, slowestRecentTime,706allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001),707enqueues_, mostThreads_);708}709710void BinManager::ResetStats() {711lastFlushReasonTimes_ = std::move(flushReasonTimes_);712flushReasonTimes_.clear();713slowestFlushReason_ = nullptr;714slowestFlushTime_ = 0.0;715enqueues_ = 0;716mostThreads_ = 0;717}718719inline BinCoords BinCoords::Intersect(const BinCoords &range) const {720BinCoords sub;721sub.x1 = std::max(x1, range.x1);722sub.y1 = std::max(y1, range.y1);723sub.x2 = std::min(x2, range.x2);724sub.y2 = std::min(y2, range.y2);725return sub;726}727728BinCoords BinManager::Scissor(BinCoords range) {729return range.Intersect(scissor_);730}731732BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1, const VertexData &v2) {733BinCoords range;734range.x1 = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);735range.y1 = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);736range.x2 = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);737range.y2 = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);738return Scissor(range);739}740741BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1) {742BinCoords range;743range.x1 = std::min(v0.screenpos.x, v1.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);744range.y1 = std::min(v0.screenpos.y, v1.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);745range.x2 = std::max(v0.screenpos.x, v1.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);746range.y2 = std::max(v0.screenpos.y, v1.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);747return Scissor(range);748}749750BinCoords BinManager::Range(const VertexData &v0) {751BinCoords 
range;752range.x1 = v0.screenpos.x & ~(SCREEN_SCALE_FACTOR - 1);753range.y1 = v0.screenpos.y & ~(SCREEN_SCALE_FACTOR - 1);754range.x2 = v0.screenpos.x | (SCREEN_SCALE_FACTOR - 1);755range.y2 = v0.screenpos.y | (SCREEN_SCALE_FACTOR - 1);756return Scissor(range);757}758759void BinManager::Expand(const BinCoords &range) {760queueRange_.x1 = std::min(queueRange_.x1, range.x1);761queueRange_.y1 = std::min(queueRange_.y1, range.y1);762queueRange_.x2 = std::max(queueRange_.x2, range.x2);763queueRange_.y2 = std::max(queueRange_.y2, range.y2);764765if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * SCREEN_SCALE_FACTOR && enqueues_ < 36 * maxTasks_)) {766if (pendingOverlap_)767Flush("expand");768else769Drain();770}771}772773774