Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
4574 views
1
/****************************************************************************
 * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file archrast.cpp
 *
 * @brief Implementation for archrast.
 *
 ******************************************************************************/
28
#include <sys/stat.h>
29
30
#include <atomic>
31
#include <map>
32
33
#include "common/os.h"
34
#include "archrast/archrast.h"
35
#include "archrast/eventmanager.h"
36
#include "gen_ar_event.hpp"
37
#include "gen_ar_eventhandlerfile.hpp"
38
39
namespace ArchRast
40
{
41
//////////////////////////////////////////////////////////////////////////
/// @brief Pass/fail counters for the depth (Z) and stencil tests, tracked
///        separately for the early and the late test stage. Every counter
///        starts at zero.
struct DepthStencilStats
{
    // Depth test outcomes.
    uint32_t earlyZTestPassCount{0};
    uint32_t earlyZTestFailCount{0};
    uint32_t lateZTestPassCount{0};
    uint32_t lateZTestFailCount{0};

    // Stencil test outcomes.
    uint32_t earlyStencilTestPassCount{0};
    uint32_t earlyStencilTestFailCount{0};
    uint32_t lateStencilTestPassCount{0};
    uint32_t lateStencilTestFailCount{0};
};
54
55
/// @brief Clipper counters (trivial reject / trivial accept / must clip).
///
/// Each member carries a default initializer, matching the other stat
/// structs in this file, so a default-constructed instance starts at zero
/// instead of holding indeterminate values.
struct CStats
{
    uint32_t trivialRejectCount = 0;
    uint32_t trivialAcceptCount = 0;
    uint32_t mustClipCount      = 0;
};
61
62
/// @brief Tessellation counter.
struct TEStats
{
    // @todo Rename to numPatches. The count currently assumes one patch per
    //       primitive; it is correct as long as that assumption holds.
    uint32_t inputPrims{0};
};
67
68
/// @brief Geometry shader counters: input primitives, generated primitives
///        and input vertices.
///
/// Each member carries a default initializer, matching the other stat
/// structs in this file, so a default-constructed instance starts at zero
/// instead of holding indeterminate values.
struct GSStateInfo
{
    uint32_t inputPrimCount     = 0;
    uint32_t primGeneratedCount = 0;
    uint32_t vertsInput         = 0;
};
74
75
/// @brief Rasterizer counter: number of raster tiles.
struct RastStats
{
    uint32_t rasterTiles{0};
};
79
80
/// @brief Primitive culling counters (degenerate and back-facing primitives).
struct CullStats
{
    uint32_t degeneratePrimCount{0};
    uint32_t backfacePrimCount{0};
};
85
86
/// @brief Counters for events that had alpha test / alpha blend enabled.
struct AlphaStats
{
    uint32_t alphaTestCount{0};
    uint32_t alphaBlendCount{0};
};
91
92
93
//////////////////////////////////////////////////////////////////////////
94
/// @brief Event handler that handles API thread events. This is shared
95
/// between the API and its caller (e.g. driver shim) but typically
96
/// there is only a single API thread per context. So you can save
97
/// information in the class to be used for other events.
98
class EventHandlerApiStats : public EventHandlerFile
99
{
100
public:
101
EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
102
{
103
#if defined(_WIN32)
104
// Attempt to copy the events.proto file to the ArchRast output dir. It's common for
105
// tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
106
// exists, this will attempt to copy it the first time we get here to package it with
107
// the stats. Otherwise, the user would need to specify the events.proto location when
108
// parsing the stats in post.
109
std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
110
eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
111
eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
112
<< "\\events.proto" << std::ends;
113
114
// If event.proto already exists, we're done; else do the copy
115
struct stat buf; // Use a Posix stat for file existence check
116
if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
117
{
118
// Now check to make sure the events.proto source exists
119
if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
120
{
121
std::ifstream srcFile;
122
srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
123
if (srcFile.is_open())
124
{
125
// Just do a binary buffer copy
126
std::ofstream dstFile;
127
dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
128
dstFile << srcFile.rdbuf();
129
dstFile.close();
130
}
131
srcFile.close();
132
}
133
}
134
#endif
135
}
136
137
virtual void Handle(const DrawInstancedEvent& event)
138
{
139
DrawInfoEvent e(event.data.drawId,
140
ArchRast::Instanced,
141
event.data.topology,
142
event.data.numVertices,
143
0,
144
0,
145
event.data.startVertex,
146
event.data.numInstances,
147
event.data.startInstance,
148
event.data.tsEnable,
149
event.data.gsEnable,
150
event.data.soEnable,
151
event.data.soTopology,
152
event.data.splitId);
153
154
EventHandlerFile::Handle(e);
155
}
156
157
virtual void Handle(const DrawIndexedInstancedEvent& event)
158
{
159
DrawInfoEvent e(event.data.drawId,
160
ArchRast::IndexedInstanced,
161
event.data.topology,
162
0,
163
event.data.numIndices,
164
event.data.indexOffset,
165
event.data.baseVertex,
166
event.data.numInstances,
167
event.data.startInstance,
168
event.data.tsEnable,
169
event.data.gsEnable,
170
event.data.soEnable,
171
event.data.soTopology,
172
event.data.splitId);
173
174
EventHandlerFile::Handle(e);
175
}
176
};
177
178
//////////////////////////////////////////////////////////////////////////
179
/// @brief Event handler that handles worker thread events. There is one
180
/// event handler per thread. The python script will need to sum
181
/// up counters across all of the threads.
182
class EventHandlerWorkerStats : public EventHandlerFile
183
{
184
public:
185
EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
186
{
187
memset(mShaderStats, 0, sizeof(mShaderStats));
188
}
189
190
virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
191
{
192
// earlyZ test compute
193
mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
194
mDSSingleSample.earlyZTestFailCount +=
195
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
196
197
// earlyStencil test compute
198
mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
199
mDSSingleSample.earlyStencilTestFailCount +=
200
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
201
202
// earlyZ test single and multi sample
203
mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
204
mDSCombined.earlyZTestFailCount +=
205
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
206
207
// earlyStencil test single and multi sample
208
mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
209
mDSCombined.earlyStencilTestFailCount +=
210
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
211
212
mNeedFlush = true;
213
}
214
215
virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
216
{
217
// earlyZ test compute
218
mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
219
mDSSampleRate.earlyZTestFailCount +=
220
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
221
222
// earlyStencil test compute
223
mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
224
mDSSampleRate.earlyStencilTestFailCount +=
225
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
226
227
// earlyZ test single and multi sample
228
mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
229
mDSCombined.earlyZTestFailCount +=
230
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
231
232
// earlyStencil test single and multi sample
233
mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
234
mDSCombined.earlyStencilTestFailCount +=
235
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
236
237
mNeedFlush = true;
238
}
239
240
virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
241
{
242
// earlyZ test compute
243
mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
244
mDSNullPS.earlyZTestFailCount +=
245
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
246
247
// earlyStencil test compute
248
mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
249
mDSNullPS.earlyStencilTestFailCount +=
250
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
251
mNeedFlush = true;
252
}
253
254
virtual void Handle(const LateDepthStencilInfoSingleSample& event)
255
{
256
// lateZ test compute
257
mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
258
mDSSingleSample.lateZTestFailCount +=
259
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
260
261
// lateStencil test compute
262
mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
263
mDSSingleSample.lateStencilTestFailCount +=
264
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
265
266
// lateZ test single and multi sample
267
mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
268
mDSCombined.lateZTestFailCount +=
269
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
270
271
// lateStencil test single and multi sample
272
mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
273
mDSCombined.lateStencilTestFailCount +=
274
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
275
276
mNeedFlush = true;
277
}
278
279
virtual void Handle(const LateDepthStencilInfoSampleRate& event)
280
{
281
// lateZ test compute
282
mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
283
mDSSampleRate.lateZTestFailCount +=
284
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
285
286
// lateStencil test compute
287
mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
288
mDSSampleRate.lateStencilTestFailCount +=
289
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
290
291
// lateZ test single and multi sample
292
mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
293
mDSCombined.lateZTestFailCount +=
294
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
295
296
// lateStencil test single and multi sample
297
mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
298
mDSCombined.lateStencilTestFailCount +=
299
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
300
301
mNeedFlush = true;
302
}
303
304
virtual void Handle(const LateDepthStencilInfoNullPS& event)
305
{
306
// lateZ test compute
307
mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
308
mDSNullPS.lateZTestFailCount +=
309
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
310
311
// lateStencil test compute
312
mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
313
mDSNullPS.lateStencilTestFailCount +=
314
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
315
mNeedFlush = true;
316
}
317
318
virtual void Handle(const EarlyDepthInfoPixelRate& event)
319
{
320
// earlyZ test compute
321
mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
322
mDSPixelRate.earlyZTestFailCount +=
323
(_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
324
mNeedFlush = true;
325
}
326
327
328
virtual void Handle(const LateDepthInfoPixelRate& event)
329
{
330
// lateZ test compute
331
mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
332
mDSPixelRate.lateZTestFailCount +=
333
(_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
334
mNeedFlush = true;
335
}
336
337
338
virtual void Handle(const ClipInfoEvent& event)
339
{
340
mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
341
mClipper.trivialRejectCount +=
342
event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
343
mClipper.trivialAcceptCount +=
344
_mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
345
}
346
347
void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
348
{
349
pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
350
pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
351
pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
352
pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
353
pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
354
pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
355
pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
356
pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
357
pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
358
pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
359
pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
360
pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
361
}
362
363
virtual void Handle(const VSStats& event)
364
{
365
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
366
UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
367
}
368
369
virtual void Handle(const GSStats& event)
370
{
371
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
372
UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
373
}
374
375
virtual void Handle(const DSStats& event)
376
{
377
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
378
UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
379
}
380
381
virtual void Handle(const HSStats& event)
382
{
383
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
384
UpdateStats(&mShaderStats[SHADER_HULL], pStats);
385
}
386
387
virtual void Handle(const PSStats& event)
388
{
389
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
390
UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
391
mNeedFlush = true;
392
}
393
394
virtual void Handle(const CSStats& event)
395
{
396
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
397
UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
398
mNeedFlush = true;
399
}
400
401
// Flush cached events for this draw
402
virtual void FlushDraw(uint32_t drawId)
403
{
404
if (mNeedFlush == false)
405
return;
406
407
EventHandlerFile::Handle(PSInfo(drawId,
408
mShaderStats[SHADER_PIXEL].numInstExecuted,
409
mShaderStats[SHADER_PIXEL].numSampleExecuted,
410
mShaderStats[SHADER_PIXEL].numSampleLExecuted,
411
mShaderStats[SHADER_PIXEL].numSampleBExecuted,
412
mShaderStats[SHADER_PIXEL].numSampleCExecuted,
413
mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
414
mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
415
mShaderStats[SHADER_PIXEL].numGather4Executed,
416
mShaderStats[SHADER_PIXEL].numGather4CExecuted,
417
mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
418
mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
419
mShaderStats[SHADER_PIXEL].numLodExecuted));
420
EventHandlerFile::Handle(CSInfo(drawId,
421
mShaderStats[SHADER_COMPUTE].numInstExecuted,
422
mShaderStats[SHADER_COMPUTE].numSampleExecuted,
423
mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
424
mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
425
mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
426
mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
427
mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
428
mShaderStats[SHADER_COMPUTE].numGather4Executed,
429
mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
430
mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
431
mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
432
mShaderStats[SHADER_COMPUTE].numLodExecuted));
433
434
// singleSample
435
EventHandlerFile::Handle(EarlyZSingleSample(
436
drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
437
EventHandlerFile::Handle(LateZSingleSample(
438
drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
439
EventHandlerFile::Handle(
440
EarlyStencilSingleSample(drawId,
441
mDSSingleSample.earlyStencilTestPassCount,
442
mDSSingleSample.earlyStencilTestFailCount));
443
EventHandlerFile::Handle(
444
LateStencilSingleSample(drawId,
445
mDSSingleSample.lateStencilTestPassCount,
446
mDSSingleSample.lateStencilTestFailCount));
447
448
// sampleRate
449
EventHandlerFile::Handle(EarlyZSampleRate(
450
drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
451
EventHandlerFile::Handle(LateZSampleRate(
452
drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
453
EventHandlerFile::Handle(
454
EarlyStencilSampleRate(drawId,
455
mDSSampleRate.earlyStencilTestPassCount,
456
mDSSampleRate.earlyStencilTestFailCount));
457
EventHandlerFile::Handle(LateStencilSampleRate(drawId,
458
mDSSampleRate.lateStencilTestPassCount,
459
mDSSampleRate.lateStencilTestFailCount));
460
461
// combined
462
EventHandlerFile::Handle(
463
EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
464
EventHandlerFile::Handle(
465
LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
466
EventHandlerFile::Handle(EarlyStencil(drawId,
467
mDSCombined.earlyStencilTestPassCount,
468
mDSCombined.earlyStencilTestFailCount));
469
EventHandlerFile::Handle(LateStencil(drawId,
470
mDSCombined.lateStencilTestPassCount,
471
mDSCombined.lateStencilTestFailCount));
472
473
// pixelRate
474
EventHandlerFile::Handle(EarlyZPixelRate(
475
drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
476
EventHandlerFile::Handle(LateZPixelRate(
477
drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
478
479
480
// NullPS
481
EventHandlerFile::Handle(
482
EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
483
EventHandlerFile::Handle(EarlyStencilNullPS(
484
drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
485
486
// Rasterized Subspans
487
EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
488
489
// Alpha Subspans
490
EventHandlerFile::Handle(
491
AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
492
493
// Primitive Culling
494
EventHandlerFile::Handle(
495
CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
496
497
mDSSingleSample = {};
498
mDSSampleRate = {};
499
mDSCombined = {};
500
mDSPixelRate = {};
501
mDSNullPS = {};
502
503
rastStats = {};
504
mCullStats = {};
505
mAlphaStats = {};
506
507
mShaderStats[SHADER_PIXEL] = {};
508
mShaderStats[SHADER_COMPUTE] = {};
509
510
mNeedFlush = false;
511
}
512
513
virtual void Handle(const FrontendDrawEndEvent& event)
514
{
515
// Clipper
516
EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
517
mClipper.trivialRejectCount,
518
mClipper.trivialAcceptCount,
519
mClipper.mustClipCount));
520
521
// Tesselator
522
EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
523
524
// Geometry Shader
525
EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
526
EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
527
EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
528
529
EventHandlerFile::Handle(VSInfo(event.data.drawId,
530
mShaderStats[SHADER_VERTEX].numInstExecuted,
531
mShaderStats[SHADER_VERTEX].numSampleExecuted,
532
mShaderStats[SHADER_VERTEX].numSampleLExecuted,
533
mShaderStats[SHADER_VERTEX].numSampleBExecuted,
534
mShaderStats[SHADER_VERTEX].numSampleCExecuted,
535
mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
536
mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
537
mShaderStats[SHADER_VERTEX].numGather4Executed,
538
mShaderStats[SHADER_VERTEX].numGather4CExecuted,
539
mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
540
mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
541
mShaderStats[SHADER_VERTEX].numLodExecuted));
542
EventHandlerFile::Handle(HSInfo(event.data.drawId,
543
mShaderStats[SHADER_HULL].numInstExecuted,
544
mShaderStats[SHADER_HULL].numSampleExecuted,
545
mShaderStats[SHADER_HULL].numSampleLExecuted,
546
mShaderStats[SHADER_HULL].numSampleBExecuted,
547
mShaderStats[SHADER_HULL].numSampleCExecuted,
548
mShaderStats[SHADER_HULL].numSampleCLZExecuted,
549
mShaderStats[SHADER_HULL].numSampleCDExecuted,
550
mShaderStats[SHADER_HULL].numGather4Executed,
551
mShaderStats[SHADER_HULL].numGather4CExecuted,
552
mShaderStats[SHADER_HULL].numGather4CPOExecuted,
553
mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
554
mShaderStats[SHADER_HULL].numLodExecuted));
555
EventHandlerFile::Handle(DSInfo(event.data.drawId,
556
mShaderStats[SHADER_DOMAIN].numInstExecuted,
557
mShaderStats[SHADER_DOMAIN].numSampleExecuted,
558
mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
559
mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
560
mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
561
mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
562
mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
563
mShaderStats[SHADER_DOMAIN].numGather4Executed,
564
mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
565
mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
566
mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
567
mShaderStats[SHADER_DOMAIN].numLodExecuted));
568
EventHandlerFile::Handle(GSInfo(event.data.drawId,
569
mShaderStats[SHADER_GEOMETRY].numInstExecuted,
570
mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
571
mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
572
mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
573
mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
574
mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
575
mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
576
mShaderStats[SHADER_GEOMETRY].numGather4Executed,
577
mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
578
mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
579
mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
580
mShaderStats[SHADER_GEOMETRY].numLodExecuted));
581
582
mShaderStats[SHADER_VERTEX] = {};
583
mShaderStats[SHADER_HULL] = {};
584
mShaderStats[SHADER_DOMAIN] = {};
585
mShaderStats[SHADER_GEOMETRY] = {};
586
587
// Reset Internal Counters
588
mClipper = {};
589
mTS = {};
590
mGS = {};
591
}
592
593
virtual void Handle(const GSPrimInfo& event)
594
{
595
mGS.inputPrimCount += event.data.inputPrimCount;
596
mGS.primGeneratedCount += event.data.primGeneratedCount;
597
mGS.vertsInput += event.data.vertsInput;
598
}
599
600
virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
601
602
virtual void Handle(const RasterTileCount& event)
603
{
604
rastStats.rasterTiles += event.data.rasterTiles;
605
}
606
607
virtual void Handle(const CullInfoEvent& event)
608
{
609
mCullStats.degeneratePrimCount += _mm_popcnt_u32(
610
event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
611
mCullStats.backfacePrimCount += _mm_popcnt_u32(
612
event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
613
}
614
615
virtual void Handle(const AlphaInfoEvent& event)
616
{
617
mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
618
mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
619
}
620
621
protected:
622
bool mNeedFlush;
623
// Per draw stats
624
DepthStencilStats mDSSingleSample = {};
625
DepthStencilStats mDSSampleRate = {};
626
DepthStencilStats mDSPixelRate = {};
627
DepthStencilStats mDSCombined = {};
628
DepthStencilStats mDSNullPS = {};
629
DepthStencilStats mDSOmZ = {};
630
CStats mClipper = {};
631
TEStats mTS = {};
632
GSStateInfo mGS = {};
633
RastStats rastStats = {};
634
CullStats mCullStats = {};
635
AlphaStats mAlphaStats = {};
636
637
SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
638
639
};
640
641
/// @brief Reinterpret an opaque thread-context handle as the EventManager
///        pointer it carries. No validation is performed here.
static EventManager* FromHandle(HANDLE hThreadContext)
{
    EventManager* const pEventManager = reinterpret_cast<EventManager*>(hThreadContext);
    return pEventManager;
}
645
646
// Construct an event manager and associate a handler with it.
647
HANDLE CreateThreadContext(AR_THREAD type)
648
{
649
// Can we assume single threaded here?
650
static std::atomic<uint32_t> counter(0);
651
uint32_t id = counter.fetch_add(1);
652
653
EventManager* pManager = new EventManager();
654
655
if (pManager)
656
{
657
EventHandlerFile* pHandler = nullptr;
658
659
if (type == AR_THREAD::API)
660
{
661
pHandler = new EventHandlerApiStats(id);
662
pManager->Attach(pHandler);
663
pHandler->Handle(ThreadStartApiEvent());
664
}
665
else
666
{
667
pHandler = new EventHandlerWorkerStats(id);
668
pManager->Attach(pHandler);
669
pHandler->Handle(ThreadStartWorkerEvent());
670
}
671
672
pHandler->MarkHeader();
673
674
return pManager;
675
}
676
677
SWR_INVALID("Failed to register thread.");
678
return nullptr;
679
}
680
681
/// @brief Delete the EventManager behind a thread-context handle.
/// @param hThreadContext - handle previously returned by CreateThreadContext.
void DestroyThreadContext(HANDLE hThreadContext)
{
    EventManager* const pManager = FromHandle(hThreadContext);
    SWR_ASSERT(pManager != nullptr);
    delete pManager;
}
688
689
// Dispatch event for this thread.
690
void Dispatch(HANDLE hThreadContext, const Event& event)
691
{
692
if (event.IsEnabled())
693
{
694
EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);
695
SWR_ASSERT(pManager != nullptr);
696
pManager->Dispatch(event);
697
}
698
}
699
700
// Flush for this thread.
701
void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
702
{
703
EventManager* pManager = FromHandle(hThreadContext);
704
SWR_ASSERT(pManager != nullptr);
705
706
pManager->FlushDraw(drawId);
707
}
708
} // namespace ArchRast
709
710