Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/angle
Path: blob/main_old/src/tests/perf_tests/VulkanBarriersPerf.cpp
1693 views
1
//
2
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
3
// Use of this source code is governed by a BSD-style license that can be
4
// found in the LICENSE file.
5
//
6
// VulkanBarriersPerf:
7
// Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
8
//
9
10
#include <sstream>
11
12
#include "ANGLEPerfTest.h"
13
#include "test_utils/gl_raii.h"
14
#include "util/shader_utils.h"
15
16
using namespace angle;
17
18
namespace
19
{
20
// Value assigned to RenderTestParams::iterationsPerStep by VulkanBarriersPerfParams; it bounds
// the draw loop in VulkanBarriersPerfBenchmark::drawBenchmark().
constexpr unsigned int kIterationsPerStep = 10;
21
22
struct VulkanBarriersPerfParams final : public RenderTestParams
23
{
24
VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)
25
{
26
iterationsPerStep = kIterationsPerStep;
27
28
// Common default parameters
29
eglParameters = egl_platform::VULKAN();
30
majorVersion = 3;
31
minorVersion = 0;
32
windowWidth = 256;
33
windowHeight = 256;
34
trackGpuTime = true;
35
36
doBufferCopy = bufferCopy;
37
doLargeTransfers = largeTransfers;
38
doSlowFragmentShaders = slowFS;
39
}
40
41
std::string story() const override;
42
43
// Static parameters
44
static constexpr int kImageSizes[3] = {256, 512, 4096};
45
static constexpr int kBufferSize = 4096 * 4096;
46
47
bool doBufferCopy;
48
bool doLargeTransfers;
49
bool doSlowFragmentShaders;
50
};
51
52
// Out-of-class definition of the static constexpr array declared inside the struct. The array
// is indexed with run-time values elsewhere in this file, which needs a definition when the
// code is built as pre-C++17 (presumably why this line exists -- TODO confirm target standard).
constexpr int VulkanBarriersPerfParams::kImageSizes[];
53
54
std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
55
{
56
os << params.backendAndStory().substr(1);
57
return os;
58
}
59
60
class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
61
public ::testing::WithParamInterface<VulkanBarriersPerfParams>
62
{
63
public:
64
VulkanBarriersPerfBenchmark();
65
66
void initializeBenchmark() override;
67
void destroyBenchmark() override;
68
void drawBenchmark() override;
69
70
private:
71
void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
72
void createUniformBuffer();
73
void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
74
void createResources();
75
76
// Handle to the program object
77
GLProgram mProgram;
78
79
// Attribute locations
80
GLint mPositionLoc;
81
GLint mTexCoordLoc;
82
83
// Sampler location
84
GLint mSamplerLoc;
85
86
// Texture handles
87
GLTexture mTextures[4];
88
89
// Uniform buffer handles
90
GLBuffer mUniformBuffers[2];
91
92
// Framebuffer handles
93
GLFramebuffer mFbos[2];
94
95
// Buffer handle
96
GLBuffer mVertexBuffer;
97
GLBuffer mIndexBuffer;
98
99
static constexpr size_t kSmallFboIndex = 0;
100
static constexpr size_t kLargeFboIndex = 1;
101
102
static constexpr size_t kUniformBuffer1Index = 0;
103
static constexpr size_t kUniformBuffer2Index = 1;
104
105
static constexpr size_t kSmallTextureIndex = 0;
106
static constexpr size_t kLargeTextureIndex = 1;
107
static constexpr size_t kTransferTexture1Index = 2;
108
static constexpr size_t kTransferTexture2Index = 3;
109
110
static constexpr size_t kSmallSizeIndex = 0;
111
static constexpr size_t kLargeSizeIndex = 1;
112
static constexpr size_t kHugeSizeIndex = 2;
113
};
114
115
std::string VulkanBarriersPerfParams::story() const
116
{
117
std::ostringstream sout;
118
119
sout << RenderTestParams::story();
120
121
if (doBufferCopy)
122
{
123
sout << "_buffer_copy";
124
}
125
if (doLargeTransfers)
126
{
127
sout << "_transfer";
128
}
129
if (doSlowFragmentShaders)
130
{
131
sout << "_slowfs";
132
}
133
134
return sout.str();
135
}
136
137
// Registers the test under the name "VulkanBarriersPerf" with the current parameter set and
// starts with all shader locations unresolved (-1).
VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
    : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
      mPositionLoc(-1),
      mTexCoordLoc(-1),
      mSamplerLoc(-1)
{
    // Fails on Windows7 NVIDIA Vulkan, presumably due to old drivers. http://crbug.com/1096510
    const bool isNvidiaOnWindows7 = IsNVIDIA() && IsWindows7();
    if (isNvidiaOnWindows7)
    {
        mSkipTest = true;
    }
}
149
150
// Vertex shader for the textured quad: passes a_position straight to gl_Position and forwards
// a_texCoord to the fragment stage through the v_texCoord varying.
constexpr char kVS[] = R"(attribute vec4 a_position;
151
attribute vec2 a_texCoord;
152
varying vec2 v_texCoord;
153
void main()
154
{
155
gl_Position = a_position;
156
v_texCoord = a_texCoord;
157
})";
158
159
// Cheap fragment shader: a single lookup of s_texture at v_texCoord written to gl_FragColor.
// Selected in createResources() when doSlowFragmentShaders is false.
constexpr char kShortFS[] = R"(precision mediump float;
160
varying vec2 v_texCoord;
161
uniform sampler2D s_texture;
162
void main()
163
{
164
gl_FragColor = texture2D(s_texture, v_texCoord);
165
})";
166
167
// Deliberately expensive fragment shader: fragments with v_texCoord.x < 0.2 sample s_texture
// 100 times and sum the results; all other fragments output vec4(0). Selected in
// createResources() when doSlowFragmentShaders is true.
constexpr char kSlowFS[] = R"(precision mediump float;
168
varying vec2 v_texCoord;
169
uniform sampler2D s_texture;
170
void main()
171
{
172
vec4 outColor = vec4(0);
173
if (v_texCoord.x < 0.2)
174
{
175
for (int i = 0; i < 100; ++i)
176
{
177
outColor += texture2D(s_texture, v_texCoord);
178
}
179
}
180
gl_FragColor = outColor;
181
})";
182
183
void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
184
uint32_t sizeIndex,
185
bool compressed)
186
{
187
const auto &params = GetParam();
188
189
// TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
190
// backend. http://anglebug.com/2999
191
192
glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
193
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
194
params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
195
196
// Disable mipmapping
197
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
198
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
199
}
200
201
void VulkanBarriersPerfBenchmark::createUniformBuffer()
202
{
203
const auto &params = GetParam();
204
205
glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);
206
glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
207
glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);
208
glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
209
glBindBuffer(GL_UNIFORM_BUFFER, 0);
210
}
211
212
void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
213
uint32_t textureIndex,
214
uint32_t sizeIndex)
215
{
216
createTexture(textureIndex, sizeIndex, false);
217
218
glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
219
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
220
mTextures[textureIndex], 0);
221
}
222
223
void VulkanBarriersPerfBenchmark::createResources()
224
{
225
const auto &params = GetParam();
226
227
mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
228
ASSERT_TRUE(mProgram.valid());
229
230
// Get the attribute locations
231
mPositionLoc = glGetAttribLocation(mProgram, "a_position");
232
mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
233
234
// Get the sampler location
235
mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
236
237
// Build the vertex buffer
238
GLfloat vertices[] = {
239
-0.5f, 0.5f, 0.0f, // Position 0
240
0.0f, 0.0f, // TexCoord 0
241
-0.5f, -0.5f, 0.0f, // Position 1
242
0.0f, 1.0f, // TexCoord 1
243
0.5f, -0.5f, 0.0f, // Position 2
244
1.0f, 1.0f, // TexCoord 2
245
0.5f, 0.5f, 0.0f, // Position 3
246
1.0f, 0.0f // TexCoord 3
247
};
248
249
glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
250
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
251
252
GLushort indices[] = {0, 1, 2, 0, 2, 3};
253
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
254
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
255
256
// Use tightly packed data
257
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
258
259
// Create four textures. Two of them are going to be framebuffers, and two are used for large
260
// transfers.
261
createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
262
createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
263
createUniformBuffer();
264
265
if (params.doLargeTransfers)
266
{
267
createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
268
createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
269
}
270
}
271
272
void VulkanBarriersPerfBenchmark::initializeBenchmark()
273
{
274
createResources();
275
276
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
277
278
ASSERT_GL_NO_ERROR();
279
}
280
281
// Intentionally empty: this fixture performs no explicit teardown. (The GL objects are held in
// wrapper types from test_utils/gl_raii.h; presumably those release them -- TODO confirm.)
void VulkanBarriersPerfBenchmark::destroyBenchmark()
{
}
282
283
void VulkanBarriersPerfBenchmark::drawBenchmark()
284
{
285
const auto &params = GetParam();
286
287
glUseProgram(mProgram);
288
289
// Bind the buffers
290
glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
291
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
292
293
// Load the vertex position
294
glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
295
// Load the texture coordinate
296
glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
297
reinterpret_cast<void *>(3 * sizeof(GLfloat)));
298
299
glEnableVertexAttribArray(mPositionLoc);
300
glEnableVertexAttribArray(mTexCoordLoc);
301
302
// Set the texture sampler to texture unit to 0
303
glUniform1i(mSamplerLoc, 0);
304
305
/*
306
* The perf benchmark does the following:
307
*
308
* - Alternately clear and draw from fbo 1 into fbo 2 and back. This would use the color
309
* attachment and shader read-only layouts in the fragment shader and color attachment stages.
310
*
311
* - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts
312
* in the transfer stage.
313
*
314
* Once compressed texture copies are supported, alternately copy large chunks of data from
315
* texture 1 into texture 2 and back. This would use the transfer layouts in the transfer
316
* stage.
317
*
318
* Once compute shader support is added, another independent set of operations could be a few
319
* dispatches. This would use the general and shader read-only layouts in the compute stage.
320
*
321
* The idea is to create independent pipelines of operations that would run in parallel on the
322
* GPU. Regressions or inefficiencies in the barrier implementation could result in
323
* serialization of these jobs, resulting in a hit in performance.
324
*
325
* The above operations for example should ideally run on the GPU threads in parallel:
326
*
327
* + |---draw---||---draw---||---draw---||---draw---||---draw---|
328
* + |----buffer copy----||----buffer copy----||----buffer copy----|
329
* + |-----------texture copy------------||-----------texture copy------------|
330
* + |-----dispatch------||------dispatch------||------dispatch------|
331
*
332
* If barriers are too restrictive, situations like this could happen (draw is blocking
333
* copy):
334
*
335
* + |---draw---||---draw---||---draw---||---draw---||---draw---|
336
* + |------------copy------------||-----------copy------------|
337
*
338
* Or like this (copy is blocking draw):
339
*
340
* + |---draw---| |---draw---| |---draw---|
341
* + |--------------copy-------------||-------------copy--------------|
342
*
343
* Or like this (draw and copy blocking each other):
344
*
345
* + |---draw---| |---draw---|
346
* + |------------copy---------------| |------------copy------------|
347
*
348
* The idea of doing slow FS calls is to make the second case above slower (by making the draw
349
* slower than the transfer):
350
*
351
* + |------------------draw------------------| |-...draw...-|
352
* + |--------------copy----------------| |-------------copy-------------|
353
*/
354
355
startGpuTimer();
356
for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
357
{
358
bool altEven = iteration % 2 == 0;
359
360
const int fboDestIndex = altEven ? kLargeFboIndex : kSmallFboIndex;
361
const int fboTexSrcIndex = altEven ? kSmallTextureIndex : kLargeTextureIndex;
362
const int fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex;
363
const int uniformBufferReadIndex = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;
364
const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;
365
366
if (params.doBufferCopy)
367
{
368
// Transfer data between the 2 Uniform buffers
369
glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);
370
glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);
371
glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
372
params.kBufferSize);
373
}
374
375
// Bind the framebuffer
376
glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);
377
378
// Set the viewport
379
glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],
380
params.kImageSizes[fboDestSizeIndex]);
381
382
// Clear the color buffer
383
glClear(GL_COLOR_BUFFER_BIT);
384
385
// Bind the texture
386
glActiveTexture(GL_TEXTURE0);
387
glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);
388
389
ASSERT_GL_NO_ERROR();
390
391
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
392
}
393
stopGpuTimer();
394
395
ASSERT_GL_NO_ERROR();
396
}
397
398
} // namespace
399
400
// Single parameterized test case: hands control to the fixture's run() entry point.
TEST_P(VulkanBarriersPerfBenchmark, Run)
{
    run();
}
404
405
// Do not treat the suite as an error if it ends up with no instantiations.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);

// Variants, as (bufferCopy, largeTransfers, slowFS):
//   1. draws only
//   2. draws + uniform-buffer copies
//   3. draws + large transfer textures
//   4. draws + large transfer textures + slow fragment shader
ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
                       VulkanBarriersPerfParams(false, false, false),
                       VulkanBarriersPerfParams(true, false, false),
                       VulkanBarriersPerfParams(false, true, false),
                       VulkanBarriersPerfParams(false, true, true));
411
412