// Path: blob/main_old/src/tests/perf_tests/VulkanBarriersPerf.cpp
// 1693 views
//1// Copyright 2019 The ANGLE Project Authors. All rights reserved.2// Use of this source code is governed by a BSD-style license that can be3// found in the LICENSE file.4//5// VulkanBarriersPerf:6// Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.7//89#include <sstream>1011#include "ANGLEPerfTest.h"12#include "test_utils/gl_raii.h"13#include "util/shader_utils.h"1415using namespace angle;1617namespace18{19constexpr unsigned int kIterationsPerStep = 10;2021struct VulkanBarriersPerfParams final : public RenderTestParams22{23VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)24{25iterationsPerStep = kIterationsPerStep;2627// Common default parameters28eglParameters = egl_platform::VULKAN();29majorVersion = 3;30minorVersion = 0;31windowWidth = 256;32windowHeight = 256;33trackGpuTime = true;3435doBufferCopy = bufferCopy;36doLargeTransfers = largeTransfers;37doSlowFragmentShaders = slowFS;38}3940std::string story() const override;4142// Static parameters43static constexpr int kImageSizes[3] = {256, 512, 4096};44static constexpr int kBufferSize = 4096 * 4096;4546bool doBufferCopy;47bool doLargeTransfers;48bool doSlowFragmentShaders;49};5051constexpr int VulkanBarriersPerfParams::kImageSizes[];5253std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams ¶ms)54{55os << params.backendAndStory().substr(1);56return os;57}5859class VulkanBarriersPerfBenchmark : public ANGLERenderTest,60public ::testing::WithParamInterface<VulkanBarriersPerfParams>61{62public:63VulkanBarriersPerfBenchmark();6465void initializeBenchmark() override;66void destroyBenchmark() override;67void drawBenchmark() override;6869private:70void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);71void createUniformBuffer();72void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);73void createResources();7475// Handle to the program object76GLProgram mProgram;7778// Attribute locations79GLint 
mPositionLoc;80GLint mTexCoordLoc;8182// Sampler location83GLint mSamplerLoc;8485// Texture handles86GLTexture mTextures[4];8788// Uniform buffer handles89GLBuffer mUniformBuffers[2];9091// Framebuffer handles92GLFramebuffer mFbos[2];9394// Buffer handle95GLBuffer mVertexBuffer;96GLBuffer mIndexBuffer;9798static constexpr size_t kSmallFboIndex = 0;99static constexpr size_t kLargeFboIndex = 1;100101static constexpr size_t kUniformBuffer1Index = 0;102static constexpr size_t kUniformBuffer2Index = 1;103104static constexpr size_t kSmallTextureIndex = 0;105static constexpr size_t kLargeTextureIndex = 1;106static constexpr size_t kTransferTexture1Index = 2;107static constexpr size_t kTransferTexture2Index = 3;108109static constexpr size_t kSmallSizeIndex = 0;110static constexpr size_t kLargeSizeIndex = 1;111static constexpr size_t kHugeSizeIndex = 2;112};113114std::string VulkanBarriersPerfParams::story() const115{116std::ostringstream sout;117118sout << RenderTestParams::story();119120if (doBufferCopy)121{122sout << "_buffer_copy";123}124if (doLargeTransfers)125{126sout << "_transfer";127}128if (doSlowFragmentShaders)129{130sout << "_slowfs";131}132133return sout.str();134}135136VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()137: ANGLERenderTest("VulkanBarriersPerf", GetParam()),138mPositionLoc(-1),139mTexCoordLoc(-1),140mSamplerLoc(-1)141{142// Fails on Windows7 NVIDIA Vulkan, presumably due to old drivers. 
http://crbug.com/1096510143if (IsNVIDIA() && IsWindows7())144{145mSkipTest = true;146}147}148149constexpr char kVS[] = R"(attribute vec4 a_position;150attribute vec2 a_texCoord;151varying vec2 v_texCoord;152void main()153{154gl_Position = a_position;155v_texCoord = a_texCoord;156})";157158constexpr char kShortFS[] = R"(precision mediump float;159varying vec2 v_texCoord;160uniform sampler2D s_texture;161void main()162{163gl_FragColor = texture2D(s_texture, v_texCoord);164})";165166constexpr char kSlowFS[] = R"(precision mediump float;167varying vec2 v_texCoord;168uniform sampler2D s_texture;169void main()170{171vec4 outColor = vec4(0);172if (v_texCoord.x < 0.2)173{174for (int i = 0; i < 100; ++i)175{176outColor += texture2D(s_texture, v_texCoord);177}178}179gl_FragColor = outColor;180})";181182void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,183uint32_t sizeIndex,184bool compressed)185{186const auto ¶ms = GetParam();187188// TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan189// backend. 
http://anglebug.com/2999190191glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);192glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],193params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);194195// Disable mipmapping196glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);197glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);198}199200void VulkanBarriersPerfBenchmark::createUniformBuffer()201{202const auto ¶ms = GetParam();203204glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);205glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);206glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);207glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);208glBindBuffer(GL_UNIFORM_BUFFER, 0);209}210211void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,212uint32_t textureIndex,213uint32_t sizeIndex)214{215createTexture(textureIndex, sizeIndex, false);216217glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);218glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,219mTextures[textureIndex], 0);220}221222void VulkanBarriersPerfBenchmark::createResources()223{224const auto ¶ms = GetParam();225226mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? 
kSlowFS : kShortFS);227ASSERT_TRUE(mProgram.valid());228229// Get the attribute locations230mPositionLoc = glGetAttribLocation(mProgram, "a_position");231mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");232233// Get the sampler location234mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");235236// Build the vertex buffer237GLfloat vertices[] = {238-0.5f, 0.5f, 0.0f, // Position 02390.0f, 0.0f, // TexCoord 0240-0.5f, -0.5f, 0.0f, // Position 12410.0f, 1.0f, // TexCoord 12420.5f, -0.5f, 0.0f, // Position 22431.0f, 1.0f, // TexCoord 22440.5f, 0.5f, 0.0f, // Position 32451.0f, 0.0f // TexCoord 3246};247248glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);249glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);250251GLushort indices[] = {0, 1, 2, 0, 2, 3};252glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);253glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);254255// Use tightly packed data256glPixelStorei(GL_UNPACK_ALIGNMENT, 1);257258// Create four textures. 
Two of them are going to be framebuffers, and two are used for large259// transfers.260createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);261createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);262createUniformBuffer();263264if (params.doLargeTransfers)265{266createTexture(kTransferTexture1Index, kHugeSizeIndex, true);267createTexture(kTransferTexture2Index, kHugeSizeIndex, true);268}269}270271void VulkanBarriersPerfBenchmark::initializeBenchmark()272{273createResources();274275glClearColor(0.0f, 0.0f, 0.0f, 0.0f);276277ASSERT_GL_NO_ERROR();278}279280void VulkanBarriersPerfBenchmark::destroyBenchmark() {}281282void VulkanBarriersPerfBenchmark::drawBenchmark()283{284const auto ¶ms = GetParam();285286glUseProgram(mProgram);287288// Bind the buffers289glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);290glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);291292// Load the vertex position293glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);294// Load the texture coordinate295glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),296reinterpret_cast<void *>(3 * sizeof(GLfloat)));297298glEnableVertexAttribArray(mPositionLoc);299glEnableVertexAttribArray(mTexCoordLoc);300301// Set the texture sampler to texture unit to 0302glUniform1i(mSamplerLoc, 0);303304/*305* The perf benchmark does the following:306*307* - Alternately clear and draw from fbo 1 into fbo 2 and back. This would use the color308* attachment and shader read-only layouts in the fragment shader and color attachment stages.309*310* - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts311* in the transfer stage.312*313* Once compressed texture copies are supported, alternately copy large chunks of data from314* texture 1 into texture 2 and back. 
This would use the transfer layouts in the transfer315* stage.316*317* Once compute shader support is added, another independent set of operations could be a few318* dispatches. This would use the general and shader read-only layouts in the compute stage.319*320* The idea is to create independent pipelines of operations that would run in parallel on the321* GPU. Regressions or inefficiencies in the barrier implementation could result in322* serialization of these jobs, resulting in a hit in performance.323*324* The above operations for example should ideally run on the GPU threads in parallel:325*326* + |---draw---||---draw---||---draw---||---draw---||---draw---|327* + |----buffer copy----||----buffer copy----||----buffer copy----|328* + |-----------texture copy------------||-----------texture copy------------|329* + |-----dispatch------||------dispatch------||------dispatch------|330*331* If barriers are too restrictive, situations like this could happen (draw is blocking332* copy):333*334* + |---draw---||---draw---||---draw---||---draw---||---draw---|335* + |------------copy------------||-----------copy------------|336*337* Or like this (copy is blocking draw):338*339* + |---draw---| |---draw---| |---draw---|340* + |--------------copy-------------||-------------copy--------------|341*342* Or like this (draw and copy blocking each other):343*344* + |---draw---| |---draw---|345* + |------------copy---------------| |------------copy------------|346*347* The idea of doing slow FS calls is to make the second case above slower (by making the draw348* slower than the transfer):349*350* + |------------------draw------------------| |-...draw...-|351* + |--------------copy----------------| |-------------copy-------------|352*/353354startGpuTimer();355for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)356{357bool altEven = iteration % 2 == 0;358359const int fboDestIndex = altEven ? 
kLargeFboIndex : kSmallFboIndex;360const int fboTexSrcIndex = altEven ? kSmallTextureIndex : kLargeTextureIndex;361const int fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex;362const int uniformBufferReadIndex = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;363const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;364365if (params.doBufferCopy)366{367// Transfer data between the 2 Uniform buffers368glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);369glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);370glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,371params.kBufferSize);372}373374// Bind the framebuffer375glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);376377// Set the viewport378glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],379params.kImageSizes[fboDestSizeIndex]);380381// Clear the color buffer382glClear(GL_COLOR_BUFFER_BIT);383384// Bind the texture385glActiveTexture(GL_TEXTURE0);386glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);387388ASSERT_GL_NO_ERROR();389390glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);391}392stopGpuTimer();393394ASSERT_GL_NO_ERROR();395}396397} // namespace398399TEST_P(VulkanBarriersPerfBenchmark, Run)400{401run();402}403404GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);405ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,406VulkanBarriersPerfParams(false, false, false),407VulkanBarriersPerfParams(true, false, false),408VulkanBarriersPerfParams(false, true, false),409VulkanBarriersPerfParams(false, true, true));410411412