Path: blob/21.2-virgl/src/amd/vulkan/layers/radv_sqtt_layer.c
7229 views
/*1* Copyright © 2020 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "radv_private.h"24#include "radv_shader.h"2526#include "ac_rgp.h"27#include "ac_sqtt.h"2829static void30radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,31enum rgp_sqtt_marker_general_api_type api_type)32{33struct rgp_sqtt_marker_general_api marker = {0};34struct radeon_cmdbuf *cs = cmd_buffer->cs;3536marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;37marker.api_type = api_type;3839radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);40}4142static void43radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,44enum rgp_sqtt_marker_general_api_type api_type)45{46struct rgp_sqtt_marker_general_api marker = {0};47struct radeon_cmdbuf *cs = cmd_buffer->cs;4849marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;50marker.api_type = api_type;51marker.is_end = 1;5253radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);54}5556static void57radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,58enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data,59uint32_t instance_offset_user_data, uint32_t draw_index_user_data)60{61struct rgp_sqtt_marker_event marker = {0};62struct radeon_cmdbuf *cs = cmd_buffer->cs;6364marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;65marker.api_type = api_type;66marker.cmd_id = cmd_buffer->state.num_events++;67marker.cb_id = 0;6869if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {70vertex_offset_user_data = 0;71instance_offset_user_data = 0;72}7374if (draw_index_user_data == UINT_MAX)75draw_index_user_data = vertex_offset_user_data;7677marker.vertex_offset_reg_idx = vertex_offset_user_data;78marker.instance_offset_reg_idx = instance_offset_user_data;79marker.draw_index_reg_idx = draw_index_user_data;8081radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);82}8384static void85radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,86enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y,87uint32_t z)88{89struct rgp_sqtt_marker_event_with_dims marker = {0};90struct radeon_cmdbuf *cs = cmd_buffer->cs;9192marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;93marker.event.api_type = api_type;94marker.event.cmd_id = cmd_buffer->state.num_events++;95marker.event.cb_id = 0;96marker.event.has_thread_dims = 1;9798marker.thread_x = x;99marker.thread_y = y;100marker.thread_z = z;101102radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);103}104105static void106radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,107enum rgp_sqtt_marker_user_event_type type, const char *str)108{109struct radeon_cmdbuf *cs = cmd_buffer->cs;110111if (type == UserEventPop) {112assert(str == NULL);113struct rgp_sqtt_marker_user_event marker = {0};114marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;115marker.data_type = type;116117radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);118} else {119assert(str != NULL);120unsigned len = strlen(str);121struct rgp_sqtt_marker_user_event_with_length marker = {0};122marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;123marker.user_event.data_type = type;124marker.length = align(len, 4);125126uint8_t *buffer = alloca(sizeof(marker) + marker.length);127memset(buffer, 0, sizeof(marker) + marker.length);128memcpy(buffer, &marker, sizeof(marker));129memcpy(buffer + sizeof(marker), str, len);130131radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer,132sizeof(marker) / 4 + marker.length / 4);133}134}135136void137radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)138{139uint64_t device_id = (uintptr_t)cmd_buffer->device;140struct rgp_sqtt_marker_cb_start marker = {0};141struct radeon_cmdbuf *cs = cmd_buffer->cs;142143if (likely(!cmd_buffer->device->thread_trace.bo))144return;145146marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;147marker.cb_id = 0;148marker.device_id_low = device_id;149marker.device_id_high = device_id >> 32;150marker.queue = cmd_buffer->queue_family_index;151marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;152153if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)154marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;155156radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);157}158159void160radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)161{162uint64_t device_id = (uintptr_t)cmd_buffer->device;163struct rgp_sqtt_marker_cb_end marker = {0};164struct radeon_cmdbuf *cs = cmd_buffer->cs;165166if (likely(!cmd_buffer->device->thread_trace.bo))167return;168169marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;170marker.cb_id = 0;171marker.device_id_low = device_id;172marker.device_id_high = device_id >> 32;173174radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);175}176177void178radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)179{180if (likely(!cmd_buffer->device->thread_trace.bo))181return;182183radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,184UINT_MAX);185}186187void188radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)189{190if (likely(!cmd_buffer->device->thread_trace.bo))191return;192193radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z);194}195196void197radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,198VkImageAspectFlagBits aspects)199{200cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT)201? EventRenderPassColorClear202: EventRenderPassDepthStencilClear;203}204205void206radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)207{208cmd_buffer->state.current_event_type = EventInternalUnknown;209}210211void212radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)213{214cmd_buffer->state.current_event_type = EventRenderPassResolve;215}216217void218radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)219{220cmd_buffer->state.current_event_type = EventInternalUnknown;221}222223void224radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)225{226struct rgp_sqtt_marker_barrier_end marker = {0};227struct radeon_cmdbuf *cs = cmd_buffer->cs;228229if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)230return;231232cmd_buffer->state.pending_sqtt_barrier_end = false;233234marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;235marker.cb_id = 0;236237marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;238239if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)240marker.wait_on_eop_ts = true;241if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)242marker.vs_partial_flush = true;243if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)244marker.ps_partial_flush = true;245if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)246marker.cs_partial_flush = true;247if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)248marker.pfp_sync_me = true;249if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)250marker.sync_cp_dma = true;251if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)252marker.inval_tcp = true;253if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)254marker.inval_sqI = true;255if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)256marker.inval_sqK = true;257if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)258marker.flush_tcc = true;259if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)260marker.inval_tcc = true;261if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)262marker.flush_cb = true;263if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)264marker.inval_cb = true;265if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)266marker.flush_db = true;267if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)268marker.inval_db = true;269if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)270marker.inval_gl1 = true;271272radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);273274cmd_buffer->state.num_layout_transitions = 0;275}276277void278radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)279{280struct rgp_sqtt_marker_barrier_start marker = {0};281struct radeon_cmdbuf *cs = cmd_buffer->cs;282283if (likely(!cmd_buffer->device->thread_trace.bo))284return;285286radv_describe_barrier_end_delayed(cmd_buffer);287cmd_buffer->state.sqtt_flush_bits = 0;288289marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;290marker.cb_id = 0;291marker.dword02 = reason;292293radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);294}295296void297radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)298{299cmd_buffer->state.pending_sqtt_barrier_end = true;300}301302void303radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,304const struct radv_barrier_data *barrier)305{306struct rgp_sqtt_marker_layout_transition marker = {0};307struct radeon_cmdbuf *cs = cmd_buffer->cs;308309if (likely(!cmd_buffer->device->thread_trace.bo))310return;311312marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;313marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;314marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;315marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;316marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;317marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;318marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;319marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;320marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;321322radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);323324cmd_buffer->state.num_layout_transitions++;325}326327static void328radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,329VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline)330{331struct rgp_sqtt_marker_pipeline_bind marker = {0};332struct radeon_cmdbuf *cs = cmd_buffer->cs;333334if (likely(!cmd_buffer->device->thread_trace.bo))335return;336337marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;338marker.cb_id = 0;339marker.bind_point = pipelineBindPoint;340marker.api_pso_hash[0] = pipeline->pipeline_hash;341marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;342343radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);344}345346/* TODO: Improve the way to trigger capture (overlay, etc). */347static void348radv_handle_thread_trace(VkQueue _queue)349{350RADV_FROM_HANDLE(radv_queue, queue, _queue);351static bool thread_trace_enabled = false;352static uint64_t num_frames = 0;353bool resize_trigger = false;354355if (thread_trace_enabled) {356struct ac_thread_trace thread_trace = {0};357358radv_end_thread_trace(queue);359thread_trace_enabled = false;360361/* TODO: Do something better than this whole sync. */362radv_QueueWaitIdle(_queue);363364if (radv_get_thread_trace(queue, &thread_trace)) {365ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace);366} else {367/* Trigger a new capture if the driver failed to get368* the trace because the buffer was too small.369*/370resize_trigger = true;371}372}373374if (!thread_trace_enabled) {375bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;376bool file_trigger = false;377#ifndef _WIN32378if (queue->device->thread_trace.trigger_file &&379access(queue->device->thread_trace.trigger_file, W_OK) == 0) {380if (unlink(queue->device->thread_trace.trigger_file) == 0) {381file_trigger = true;382} else {383/* Do not enable tracing if we cannot remove the file,384* because by then we'll trace every frame ... */385fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");386}387}388#endif389390if (frame_trigger || file_trigger || resize_trigger) {391/* FIXME: SQTT on compute hangs. */392if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {393fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "394"queue is currently broken and might hang! "395"Please, disable presenting on compute if "396"you can.\n");397return;398}399400radv_begin_thread_trace(queue);401assert(!thread_trace_enabled);402thread_trace_enabled = true;403}404}405num_frames++;406}407408VkResult409sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)410{411VkResult result;412413result = radv_QueuePresentKHR(_queue, pPresentInfo);414if (result != VK_SUCCESS)415return result;416417radv_handle_thread_trace(_queue);418419return VK_SUCCESS;420}421422#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \423RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \424radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \425cmd_buffer->state.current_event_type = EventCmd##api_name; \426radv_Cmd##cmd_name(__VA_ARGS__); \427cmd_buffer->state.current_event_type = EventInternalUnknown; \428radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);429430#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);431432void433sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,434uint32_t firstVertex, uint32_t firstInstance)435{436EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);437}438439void440sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,441uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)442{443EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,444firstInstance);445}446447void448sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,449uint32_t drawCount, uint32_t stride)450{451EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);452}453454void455sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,456uint32_t drawCount, uint32_t stride)457{458EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);459}460461void462sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,463VkBuffer countBuffer, VkDeviceSize countBufferOffset,464uint32_t maxDrawCount, uint32_t stride)465{466EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset,467maxDrawCount, stride);468}469470void471sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,472VkDeviceSize offset, VkBuffer countBuffer,473VkDeviceSize countBufferOffset, uint32_t maxDrawCount,474uint32_t stride)475{476EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer,477countBufferOffset, maxDrawCount, stride);478}479480void481sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)482{483EVENT_MARKER(Dispatch, commandBuffer, x, y, z);484}485486void487sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)488{489EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);490}491492void493sqtt_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)494{495EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer, pCopyBufferInfo);496}497498void499sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,500VkDeviceSize fillSize, uint32_t data)501{502EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);503}504505void506sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,507VkDeviceSize dataSize, const void *pData)508{509EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);510}511512void513sqtt_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)514{515EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer, pCopyImageInfo);516}517518void519sqtt_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,520const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)521{522EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage, commandBuffer,523pCopyBufferToImageInfo);524}525526void527sqtt_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,528const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)529{530EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer, commandBuffer,531pCopyImageToBufferInfo);532}533534void535sqtt_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo)536{537EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer, pBlitImageInfo);538}539540void541sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,542const VkClearColorValue *pColor, uint32_t rangeCount,543const VkImageSubresourceRange *pRanges)544{545EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);546}547548void549sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,550VkImageLayout imageLayout,551const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,552const VkImageSubresourceRange *pRanges)553{554EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil,555rangeCount, pRanges);556}557558void559sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,560const VkClearAttachment *pAttachments, uint32_t rectCount,561const VkClearRect *pRects)562{563EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);564}565566void567sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,568const VkResolveImageInfo2KHR *pResolveImageInfo)569{570EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer, pResolveImageInfo);571}572573void574sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,575VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,576uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,577uint32_t bufferMemoryBarrierCount,578const VkBufferMemoryBarrier *pBufferMemoryBarriers,579uint32_t imageMemoryBarrierCount,580const VkImageMemoryBarrier *pImageMemoryBarriers)581{582EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask,583memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,584pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);585}586587void588sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,589VkPipelineStageFlags destStageMask, VkBool32 byRegion,590uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,591uint32_t bufferMemoryBarrierCount,592const VkBufferMemoryBarrier *pBufferMemoryBarriers,593uint32_t imageMemoryBarrierCount,594const VkImageMemoryBarrier *pImageMemoryBarriers)595{596EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, destStageMask, byRegion,597memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,598pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);599}600601void602sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,603uint32_t queryCount)604{605EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);606}607608void609sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,610uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,611VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)612{613EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer,614dstOffset, stride, flags);615}616617#undef EVENT_MARKER618#define API_MARKER_ALIAS(cmd_name, api_name, ...) \619RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \620radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \621radv_Cmd##cmd_name(__VA_ARGS__); \622radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);623624#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);625626static bool627radv_sqtt_dump_pipeline()628{629return getenv("RADV_THREAD_TRACE_PIPELINE");630}631632void633sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,634VkPipeline _pipeline)635{636RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);637638API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);639640if (radv_sqtt_dump_pipeline())641radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);642}643644void645sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,646VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,647const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,648const uint32_t *pDynamicOffsets)649{650API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet,651descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);652}653654void655sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,656VkIndexType indexType)657{658API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);659}660661void662sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,663uint32_t bindingCount, const VkBuffer *pBuffers,664const VkDeviceSize *pOffsets)665{666API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets);667}668669void670sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,671VkQueryControlFlags flags)672{673API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);674}675676void677sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)678{679API_MARKER(EndQuery, commandBuffer, queryPool, query);680}681682void683sqtt_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,684VkQueryPool queryPool, uint32_t flags)685{686API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags);687}688689void690sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,691VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,692const void *pValues)693{694API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);695}696697void698sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,699const VkRenderPassBeginInfo *pRenderPassBeginInfo,700const VkSubpassBeginInfo *pSubpassBeginInfo)701{702API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, pRenderPassBeginInfo,703pSubpassBeginInfo);704}705706void707sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,708const VkSubpassEndInfo *pSubpassEndInfo)709{710API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo);711}712713void714sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)715{716API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, pSubpassEndInfo);717}718719void720sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,721const VkCommandBuffer *pCmdBuffers)722{723API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);724}725726void727sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,728const VkViewport *pViewports)729{730API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);731}732733void734sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,735const VkRect2D *pScissors)736{737API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);738}739740void741sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)742{743API_MARKER(SetLineWidth, commandBuffer, lineWidth);744}745746void747sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,748float depthBiasClamp, float depthBiasSlopeFactor)749{750API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp,751depthBiasSlopeFactor);752}753754void755sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])756{757API_MARKER(SetBlendConstants, commandBuffer, blendConstants);758}759760void761sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)762{763API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);764}765766void767sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,768uint32_t compareMask)769{770API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);771}772773void774sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,775uint32_t writeMask)776{777API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);778}779780void781sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,782uint32_t reference)783{784API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);785}786787/* VK_EXT_debug_marker */788void789sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,790const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)791{792RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);793radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);794}795796void797sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)798{799RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);800radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);801}802803void804sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,805const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)806{807RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);808radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);809}810811VkResult812sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)813{814/* no-op */815return VK_SUCCESS;816}817818VkResult819sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)820{821/* no-op */822return VK_SUCCESS;823}824825/* Pipelines */826static enum rgp_hardware_stages827radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, gl_shader_stage stage)828{829struct radv_shader_variant *shader = pipeline->shaders[stage];830831switch (stage) {832case MESA_SHADER_VERTEX:833if (shader->info.vs.as_ls)834return RGP_HW_STAGE_LS;835else if (shader->info.vs.as_es)836return RGP_HW_STAGE_ES;837else if (shader->info.is_ngg)838return RGP_HW_STAGE_GS;839else840return RGP_HW_STAGE_VS;841case MESA_SHADER_TESS_CTRL:842return RGP_HW_STAGE_HS;843case MESA_SHADER_TESS_EVAL:844if (shader->info.tes.as_es)845return RGP_HW_STAGE_ES;846else if (shader->info.is_ngg)847return RGP_HW_STAGE_GS;848else849return RGP_HW_STAGE_VS;850case MESA_SHADER_GEOMETRY:851return RGP_HW_STAGE_GS;852case MESA_SHADER_FRAGMENT:853return RGP_HW_STAGE_PS;854case MESA_SHADER_COMPUTE:855return RGP_HW_STAGE_CS;856default:857unreachable("invalid mesa shader stage");858}859}860861static VkResult862radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)863{864struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;865struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;866struct rgp_code_object_record *record;867868record = malloc(sizeof(struct rgp_code_object_record));869if (!record)870return VK_ERROR_OUT_OF_HOST_MEMORY;871872record->shader_stages_mask = 0;873record->num_shaders_combined = 0;874record->pipeline_hash[0] = pipeline->pipeline_hash;875record->pipeline_hash[1] = pipeline->pipeline_hash;876877for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {878struct radv_shader_variant *shader = pipeline->shaders[i];879uint8_t *code;880uint64_t va;881882if (!shader)883continue;884885code = malloc(shader->code_size);886if (!code) {887free(record);888return VK_ERROR_OUT_OF_HOST_MEMORY;889}890memcpy(code, shader->code_ptr, shader->code_size);891892va = radv_buffer_get_va(shader->bo) + shader->bo_offset;893894record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;895record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;896record->shader_data[i].code_size = shader->code_size;897record->shader_data[i].code = code;898record->shader_data[i].vgpr_count = shader->config.num_vgprs;899record->shader_data[i].sgpr_count = shader->config.num_sgprs;900record->shader_data[i].base_address = va & 0xffffffffffff;901record->shader_data[i].elf_symbol_offset = 0;902record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);903record->shader_data[i].is_combined = false;904905record->shader_stages_mask |= (1 << i);906record->num_shaders_combined++;907}908909simple_mtx_lock(&code_object->lock);910list_addtail(&record->list, &code_object->record);911code_object->record_count++;912simple_mtx_unlock(&code_object->lock);913914return VK_SUCCESS;915}916917static VkResult918radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)919{920bool result;921uint64_t base_va = ~0;922923result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);924if (!result)925return VK_ERROR_OUT_OF_HOST_MEMORY;926927/* Find the lowest shader BO VA. */928for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {929struct radv_shader_variant *shader = pipeline->shaders[i];930uint64_t va;931932if (!shader)933continue;934935va = radv_buffer_get_va(shader->bo) + shader->bo_offset;936base_va = MIN2(base_va, va);937}938939result =940ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va);941if (!result)942return VK_ERROR_OUT_OF_HOST_MEMORY;943944result = radv_add_code_object(device, pipeline);945if (result != VK_SUCCESS)946return result;947948return VK_SUCCESS;949}950951static void952radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)953{954struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;955struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;956struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;957struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;958959/* Destroy the PSO correlation record. */960simple_mtx_lock(&pso_correlation->lock);961list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record,962list)963{964if (record->pipeline_hash[0] == pipeline->pipeline_hash) {965pso_correlation->record_count--;966list_del(&record->list);967free(record);968break;969}970}971simple_mtx_unlock(&pso_correlation->lock);972973/* Destroy the code object loader record. */974simple_mtx_lock(&loader_events->lock);975list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list)976{977if (record->code_object_hash[0] == pipeline->pipeline_hash) {978loader_events->record_count--;979list_del(&record->list);980free(record);981break;982}983}984simple_mtx_unlock(&loader_events->lock);985986/* Destroy the code object record. */987simple_mtx_lock(&code_object->lock);988list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list)989{990if (record->pipeline_hash[0] == pipeline->pipeline_hash) {991uint32_t mask = record->shader_stages_mask;992int i;993994/* Free the disassembly. */995while (mask) {996i = u_bit_scan(&mask);997free(record->shader_data[i].code);998}9991000code_object->record_count--;1001list_del(&record->list);1002free(record);1003break;1004}1005}1006simple_mtx_unlock(&code_object->lock);1007}10081009VkResult1010sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,1011const VkGraphicsPipelineCreateInfo *pCreateInfos,1012const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)1013{1014RADV_FROM_HANDLE(radv_device, device, _device);1015VkResult result;10161017result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,1018pPipelines);1019if (result != VK_SUCCESS)1020return result;10211022if (radv_sqtt_dump_pipeline()) {1023for (unsigned i = 0; i < count; i++) {1024RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);10251026if (!pipeline)1027continue;10281029result = radv_register_pipeline(device, pipeline);1030if (result != VK_SUCCESS)1031goto fail;1032}1033}10341035return VK_SUCCESS;10361037fail:1038for (unsigned i = 0; i < count; i++) {1039sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);1040pPipelines[i] = VK_NULL_HANDLE;1041}1042return result;1043}10441045VkResult1046sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,1047const VkComputePipelineCreateInfo *pCreateInfos,1048const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)1049{1050RADV_FROM_HANDLE(radv_device, device, _device);1051VkResult result;10521053result = radv_CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,1054pPipelines);1055if (result != VK_SUCCESS)1056return result;10571058if (radv_sqtt_dump_pipeline()) {1059for (unsigned i = 0; i < count; i++) {1060RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);10611062if (!pipeline)1063continue;10641065result = radv_register_pipeline(device, pipeline);1066if (result != VK_SUCCESS)1067goto fail;1068}1069}10701071return VK_SUCCESS;10721073fail:1074for (unsigned i = 0; i < count; i++) {1075sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);1076pPipelines[i] = VK_NULL_HANDLE;1077}1078return result;1079}10801081void1082sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,1083const VkAllocationCallbacks *pAllocator)1084{1085RADV_FROM_HANDLE(radv_device, device, _device);1086RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);10871088if (!_pipeline)1089return;10901091if (radv_sqtt_dump_pipeline())1092radv_unregister_pipeline(device, pipeline);10931094radv_DestroyPipeline(_device, _pipeline, pAllocator);1095}10961097#undef API_MARKER109810991100