Path: blob/21.2-virgl/src/amd/common/ac_rgp_elf_object_pack.c
7236 views
/*1* Copyright 2021 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*23*/2425/**26* \file ac_rgp_elf_object_pack.c27*28* This file provides functions to create elf object for rgp profiling.29* The functions in this file create 64bit elf code object irrespective30* of if the driver is compiled as 32 or 64 bit.31*/3233#include <stdint.h>34#include <stdio.h>35#include <string.h>36#include <libelf.h>37#include "ac_msgpack.h"38#include "ac_rgp.h"39#include "main/macros.h"4041#ifndef EM_AMDGPU42// Old distributions may not have this enum constant43#define EM_AMDGPU 22444#endif4546char shader_stage_api_string[6][10] = {47".vertex", /* vertex */48".hull", /* tessellation control */49".domain", /* tessellation evaluation */50".geometry", /* geometry */51".pixel", /* fragment */52".compute" /* compute */53};5455char hw_stage_string[RGP_HW_STAGE_MAX][4] = {56".vs",57".ls",58".hs",59".es",60".gs",61".ps",62".cs"63};6465char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = {66"_amdgpu_vs_main",67"_amdgpu_ls_main",68"_amdgpu_hs_main",69"_amdgpu_es_main",70"_amdgpu_gs_main",71"_amdgpu_ps_main",72"_amdgpu_cs_main"73};7475/**76* rgp profiler requires data for few variables stored in msgpack format77* in notes section. This function writes the data from78* struct rgp_code_object_record to elf object in msgpack format.79* for msgpack specification refer to80* github.com/msgpack/msgpack/blob/master/spec.md81*/82static void83ac_rgp_write_msgpack(FILE *output,84struct rgp_code_object_record *record,85uint32_t *written_size)86{87struct ac_msgpack msgpack;88uint32_t num_shaders;89uint32_t i;90uint32_t mask;9192num_shaders = util_bitcount(record->shader_stages_mask);9394ac_msgpack_init(&msgpack);9596ac_msgpack_add_fixmap_op(&msgpack, 2);97ac_msgpack_add_fixstr(&msgpack, "amdpal.version");98ac_msgpack_add_fixarray_op(&msgpack, 2);99ac_msgpack_add_uint(&msgpack, 2);100ac_msgpack_add_uint(&msgpack, 1);101102ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines");103ac_msgpack_add_fixarray_op(&msgpack, 1);104ac_msgpack_add_fixmap_op(&msgpack, 6);105106/* 1107* This not used in RGP but data needs to be present108*/109ac_msgpack_add_fixstr(&msgpack, ".spill_threshold");110ac_msgpack_add_uint(&msgpack, 0xffff);111112/* 2113* This not used in RGP but data needs to be present114*/115ac_msgpack_add_fixstr(&msgpack, ".user_data_limit");116ac_msgpack_add_uint(&msgpack, 32);117118/* 3 */119ac_msgpack_add_fixstr(&msgpack, ".shaders");120ac_msgpack_add_fixmap_op(&msgpack, num_shaders);121mask = record->shader_stages_mask;122while(mask) {123i = u_bit_scan(&mask);124ac_msgpack_add_fixstr(&msgpack,125shader_stage_api_string[i]);126ac_msgpack_add_fixmap_op(&msgpack, 2);127ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash");128ac_msgpack_add_fixarray_op(&msgpack, 2);129ac_msgpack_add_uint(&msgpack,130record->shader_data[i].hash[0]);131ac_msgpack_add_uint(&msgpack, 0);132ac_msgpack_add_fixstr(&msgpack, ".hardware_mapping");133ac_msgpack_add_fixarray_op(&msgpack, 1);134ac_msgpack_add_fixstr(&msgpack, hw_stage_string[135record->shader_data[i].hw_stage]);136}137138/* 4 */139ac_msgpack_add_fixstr(&msgpack, ".hardware_stages");140ac_msgpack_add_fixmap_op(&msgpack,141record->num_shaders_combined);142mask = record->shader_stages_mask;143while(mask) {144i = u_bit_scan(&mask);145146if (record->shader_data[i].is_combined)147continue;148149ac_msgpack_add_fixstr(&msgpack, hw_stage_string[150record->shader_data[i].hw_stage]);151ac_msgpack_add_fixmap_op(&msgpack, 3);152ac_msgpack_add_fixstr(&msgpack, ".entry_point");153ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[154record->shader_data[i].hw_stage]);155156ac_msgpack_add_fixstr(&msgpack, ".sgpr_count");157ac_msgpack_add_uint(&msgpack,158record->shader_data[i].sgpr_count);159160ac_msgpack_add_fixstr(&msgpack, ".vgpr_count");161ac_msgpack_add_uint(&msgpack,162record->shader_data[i].vgpr_count);163}164165/* 5 */166ac_msgpack_add_fixstr(&msgpack, ".internal_pipeline_hash");167ac_msgpack_add_fixarray_op(&msgpack, 2);168ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]);169ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]);170171/* 6 */172ac_msgpack_add_fixstr(&msgpack, ".api");173ac_msgpack_add_fixstr(&msgpack, "Vulkan");174175ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));176msgpack.offset = ALIGN(msgpack.offset, 4);177fwrite(msgpack.mem, 1, msgpack.offset, output);178*written_size = msgpack.offset;179ac_msgpack_destroy(&msgpack);180}181182183static uint32_t184get_lowest_shader(uint32_t *shader_stages_mask,185struct rgp_code_object_record *record,186struct rgp_shader_data **rgp_shader_data)187{188uint32_t i, lowest = 0;189uint32_t mask;190uint64_t base_address = -1;191192if (*shader_stages_mask == 0)193return false;194195mask = *shader_stages_mask;196while(mask) {197i = u_bit_scan(&mask);198if (record->shader_data[i].is_combined) {199*shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << i);200continue;201}202if (base_address > record->shader_data[i].base_address) {203lowest = i;204base_address = record->shader_data[i].base_address;205}206}207208*shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << lowest);209*rgp_shader_data = &record->shader_data[lowest];210return true;211}212213/**214* write the shader code into elf object in text section215*/216static void217ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,218struct rgp_code_object_record *record,219uint32_t *text_size)220{221struct rgp_shader_data *rgp_shader_data = NULL;222struct rgp_shader_data *prev_rgp_shader_data = NULL;223uint32_t symbol_offset = 0;224uint32_t mask = record->shader_stages_mask;225static bool warn_once = true;226227while(get_lowest_shader(&mask, record, &rgp_shader_data)) {228if (prev_rgp_shader_data) {229uint32_t code_offset = rgp_shader_data->base_address -230prev_rgp_shader_data->base_address;231uint32_t gap_between_code = code_offset -232prev_rgp_shader_data->code_size;233symbol_offset += code_offset;234if (gap_between_code > 0x10000 && warn_once) {235fprintf(stderr, "Warning: shader code far from previous "236"(%d bytes apart). The rgp capture file "237"might be very large.\n", gap_between_code);238warn_once = false;239}240241fseek(output, gap_between_code, SEEK_CUR);242*elf_size_calc += gap_between_code;243}244245rgp_shader_data->elf_symbol_offset = symbol_offset;246fwrite(rgp_shader_data->code, 1, rgp_shader_data->code_size, output);247*elf_size_calc += rgp_shader_data->code_size;248prev_rgp_shader_data = rgp_shader_data;249}250251symbol_offset += rgp_shader_data->code_size;252uint32_t align = ALIGN(symbol_offset, 256) - symbol_offset;253fseek(output, align, SEEK_CUR);254*elf_size_calc += align;255*text_size = symbol_offset + align;256}257258/*259* hardcoded index for string table and text section in elf object.260* While populating section header table, the index order should261* be strictly followed.262*/263#define RGP_ELF_STRING_TBL_SEC_HEADER_INDEX 1264#define RGP_ELF_TEXT_SEC_HEADER_INDEX 2265266/*267* hardcode the string table so that is a single write to output.268* the strings are in a structure so that it is easy to get the offset269* of given string in string table.270*/271struct ac_rgp_elf_string_table {272char null[sizeof("")];273char strtab[sizeof(".strtab")];274char text[sizeof(".text")];275char symtab[sizeof(".symtab")];276char note[sizeof(".note")];277char vs_main[sizeof("_amdgpu_vs_main")];278char ls_main[sizeof("_amdgpu_ls_main")];279char hs_main[sizeof("_amdgpu_hs_main")];280char es_main[sizeof("_amdgpu_es_main")];281char gs_main[sizeof("_amdgpu_gs_main")];282char ps_main[sizeof("_amdgpu_ps_main")];283char cs_main[sizeof("_amdgpu_cs_main")];284};285286struct ac_rgp_elf_string_table rgp_elf_strtab = {287.null = "",288.strtab = ".strtab",289.text = ".text",290.symtab = ".symtab",291.note = ".note",292.vs_main = "_amdgpu_vs_main",293.ls_main = "_amdgpu_ls_main",294.hs_main = "_amdgpu_hs_main",295.es_main = "_amdgpu_es_main",296.gs_main = "_amdgpu_gs_main",297.ps_main = "_amdgpu_ps_main",298.cs_main = "_amdgpu_cs_main",299};300301uint32_t rgp_elf_hw_stage_string_offset[RGP_HW_STAGE_MAX] = {302(uintptr_t)((struct ac_rgp_elf_string_table*)0)->vs_main,303(uintptr_t)((struct ac_rgp_elf_string_table*)0)->ls_main,304(uintptr_t)((struct ac_rgp_elf_string_table*)0)->hs_main,305(uintptr_t)((struct ac_rgp_elf_string_table*)0)->es_main,306(uintptr_t)((struct ac_rgp_elf_string_table*)0)->gs_main,307(uintptr_t)((struct ac_rgp_elf_string_table*)0)->ps_main,308(uintptr_t)((struct ac_rgp_elf_string_table*)0)->cs_main,309};310311312static void313ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc,314struct rgp_code_object_record *record,315uint32_t *symbol_table_size)316{317Elf64_Sym elf_sym;318uint32_t i;319uint32_t mask = record->shader_stages_mask;320321memset(&elf_sym, 0x00, sizeof(elf_sym));322fwrite(&elf_sym, 1, sizeof(elf_sym), output);323324while(mask) {325i = u_bit_scan(&mask);326if (record->shader_data[i].is_combined)327continue;328329elf_sym.st_name = rgp_elf_hw_stage_string_offset330[record->shader_data[i].hw_stage];331elf_sym.st_info = STT_FUNC;332elf_sym.st_other = 0x0;333elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX;334elf_sym.st_value = record->shader_data[i].elf_symbol_offset;335elf_sym.st_size = record->shader_data[i].code_size;336fwrite(&elf_sym, 1, sizeof(elf_sym), output);337}338339*symbol_table_size = (record->num_shaders_combined + 1)340* sizeof(elf_sym);341*elf_size_calc += *symbol_table_size;342}343344345/* Below defines from from llvm project346* llvm/includel/llvm/BinaryFormat/ELF.h347*/348#define ELFOSABI_AMDGPU_PAL 65349#define NT_AMDGPU_METADATA 32350351uint8_t elf_ident[EI_NIDENT] = { ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,352ELFCLASS64, ELFDATA2LSB, EV_CURRENT,353ELFOSABI_AMDGPU_PAL,3540x00, 0x00, 0x00, 0x00, 0x00, 0x00,3550x00, 0x00 };356357#define NOTE_MSGPACK_NAME "AMDGPU"358struct ac_rgp_elf_note_msgpack_hdr {359Elf64_Nhdr hdr;360char name[sizeof(NOTE_MSGPACK_NAME)];361};362363void364ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,365struct rgp_code_object_record *record,366uint32_t *written_size, uint32_t flags)367{368Elf64_Ehdr elf_hdr;369Elf64_Shdr sec_hdr[5];370uint32_t elf_size_calc;371struct ac_rgp_elf_note_msgpack_hdr note_hdr;372uint32_t text_size = 0;373uint32_t symbol_table_size = 0;374uint32_t msgpack_size = 0;375size_t note_sec_start;376uint32_t sh_offset;377378/* Give space for header in file. It will be written to file at the end */379fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR);380381elf_size_calc = sizeof(Elf64_Ehdr);382383/* Initialize elf header */384memcpy(&elf_hdr.e_ident, &elf_ident, EI_NIDENT);385elf_hdr.e_type = ET_REL;386elf_hdr.e_machine = EM_AMDGPU;387elf_hdr.e_version = EV_CURRENT;388elf_hdr.e_entry = 0;389elf_hdr.e_flags = flags;390elf_hdr.e_shstrndx = 1; /* string table entry is hardcoded to 1*/391elf_hdr.e_phoff = 0;392elf_hdr.e_shentsize = sizeof(Elf64_Shdr);393elf_hdr.e_ehsize = sizeof(Elf64_Ehdr);394elf_hdr.e_phentsize = 0;395elf_hdr.e_phnum = 0;396397/* write hardcoded string table */398fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output);399elf_size_calc += sizeof(rgp_elf_strtab);400401/* write shader code as .text code */402ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size);403404/* write symbol table */405ac_rgp_file_write_elf_symbol_table(output, &elf_size_calc, record,406&symbol_table_size);407408/* write .note */409/* the .note section contains msgpack which stores variables */410note_sec_start = file_elf_start + elf_size_calc;411fseek(output, sizeof(struct ac_rgp_elf_note_msgpack_hdr), SEEK_CUR);412ac_rgp_write_msgpack(output, record, &msgpack_size);413note_hdr.hdr.n_namesz = sizeof(NOTE_MSGPACK_NAME);414note_hdr.hdr.n_descsz = msgpack_size;415note_hdr.hdr.n_type = NT_AMDGPU_METADATA;416memcpy(note_hdr.name, NOTE_MSGPACK_NAME "\0",417sizeof(NOTE_MSGPACK_NAME) + 1);418fseek(output, note_sec_start, SEEK_SET);419fwrite(¬e_hdr, 1, sizeof(struct ac_rgp_elf_note_msgpack_hdr), output);420fseek(output, 0, SEEK_END);421elf_size_calc += (msgpack_size +422sizeof(struct ac_rgp_elf_note_msgpack_hdr));423424/* write section headers */425sh_offset = elf_size_calc;426memset(&sec_hdr[0], 0x00, sizeof(Elf64_Shdr) * 5);427428/* string table must be at index 1 as used in other places*/429sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab;430sec_hdr[1].sh_type = SHT_STRTAB;431sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr);432sec_hdr[1].sh_size = sizeof(rgp_elf_strtab);433434/* text must be at index 2 as used in other places*/435sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text;436sec_hdr[2].sh_type = SHT_PROGBITS;437sec_hdr[2].sh_flags = SHF_ALLOC | SHF_EXECINSTR;438sec_hdr[2].sh_offset = sec_hdr[1].sh_offset + sec_hdr[1].sh_size;439sec_hdr[2].sh_size = text_size;440sec_hdr[2].sh_addralign = 256;441442sec_hdr[3].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->symtab;443sec_hdr[3].sh_type = SHT_SYMTAB;444sec_hdr[3].sh_offset = sec_hdr[2].sh_offset +445ALIGN(sec_hdr[2].sh_size, 256);446sec_hdr[3].sh_size = symbol_table_size;447sec_hdr[3].sh_link = RGP_ELF_STRING_TBL_SEC_HEADER_INDEX;448sec_hdr[3].sh_addralign = 8;449sec_hdr[3].sh_entsize = sizeof(Elf64_Sym);450451sec_hdr[4].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->note;452sec_hdr[4].sh_type = SHT_NOTE;453sec_hdr[4].sh_offset = sec_hdr[3].sh_offset + sec_hdr[3].sh_size;454sec_hdr[4].sh_size = msgpack_size +455sizeof(struct ac_rgp_elf_note_msgpack_hdr);456sec_hdr[4].sh_addralign = 4;457fwrite(&sec_hdr, 1, sizeof(Elf64_Shdr) * 5, output);458elf_size_calc += (sizeof(Elf64_Shdr) * 5);459460/* update and write elf header */461elf_hdr.e_shnum = 5;462elf_hdr.e_shoff = sh_offset;463464fseek(output, file_elf_start, SEEK_SET);465fwrite(&elf_hdr, 1, sizeof(Elf64_Ehdr), output);466fseek(output, 0, SEEK_END);467468*written_size = elf_size_calc;469}470471472