Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/common/ac_rgp_elf_object_pack.c
7236 views
1
/*
2
* Copyright 2021 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*
24
*/
25
26
/**
27
* \file ac_rgp_elf_object_pack.c
28
*
29
* This file provides functions to create elf object for rgp profiling.
30
* The functions in this file create 64bit elf code object irrespective
31
* of if the driver is compiled as 32 or 64 bit.
32
*/
33
34
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libelf.h>
#include "ac_msgpack.h"
#include "ac_rgp.h"
#include "main/macros.h"
41
42
#ifndef EM_AMDGPU
43
// Old distributions may not have this enum constant
44
#define EM_AMDGPU 224
45
#endif
46
47
/*
 * API-level shader stage names as stored in the msgpack metadata.
 * The row index is the bit position taken from the record's
 * shader_stages_mask.
 */
char shader_stage_api_string[6][10] = {
   [0] = ".vertex",   /* vertex */
   [1] = ".hull",     /* tessellation control */
   [2] = ".domain",   /* tessellation evaluation */
   [3] = ".geometry", /* geometry */
   [4] = ".pixel",    /* fragment */
   [5] = ".compute",  /* compute */
};
55
56
/*
 * Short hardware stage names used as msgpack keys/values.
 * Indexed by the hw_stage field of struct rgp_shader_data.
 */
char hw_stage_string[RGP_HW_STAGE_MAX][4] = {
   [0] = ".vs",
   [1] = ".ls",
   [2] = ".hs",
   [3] = ".es",
   [4] = ".gs",
   [5] = ".ps",
   [6] = ".cs",
};
65
66
/*
 * Entry point symbol name of each hardware stage, written as the
 * ".entry_point" value in the msgpack metadata.
 * Indexed by the hw_stage field of struct rgp_shader_data.
 */
char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = {
   [0] = "_amdgpu_vs_main",
   [1] = "_amdgpu_ls_main",
   [2] = "_amdgpu_hs_main",
   [3] = "_amdgpu_es_main",
   [4] = "_amdgpu_gs_main",
   [5] = "_amdgpu_ps_main",
   [6] = "_amdgpu_cs_main",
};
75
76
/**
77
* rgp profiler requires data for few variables stored in msgpack format
78
* in notes section. This function writes the data from
79
* struct rgp_code_object_record to elf object in msgpack format.
80
* for msgpack specification refer to
81
* github.com/msgpack/msgpack/blob/master/spec.md
82
*/
83
static void
84
ac_rgp_write_msgpack(FILE *output,
85
struct rgp_code_object_record *record,
86
uint32_t *written_size)
87
{
88
struct ac_msgpack msgpack;
89
uint32_t num_shaders;
90
uint32_t i;
91
uint32_t mask;
92
93
num_shaders = util_bitcount(record->shader_stages_mask);
94
95
ac_msgpack_init(&msgpack);
96
97
ac_msgpack_add_fixmap_op(&msgpack, 2);
98
ac_msgpack_add_fixstr(&msgpack, "amdpal.version");
99
ac_msgpack_add_fixarray_op(&msgpack, 2);
100
ac_msgpack_add_uint(&msgpack, 2);
101
ac_msgpack_add_uint(&msgpack, 1);
102
103
ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines");
104
ac_msgpack_add_fixarray_op(&msgpack, 1);
105
ac_msgpack_add_fixmap_op(&msgpack, 6);
106
107
/* 1
108
* This not used in RGP but data needs to be present
109
*/
110
ac_msgpack_add_fixstr(&msgpack, ".spill_threshold");
111
ac_msgpack_add_uint(&msgpack, 0xffff);
112
113
/* 2
114
* This not used in RGP but data needs to be present
115
*/
116
ac_msgpack_add_fixstr(&msgpack, ".user_data_limit");
117
ac_msgpack_add_uint(&msgpack, 32);
118
119
/* 3 */
120
ac_msgpack_add_fixstr(&msgpack, ".shaders");
121
ac_msgpack_add_fixmap_op(&msgpack, num_shaders);
122
mask = record->shader_stages_mask;
123
while(mask) {
124
i = u_bit_scan(&mask);
125
ac_msgpack_add_fixstr(&msgpack,
126
shader_stage_api_string[i]);
127
ac_msgpack_add_fixmap_op(&msgpack, 2);
128
ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash");
129
ac_msgpack_add_fixarray_op(&msgpack, 2);
130
ac_msgpack_add_uint(&msgpack,
131
record->shader_data[i].hash[0]);
132
ac_msgpack_add_uint(&msgpack, 0);
133
ac_msgpack_add_fixstr(&msgpack, ".hardware_mapping");
134
ac_msgpack_add_fixarray_op(&msgpack, 1);
135
ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
136
record->shader_data[i].hw_stage]);
137
}
138
139
/* 4 */
140
ac_msgpack_add_fixstr(&msgpack, ".hardware_stages");
141
ac_msgpack_add_fixmap_op(&msgpack,
142
record->num_shaders_combined);
143
mask = record->shader_stages_mask;
144
while(mask) {
145
i = u_bit_scan(&mask);
146
147
if (record->shader_data[i].is_combined)
148
continue;
149
150
ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
151
record->shader_data[i].hw_stage]);
152
ac_msgpack_add_fixmap_op(&msgpack, 3);
153
ac_msgpack_add_fixstr(&msgpack, ".entry_point");
154
ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[
155
record->shader_data[i].hw_stage]);
156
157
ac_msgpack_add_fixstr(&msgpack, ".sgpr_count");
158
ac_msgpack_add_uint(&msgpack,
159
record->shader_data[i].sgpr_count);
160
161
ac_msgpack_add_fixstr(&msgpack, ".vgpr_count");
162
ac_msgpack_add_uint(&msgpack,
163
record->shader_data[i].vgpr_count);
164
}
165
166
/* 5 */
167
ac_msgpack_add_fixstr(&msgpack, ".internal_pipeline_hash");
168
ac_msgpack_add_fixarray_op(&msgpack, 2);
169
ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]);
170
ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]);
171
172
/* 6 */
173
ac_msgpack_add_fixstr(&msgpack, ".api");
174
ac_msgpack_add_fixstr(&msgpack, "Vulkan");
175
176
ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
177
msgpack.offset = ALIGN(msgpack.offset, 4);
178
fwrite(msgpack.mem, 1, msgpack.offset, output);
179
*written_size = msgpack.offset;
180
ac_msgpack_destroy(&msgpack);
181
}
182
183
184
static uint32_t
185
get_lowest_shader(uint32_t *shader_stages_mask,
186
struct rgp_code_object_record *record,
187
struct rgp_shader_data **rgp_shader_data)
188
{
189
uint32_t i, lowest = 0;
190
uint32_t mask;
191
uint64_t base_address = -1;
192
193
if (*shader_stages_mask == 0)
194
return false;
195
196
mask = *shader_stages_mask;
197
while(mask) {
198
i = u_bit_scan(&mask);
199
if (record->shader_data[i].is_combined) {
200
*shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << i);
201
continue;
202
}
203
if (base_address > record->shader_data[i].base_address) {
204
lowest = i;
205
base_address = record->shader_data[i].base_address;
206
}
207
}
208
209
*shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << lowest);
210
*rgp_shader_data = &record->shader_data[lowest];
211
return true;
212
}
213
214
/**
215
* write the shader code into elf object in text section
216
*/
217
static void
218
ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
219
struct rgp_code_object_record *record,
220
uint32_t *text_size)
221
{
222
struct rgp_shader_data *rgp_shader_data = NULL;
223
struct rgp_shader_data *prev_rgp_shader_data = NULL;
224
uint32_t symbol_offset = 0;
225
uint32_t mask = record->shader_stages_mask;
226
static bool warn_once = true;
227
228
while(get_lowest_shader(&mask, record, &rgp_shader_data)) {
229
if (prev_rgp_shader_data) {
230
uint32_t code_offset = rgp_shader_data->base_address -
231
prev_rgp_shader_data->base_address;
232
uint32_t gap_between_code = code_offset -
233
prev_rgp_shader_data->code_size;
234
symbol_offset += code_offset;
235
if (gap_between_code > 0x10000 && warn_once) {
236
fprintf(stderr, "Warning: shader code far from previous "
237
"(%d bytes apart). The rgp capture file "
238
"might be very large.\n", gap_between_code);
239
warn_once = false;
240
}
241
242
fseek(output, gap_between_code, SEEK_CUR);
243
*elf_size_calc += gap_between_code;
244
}
245
246
rgp_shader_data->elf_symbol_offset = symbol_offset;
247
fwrite(rgp_shader_data->code, 1, rgp_shader_data->code_size, output);
248
*elf_size_calc += rgp_shader_data->code_size;
249
prev_rgp_shader_data = rgp_shader_data;
250
}
251
252
symbol_offset += rgp_shader_data->code_size;
253
uint32_t align = ALIGN(symbol_offset, 256) - symbol_offset;
254
fseek(output, align, SEEK_CUR);
255
*elf_size_calc += align;
256
*text_size = symbol_offset + align;
257
}
258
259
/*
 * Fixed section header indices of the string table and of the text
 * section in the elf object. The section header table must be populated
 * in exactly this order, because these indices are referenced from other
 * places (e.g. from the symbol table entries).
 */
#define RGP_ELF_STRING_TBL_SEC_HEADER_INDEX 1
#define RGP_ELF_TEXT_SEC_HEADER_INDEX 2
266
267
/*
 * The string table is hardcoded so that it can be emitted with a single
 * write to the output. The strings live in a structure so that the offset
 * of a given string inside the string table is simply the offset of the
 * corresponding member. Every member is sized to hold exactly its string
 * including the terminating NUL.
 */
struct ac_rgp_elf_string_table {
   /* empty string at offset 0 */
   char null[sizeof("")];

   /* section names */
   char strtab[sizeof(".strtab")];
   char text[sizeof(".text")];
   char symtab[sizeof(".symtab")];
   char note[sizeof(".note")];

   /* entry point symbol names, one per hardware stage */
   char vs_main[sizeof("_amdgpu_vs_main")];
   char ls_main[sizeof("_amdgpu_ls_main")];
   char hs_main[sizeof("_amdgpu_hs_main")];
   char es_main[sizeof("_amdgpu_es_main")];
   char gs_main[sizeof("_amdgpu_gs_main")];
   char ps_main[sizeof("_amdgpu_ps_main")];
   char cs_main[sizeof("_amdgpu_cs_main")];
};

/* The one string table instance; its bytes are written to the file as is. */
struct ac_rgp_elf_string_table rgp_elf_strtab = {
   .null = "",

   .strtab = ".strtab",
   .text = ".text",
   .symtab = ".symtab",
   .note = ".note",

   .vs_main = "_amdgpu_vs_main",
   .ls_main = "_amdgpu_ls_main",
   .hs_main = "_amdgpu_hs_main",
   .es_main = "_amdgpu_es_main",
   .gs_main = "_amdgpu_gs_main",
   .ps_main = "_amdgpu_ps_main",
   .cs_main = "_amdgpu_cs_main",
};
301
302
uint32_t rgp_elf_hw_stage_string_offset[RGP_HW_STAGE_MAX] = {
303
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->vs_main,
304
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->ls_main,
305
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->hs_main,
306
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->es_main,
307
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->gs_main,
308
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->ps_main,
309
(uintptr_t)((struct ac_rgp_elf_string_table*)0)->cs_main,
310
};
311
312
313
static void
314
ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc,
315
struct rgp_code_object_record *record,
316
uint32_t *symbol_table_size)
317
{
318
Elf64_Sym elf_sym;
319
uint32_t i;
320
uint32_t mask = record->shader_stages_mask;
321
322
memset(&elf_sym, 0x00, sizeof(elf_sym));
323
fwrite(&elf_sym, 1, sizeof(elf_sym), output);
324
325
while(mask) {
326
i = u_bit_scan(&mask);
327
if (record->shader_data[i].is_combined)
328
continue;
329
330
elf_sym.st_name = rgp_elf_hw_stage_string_offset
331
[record->shader_data[i].hw_stage];
332
elf_sym.st_info = STT_FUNC;
333
elf_sym.st_other = 0x0;
334
elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX;
335
elf_sym.st_value = record->shader_data[i].elf_symbol_offset;
336
elf_sym.st_size = record->shader_data[i].code_size;
337
fwrite(&elf_sym, 1, sizeof(elf_sym), output);
338
}
339
340
*symbol_table_size = (record->num_shaders_combined + 1)
341
* sizeof(elf_sym);
342
*elf_size_calc += *symbol_table_size;
343
}
344
345
346
/* The defines below are taken from the LLVM project:
 * llvm/include/llvm/BinaryFormat/ELF.h
 */
349
#define ELFOSABI_AMDGPU_PAL 65
#define NT_AMDGPU_METADATA 32

/*
 * Identification bytes copied into e_ident of the elf header: the elf
 * magic, 64-bit class, little-endian data encoding, current version and
 * the AMDGPU PAL OS ABI. All bytes not listed are zero.
 */
uint8_t elf_ident[EI_NIDENT] = {
   [EI_MAG0] = ELFMAG0,
   [EI_MAG1] = ELFMAG1,
   [EI_MAG2] = ELFMAG2,
   [EI_MAG3] = ELFMAG3,
   [EI_CLASS] = ELFCLASS64,
   [EI_DATA] = ELFDATA2LSB,
   [EI_VERSION] = EV_CURRENT,
   [EI_OSABI] = ELFOSABI_AMDGPU_PAL,
};
357
358
#define NOTE_MSGPACK_NAME "AMDGPU"

/*
 * Header of the msgpack note: the generic elf note header followed by the
 * note name. The name array holds the name including its NUL terminator,
 * rounded up to a multiple of 4 bytes as the elf note format requires, so
 * that the padding is an explicit part of the array instead of implicit
 * struct tail padding. The size of the structure is unaffected.
 */
struct ac_rgp_elf_note_msgpack_hdr {
   Elf64_Nhdr hdr;
   char name[(sizeof(NOTE_MSGPACK_NAME) + 3) & ~(size_t)3];
};
363
364
void
365
ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
366
struct rgp_code_object_record *record,
367
uint32_t *written_size, uint32_t flags)
368
{
369
Elf64_Ehdr elf_hdr;
370
Elf64_Shdr sec_hdr[5];
371
uint32_t elf_size_calc;
372
struct ac_rgp_elf_note_msgpack_hdr note_hdr;
373
uint32_t text_size = 0;
374
uint32_t symbol_table_size = 0;
375
uint32_t msgpack_size = 0;
376
size_t note_sec_start;
377
uint32_t sh_offset;
378
379
/* Give space for header in file. It will be written to file at the end */
380
fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR);
381
382
elf_size_calc = sizeof(Elf64_Ehdr);
383
384
/* Initialize elf header */
385
memcpy(&elf_hdr.e_ident, &elf_ident, EI_NIDENT);
386
elf_hdr.e_type = ET_REL;
387
elf_hdr.e_machine = EM_AMDGPU;
388
elf_hdr.e_version = EV_CURRENT;
389
elf_hdr.e_entry = 0;
390
elf_hdr.e_flags = flags;
391
elf_hdr.e_shstrndx = 1; /* string table entry is hardcoded to 1*/
392
elf_hdr.e_phoff = 0;
393
elf_hdr.e_shentsize = sizeof(Elf64_Shdr);
394
elf_hdr.e_ehsize = sizeof(Elf64_Ehdr);
395
elf_hdr.e_phentsize = 0;
396
elf_hdr.e_phnum = 0;
397
398
/* write hardcoded string table */
399
fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output);
400
elf_size_calc += sizeof(rgp_elf_strtab);
401
402
/* write shader code as .text code */
403
ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size);
404
405
/* write symbol table */
406
ac_rgp_file_write_elf_symbol_table(output, &elf_size_calc, record,
407
&symbol_table_size);
408
409
/* write .note */
410
/* the .note section contains msgpack which stores variables */
411
note_sec_start = file_elf_start + elf_size_calc;
412
fseek(output, sizeof(struct ac_rgp_elf_note_msgpack_hdr), SEEK_CUR);
413
ac_rgp_write_msgpack(output, record, &msgpack_size);
414
note_hdr.hdr.n_namesz = sizeof(NOTE_MSGPACK_NAME);
415
note_hdr.hdr.n_descsz = msgpack_size;
416
note_hdr.hdr.n_type = NT_AMDGPU_METADATA;
417
memcpy(note_hdr.name, NOTE_MSGPACK_NAME "\0",
418
sizeof(NOTE_MSGPACK_NAME) + 1);
419
fseek(output, note_sec_start, SEEK_SET);
420
fwrite(&note_hdr, 1, sizeof(struct ac_rgp_elf_note_msgpack_hdr), output);
421
fseek(output, 0, SEEK_END);
422
elf_size_calc += (msgpack_size +
423
sizeof(struct ac_rgp_elf_note_msgpack_hdr));
424
425
/* write section headers */
426
sh_offset = elf_size_calc;
427
memset(&sec_hdr[0], 0x00, sizeof(Elf64_Shdr) * 5);
428
429
/* string table must be at index 1 as used in other places*/
430
sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab;
431
sec_hdr[1].sh_type = SHT_STRTAB;
432
sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr);
433
sec_hdr[1].sh_size = sizeof(rgp_elf_strtab);
434
435
/* text must be at index 2 as used in other places*/
436
sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text;
437
sec_hdr[2].sh_type = SHT_PROGBITS;
438
sec_hdr[2].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
439
sec_hdr[2].sh_offset = sec_hdr[1].sh_offset + sec_hdr[1].sh_size;
440
sec_hdr[2].sh_size = text_size;
441
sec_hdr[2].sh_addralign = 256;
442
443
sec_hdr[3].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->symtab;
444
sec_hdr[3].sh_type = SHT_SYMTAB;
445
sec_hdr[3].sh_offset = sec_hdr[2].sh_offset +
446
ALIGN(sec_hdr[2].sh_size, 256);
447
sec_hdr[3].sh_size = symbol_table_size;
448
sec_hdr[3].sh_link = RGP_ELF_STRING_TBL_SEC_HEADER_INDEX;
449
sec_hdr[3].sh_addralign = 8;
450
sec_hdr[3].sh_entsize = sizeof(Elf64_Sym);
451
452
sec_hdr[4].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->note;
453
sec_hdr[4].sh_type = SHT_NOTE;
454
sec_hdr[4].sh_offset = sec_hdr[3].sh_offset + sec_hdr[3].sh_size;
455
sec_hdr[4].sh_size = msgpack_size +
456
sizeof(struct ac_rgp_elf_note_msgpack_hdr);
457
sec_hdr[4].sh_addralign = 4;
458
fwrite(&sec_hdr, 1, sizeof(Elf64_Shdr) * 5, output);
459
elf_size_calc += (sizeof(Elf64_Shdr) * 5);
460
461
/* update and write elf header */
462
elf_hdr.e_shnum = 5;
463
elf_hdr.e_shoff = sh_offset;
464
465
fseek(output, file_elf_start, SEEK_SET);
466
fwrite(&elf_hdr, 1, sizeof(Elf64_Ehdr), output);
467
fseek(output, 0, SEEK_END);
468
469
*written_size = elf_size_calc;
470
}
471
472