GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/common/ac_rtld.c
/*
 * Copyright 2014-2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ac_rtld.h"

#include "ac_binary.h"
#include "ac_gpu_info.h"
#include "util/compiler.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include <gelf.h>
#include <libelf.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef EM_AMDGPU
// Old distributions may not have this enum constant
#define EM_AMDGPU 224
#endif

#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
#endif

#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
#define R_AMDGPU_ABS32_HI 2
#define R_AMDGPU_ABS64 3
#define R_AMDGPU_REL32 4
#define R_AMDGPU_REL64 5
#define R_AMDGPU_ABS32 6
#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO 10
#define R_AMDGPU_REL32_HI 11
#define R_AMDGPU_RELATIVE64 13
#endif

/* For the UMR disassembler. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5

struct ac_rtld_section {
   bool is_rx : 1;
   bool is_pasted_text : 1;
   uint64_t offset;
   const char *name;
};

struct ac_rtld_part {
   Elf *elf;
   struct ac_rtld_section *sections;
   unsigned num_sections;
};

static void report_errorvf(const char *fmt, va_list va)
{
   fprintf(stderr, "ac_rtld error: ");

   vfprintf(stderr, fmt, va);

   fprintf(stderr, "\n");
}

static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);

static void report_errorf(const char *fmt, ...)
{
   va_list va;
   va_start(va, fmt);
   report_errorvf(fmt, va);
   va_end(va);
}

static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);

static void report_elf_errorf(const char *fmt, ...)
{
   va_list va;
   va_start(va, fmt);
   report_errorvf(fmt, va);
   va_end(va);

   fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
}

/**
 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
 * \p part_idx.
 */
static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
                                                const char *name, unsigned part_idx)
{
   util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
      if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
         return symbol;
   }
   return 0;
}

static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
{
   const struct ac_rtld_symbol *lhs = lhsp;
   const struct ac_rtld_symbol *rhs = rhsp;
   if (rhs->align > lhs->align)
      return 1;
   if (rhs->align < lhs->align)
      return -1;
   return 0;
}

/**
 * Sort the given symbol list by decreasing alignment and assign offsets.
 */
static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
                           uint64_t *ptotal_size)
{
   qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);

   uint64_t total_size = *ptotal_size;

   for (unsigned i = 0; i < num_symbols; ++i) {
      struct ac_rtld_symbol *s = &symbols[i];
      assert(util_is_power_of_two_nonzero(s->align));

      total_size = align64(total_size, s->align);
      s->offset = total_size;

      if (total_size + s->size < total_size) {
         report_errorf("%s: size overflow", __FUNCTION__);
         return false;
      }

      total_size += s->size;
   }

   *ptotal_size = total_size;
   return true;
}
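
/*
 * Worked example (illustrative only): given three symbols A (align 16, size 4),
 * B (align 4, size 6) and C (align 8, size 8) and *ptotal_size == 0,
 * layout_symbols() sorts them by decreasing alignment (A, C, B) and assigns:
 *
 *    A: offset 0   (0 aligned up to 16)   -> running size 4
 *    C: offset 8   (4 aligned up to 8)    -> running size 16
 *    B: offset 16  (16 aligned up to 4)   -> running size 22
 *
 * so *ptotal_size becomes 22. ac_rtld_open() below uses the resulting total
 * as the LDS footprint of a shader part.
 */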

/**
 * Read LDS symbols from the given \p section of the ELF of \p part and append
 * them to the LDS symbols list.
 *
 * Shared LDS symbols are filtered out.
 */
static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
                                     Elf_Scn *section, uint32_t *lds_end_align)
{
#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return false; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         return false; \
      } \
   } while (false)

   struct ac_rtld_part *part = &binary->parts[part_idx];
   Elf64_Shdr *shdr = elf64_getshdr(section);
   uint32_t strtabidx = shdr->sh_link;
   Elf_Data *symbols_data = elf_getdata(section, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbol = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
      struct ac_rtld_symbol s = {0};

      if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
         /* old-style LDS symbols from initial prototype -- remove eventually */
         s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
      } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
         s.align = MIN2(symbol->st_value, 1u << 16);
         report_if(!util_is_power_of_two_nonzero(s.align));
      } else
         continue;

      report_if(symbol->st_size > 1u << 29);

      s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
      s.size = symbol->st_size;
      s.part_idx = part_idx;

      if (!strcmp(s.name, "__lds_end")) {
         report_elf_if(s.size != 0);
         *lds_end_align = MAX2(*lds_end_align, s.align);
         continue;
      }

      const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
      if (shared) {
         report_elf_if(s.align > shared->align);
         report_elf_if(s.size > shared->size);
         continue;
      }

      util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
   }

   return true;

#undef report_if
#undef report_elf_if
}
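
/*
 * Note on the symbol encoding handled above: an LDS symbol is recognized
 * either by the deprecated STT_AMDGPU_LDS symbol type, whose alignment is
 * packed into st_other, or by st_shndx == SHN_AMDGPU_LDS, in which case
 * st_value holds the required alignment and st_size the size in bytes. The
 * special zero-sized "__lds_end" symbol only contributes its alignment.
 */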

/**
 * Open a binary consisting of one or more shader parts.
 *
 * \param binary the uninitialized struct
 * \param i binary opening parameters
 */
bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
{
   /* One of the libelf implementations
    * (http://www.mr511.de/software/english.htm) requires calling
    * elf_version() before elf_memory().
    */
   elf_version(EV_CURRENT);

   memset(binary, 0, sizeof(*binary));
   memcpy(&binary->options, &i.options, sizeof(binary->options));
   binary->wave_size = i.wave_size;
   binary->num_parts = i.num_parts;
   binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
   if (!binary->parts)
      return false;

   uint64_t pasted_text_size = 0;
   uint64_t rx_align = 1;
   uint64_t rx_size = 0;
   uint64_t exec_size = 0;

#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         goto fail; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         goto fail; \
      } \
   } while (false)

   /* Copy and layout shared LDS symbols. */
   if (i.num_shared_lds_symbols) {
      if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
                                i.num_shared_lds_symbols))
         goto fail;

      memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
   }

   util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
      symbol->part_idx = ~0u;

   unsigned max_lds_size = 64 * 1024;

   if (i.info->chip_class == GFX6 ||
       (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
      max_lds_size = 32 * 1024;

   uint64_t shared_lds_size = 0;
   if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
      goto fail;

   if (shared_lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
              (unsigned)shared_lds_size, max_lds_size);
      goto fail;
   }
   binary->lds_size = shared_lds_size;

   /* First pass over all parts: open ELFs, pre-determine the placement of
    * sections in the memory image, and collect and layout private LDS symbols. */
   uint32_t lds_end_align = 0;

   if (binary->options.halt_at_entry)
      pasted_text_size += 4;

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      unsigned part_lds_symbols_begin =
         util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);

      part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
      report_elf_if(!part->elf);

      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
      report_elf_if(!ehdr);
      report_if(ehdr->e_machine != EM_AMDGPU);

      size_t section_str_index;
      size_t num_shdrs;
      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);

      part->num_sections = num_shdrs;
      part->sections = calloc(sizeof(*part->sections), num_shdrs);
      report_if(!part->sections);

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
         report_elf_if(!s->name);

         /* Cannot actually handle linked objects yet */
         report_elf_if(shdr->sh_addr != 0);

         /* Alignment must be 0 or a power of two */
         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);

         if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
            report_if(shdr->sh_flags & SHF_WRITE);

            s->is_rx = true;

            if (shdr->sh_flags & SHF_EXECINSTR) {
               report_elf_if(shdr->sh_size & 3);

               if (!strcmp(s->name, ".text"))
                  s->is_pasted_text = true;

               exec_size += shdr->sh_size;
            }

            if (s->is_pasted_text) {
               s->offset = pasted_text_size;
               pasted_text_size += shdr->sh_size;
            } else {
               rx_align = align(rx_align, sh_align);
               rx_size = align(rx_size, sh_align);
               s->offset = rx_size;
               rx_size += shdr->sh_size;
            }
         } else if (shdr->sh_type == SHT_SYMTAB) {
            if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
               goto fail;
         }
      }

      uint64_t part_lds_size = shared_lds_size;
      if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
                                                part_lds_symbols_begin),
                          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
                             part_lds_symbols_begin,
                          &part_lds_size))
         goto fail;
      binary->lds_size = MAX2(binary->lds_size, part_lds_size);
   }

   binary->rx_end_markers = pasted_text_size;
   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;

   /* __lds_end is a special symbol that points at the end of the memory
    * occupied by other LDS symbols. Its alignment is taken as the
    * maximum of its alignment over all shader parts where it occurs.
    */
   if (lds_end_align) {
      binary->lds_size = align(binary->lds_size, lds_end_align);

      struct ac_rtld_symbol *lds_end =
         util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
      lds_end->name = "__lds_end";
      lds_end->size = 0;
      lds_end->align = lds_end_align;
      lds_end->offset = binary->lds_size;
      lds_end->part_idx = ~0u;
   }

   if (binary->lds_size > max_lds_size) {
      fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
              (unsigned)binary->lds_size, max_lds_size);
      goto fail;
   }

   /* Second pass: Adjust offsets of non-pasted text sections. */
   binary->rx_size = pasted_text_size;
   binary->rx_size = align(binary->rx_size, rx_align);

   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
      struct ac_rtld_part *part = &binary->parts[part_idx];
      size_t num_shdrs;
      elf_getshdrnum(part->elf, &num_shdrs);

      for (unsigned j = 0; j < num_shdrs; ++j) {
         struct ac_rtld_section *s = &part->sections[j];
         if (s->is_rx && !s->is_pasted_text)
            s->offset += binary->rx_size;
      }
   }

   binary->rx_size += rx_size;
   binary->exec_size = exec_size;

   /* The SQ fetches up to N cache lines of 16 dwords
    * ahead of the PC, configurable by SH_MEM_CONFIG and
    * S_INST_PREFETCH. This can cause two issues:
    *
    * (1) Crossing a page boundary to an unmapped page. The logic
    *     does not distinguish between a required fetch and a "mere"
    *     prefetch and will fault.
    *
    * (2) Prefetching instructions that will be changed for a
    *     different shader.
    *
    * (2) is not currently an issue because we flush the I$ at IB
    * boundaries, but (1) needs to be addressed. Due to buffer
    * suballocation, we just play it safe.
    */
   unsigned prefetch_distance = 0;

   if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
      prefetch_distance = 16;
   else if (i.info->chip_class >= GFX10)
      prefetch_distance = 3;

   if (prefetch_distance)
      binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);

   return true;

#undef report_if
#undef report_elf_if

fail:
   ac_rtld_close(binary);
   return false;
}
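
/*
 * Usage sketch (illustrative; the ac_rtld_open_info field names follow their
 * uses in ac_rtld_open() above, see ac_rtld.h for the authoritative
 * definitions, and the local names sscreen_info, elf_bytes and elf_nbytes are
 * placeholders): a driver typically opens one or more shader-part ELFs, reads
 * the resulting sizes, uploads, then closes.
 *
 *    struct ac_rtld_binary rtld = {0};
 *    struct ac_rtld_open_info info = {
 *       .info = &sscreen_info,       // the device's struct radeon_info
 *       .shader_type = MESA_SHADER_COMPUTE,
 *       .wave_size = 64,
 *       .num_parts = 1,
 *       .elf_ptrs = &elf_bytes,      // pointer to the ELF image in memory
 *       .elf_sizes = &elf_nbytes,
 *    };
 *    if (!ac_rtld_open(&rtld, info))
 *       return false;
 *    // rtld.rx_size now tells the caller how large a GPU buffer to allocate
 *    // before calling ac_rtld_upload(); ac_rtld_close(&rtld) releases the
 *    // libelf handles when done.
 */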

void ac_rtld_close(struct ac_rtld_binary *binary)
{
   for (unsigned i = 0; i < binary->num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      free(part->sections);
      elf_end(part->elf);
   }

   util_dynarray_fini(&binary->lds_symbols);
   free(binary->parts);
   binary->parts = NULL;
   binary->num_parts = 0;
}

static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
                                size_t *nbytes)
{
   for (unsigned i = 0; i < part->num_sections; ++i) {
      struct ac_rtld_section *s = &part->sections[i];
      if (s->name && !strcmp(name, s->name)) {
         Elf_Scn *target_scn = elf_getscn(part->elf, i);
         Elf_Data *target_data = elf_getdata(target_scn, NULL);
         if (!target_data) {
            report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
            return false;
         }

         *data = target_data->d_buf;
         *nbytes = target_data->d_size;
         return true;
      }
   }
   return false;
}

bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
                                 size_t *nbytes)
{
   assert(binary->num_parts == 1);
   return get_section_by_name(&binary->parts[0], name, data, nbytes);
}

bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
                         struct ac_shader_config *config)
{
   for (unsigned i = 0; i < binary->num_parts; ++i) {
      struct ac_rtld_part *part = &binary->parts[i];
      const char *config_data;
      size_t config_nbytes;

      if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
         return false;

      /* TODO: be precise about scratch use? */
      struct ac_shader_config c = {0};
      ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);

      config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
      config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
      config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
      config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
      config->scratch_bytes_per_wave =
         MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);

      assert(i == 0 || config->float_mode == c.float_mode);
      config->float_mode = c.float_mode;

      /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
       * the main shader part is used. */
      assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
      config->spi_ps_input_ena = c.spi_ps_input_ena;
      config->spi_ps_input_addr = c.spi_ps_input_addr;

      /* TODO: consistently use LDS symbols for this */
      config->lds_size = MAX2(config->lds_size, c.lds_size);

      /* TODO: Should we combine these somehow? It's currently only
       * used for radeonsi's compute, where multiple parts aren't used. */
      assert(config->rsrc1 == 0 && config->rsrc2 == 0);
      config->rsrc1 = c.rsrc1;
      config->rsrc2 = c.rsrc2;
   }

   return true;
}

static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
                           const Elf64_Sym *sym, const char *name, uint64_t *value)
{
   /* TODO: properly disentangle the undef and the LDS cases once
    * STT_AMDGPU_LDS is retired. */
   if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
      const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);

      if (lds_sym) {
         *value = lds_sym->offset;
         return true;
      }

      /* TODO: resolve from other parts */

      if (u->get_external_symbol(u->cb_data, name, value))
         return true;

      report_errorf("symbol %s: unknown", name);
      return false;
   }

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   if (sym->st_shndx >= part->num_sections) {
      report_errorf("symbol %s: section out of bounds", name);
      return false;
   }

   struct ac_rtld_section *s = &part->sections[sym->st_shndx];
   if (!s->is_rx) {
      report_errorf("symbol %s: bad section", name);
      return false;
   }

   uint64_t section_base = u->rx_va + s->offset;

   *value = section_base + sym->st_value;
   return true;
}

static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
                         const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
{
#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return false; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_elf_errorf(#cond); \
         return false; \
      } \
   } while (false)

   struct ac_rtld_part *part = &u->binary->parts[part_idx];
   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
   report_elf_if(!target_scn);

   Elf_Data *target_data = elf_getdata(target_scn, NULL);
   report_elf_if(!target_data);

   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
   report_elf_if(!symbols_scn);

   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
   report_elf_if(!symbols_shdr);
   uint32_t strtabidx = symbols_shdr->sh_link;

   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
   report_elf_if(!symbols_data);

   const Elf64_Sym *symbols = symbols_data->d_buf;
   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);

   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
   report_if(!s->is_rx);

   const char *orig_base = target_data->d_buf;
   char *dst_base = u->rx_ptr + s->offset;
   uint64_t va_base = u->rx_va + s->offset;

   Elf64_Rel *rel = reloc_data->d_buf;
   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
      size_t r_sym = ELF64_R_SYM(rel->r_info);
      unsigned r_type = ELF64_R_TYPE(rel->r_info);

      const char *orig_ptr = orig_base + rel->r_offset;
      char *dst_ptr = dst_base + rel->r_offset;
      uint64_t va = va_base + rel->r_offset;

      uint64_t symbol;
      uint64_t addend;

      if (r_sym == STN_UNDEF) {
         symbol = 0;
      } else {
         report_elf_if(r_sym >= num_symbols);

         const Elf64_Sym *sym = &symbols[r_sym];
         const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
         report_elf_if(!symbol_name);

         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
            return false;
      }

      /* TODO: Should we also support .rela sections, where the
       * addend is part of the relocation record? */

      /* Load the addend from the ELF instead of the destination,
       * because the destination may be in VRAM. */
      switch (r_type) {
      case R_AMDGPU_ABS32:
      case R_AMDGPU_ABS32_LO:
      case R_AMDGPU_ABS32_HI:
      case R_AMDGPU_REL32:
      case R_AMDGPU_REL32_LO:
      case R_AMDGPU_REL32_HI:
         addend = *(const uint32_t *)orig_ptr;
         break;
      case R_AMDGPU_ABS64:
      case R_AMDGPU_REL64:
         addend = *(const uint64_t *)orig_ptr;
         break;
      default:
         report_errorf("unsupported r_type == %u", r_type);
         return false;
      }

      uint64_t abs = symbol + addend;

      switch (r_type) {
      case R_AMDGPU_ABS32:
         assert((uint32_t)abs == abs);
         FALLTHROUGH;
      case R_AMDGPU_ABS32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
         break;
      case R_AMDGPU_ABS32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
         break;
      case R_AMDGPU_ABS64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
         break;
      case R_AMDGPU_REL32:
         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
         FALLTHROUGH;
      case R_AMDGPU_REL32_LO:
         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
         break;
      case R_AMDGPU_REL32_HI:
         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
         break;
      case R_AMDGPU_REL64:
         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
         break;
      default:
         unreachable("bad r_type");
      }
   }

   return true;

#undef report_if
#undef report_elf_if
}
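
/*
 * Worked example (illustrative): for an R_AMDGPU_REL32_LO relocation at
 * r_offset 0x10 in a section placed at u->rx_va + s->offset == 0x1000, with a
 * resolved symbol value of 0x2000 and an addend of 4 stored in the ELF, the
 * patched dword is the low half of (0x2000 + 4) - 0x1010 = 0xff4, written
 * little-endian into the uploaded copy of the section.
 */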

/**
 * Upload the binary or binaries to the provided GPU buffers, including
 * relocations.
 */
int ac_rtld_upload(struct ac_rtld_upload_info *u)
{
#define report_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return -1; \
      } \
   } while (false)
#define report_elf_if(cond) \
   do { \
      if ((cond)) { \
         report_errorf(#cond); \
         return -1; \
      } \
   } while (false)

   int size = 0;
   if (u->binary->options.halt_at_entry) {
      /* s_sethalt 1 */
      *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
   }

   /* First pass: upload raw section data and lay out private LDS symbols. */
   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
      struct ac_rtld_part *part = &u->binary->parts[i];

      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];

         if (!s->is_rx)
            continue;

         report_if(shdr->sh_type != SHT_PROGBITS);

         Elf_Data *data = elf_getdata(section, NULL);
         report_elf_if(!data || data->d_size != shdr->sh_size);
         memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);

         size = MAX2(size, s->offset + shdr->sh_size);
      }
   }

   if (u->binary->rx_end_markers) {
      uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
      for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
         *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
      size += 4 * DEBUGGER_NUM_MARKERS;
   }

   /* Second pass: handle relocations, overwriting uploaded data where
    * appropriate. */
   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
      struct ac_rtld_part *part = &u->binary->parts[i];
      Elf_Scn *section = NULL;
      while ((section = elf_nextscn(part->elf, section))) {
         Elf64_Shdr *shdr = elf64_getshdr(section);
         if (shdr->sh_type == SHT_REL) {
            Elf_Data *relocs = elf_getdata(section, NULL);
            report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
            if (!apply_relocs(u, i, shdr, relocs))
               return -1;
         } else if (shdr->sh_type == SHT_RELA) {
            report_errorf("SHT_RELA not supported");
            return -1;
         }
      }
   }

   return size;

#undef report_if
#undef report_elf_if
}
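
/*
 * Usage sketch (illustrative; the ac_rtld_upload_info field names follow their
 * uses above, and my_symbol_cb, my_cb_data, gpu_buffer_va and
 * cpu_mapping_of_gpu_buffer are placeholders): after ac_rtld_open() has
 * computed rtld.rx_size, the caller maps a GPU buffer of at least that size
 * and uploads with relocation support.
 *
 *    struct ac_rtld_upload_info up = {
 *       .binary = &rtld,
 *       .get_external_symbol = my_symbol_cb, // resolves symbols not defined in the binary
 *       .cb_data = my_cb_data,
 *       .rx_va = gpu_buffer_va,
 *       .rx_ptr = cpu_mapping_of_gpu_buffer,
 *    };
 *    int nbytes = ac_rtld_upload(&up);
 *    if (nbytes < 0) {
 *       // upload or relocation failed
 *    }
 */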