diff options
author | Nicolai Hähnle <[email protected]> | 2019-05-03 21:18:51 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2019-06-12 20:28:23 -0400 |
commit | f8315ae04b304bbdb47680654238edd107c5a129 (patch) | |
tree | f6b0c284ea97df378392b5b48811397ce32c88d6 /src/amd/common/ac_rtld.c | |
parent | dc99a8cd9bf743746926c062a7c921f272e1648f (diff) |
amd/rtld: layout and relocate LDS symbols
Upcoming changes to LLVM will emit LDS objects as symbols in the ELF
symbol table, together with relocations that reference them. This change
lays out those symbols and resolves the relocations.
Callers will also be able to define LDS symbols that are shared between
shader parts. This will be used by radeonsi for the ESGS ring in gfx9+
merged shaders.
Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/amd/common/ac_rtld.c')
-rw-r--r-- | src/amd/common/ac_rtld.c | 215 |
1 files changed, 199 insertions, 16 deletions
diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c index 996ff045b16..dc9cc04705b 100644 --- a/src/amd/common/ac_rtld.c +++ b/src/amd/common/ac_rtld.c @@ -31,11 +31,17 @@ #include <string.h> #include "ac_binary.h" +#include "ac_gpu_info.h" +#include "util/u_dynarray.h" #include "util/u_math.h" // Old distributions may not have this enum constant #define MY_EM_AMDGPU 224 +#ifndef STT_AMDGPU_LDS +#define STT_AMDGPU_LDS 13 +#endif + #ifndef R_AMDGPU_NONE #define R_AMDGPU_NONE 0 #define R_AMDGPU_ABS32_LO 1 @@ -105,16 +111,130 @@ static void report_elf_errorf(const char *fmt, ...) } /** + * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader + * \p part_idx. + */ +static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols, + const char *name, unsigned part_idx) +{ + util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) { + if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && + !strcmp(name, symbol->name)) + return symbol; + } + return 0; +} + +static int compare_symbol_by_align(const void *lhsp, const void *rhsp) +{ + const struct ac_rtld_symbol *lhs = lhsp; + const struct ac_rtld_symbol *rhs = rhsp; + if (rhs->align > lhs->align) + return -1; + if (rhs->align < lhs->align) + return 1; + return 0; +} + +/** + * Sort the given symbol list by decreasing alignment and assign offsets. 
+ */ +static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols, + uint64_t *ptotal_size) +{ + qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align); + + uint64_t total_size = *ptotal_size; + + for (unsigned i = 0; i < num_symbols; ++i) { + struct ac_rtld_symbol *s = &symbols[i]; + assert(util_is_power_of_two_nonzero(s->align)); + + total_size = align64(total_size, s->align); + s->offset = total_size; + + if (total_size + s->size < total_size) { + report_errorf("%s: size overflow", __FUNCTION__); + return false; + } + + total_size += s->size; + } + + *ptotal_size = total_size; + return true; +} + +/** + * Read LDS symbols from the given \p section of the ELF of \p part and append + * them to the LDS symbols list. + * + * Shared LDS symbols are filtered out. + */ +static bool read_private_lds_symbols(struct ac_rtld_binary *binary, + unsigned part_idx, + Elf_Scn *section, + uint32_t *lds_end_align) +{ +#define report_elf_if(cond) \ + do { \ + if ((cond)) { \ + report_errorf(#cond); \ + return false; \ + } \ + } while (false) + + struct ac_rtld_part *part = &binary->parts[part_idx]; + Elf64_Shdr *shdr = elf64_getshdr(section); + uint32_t strtabidx = shdr->sh_link; + Elf_Data *symbols_data = elf_getdata(section, NULL); + report_elf_if(!symbols_data); + + const Elf64_Sym *symbol = symbols_data->d_buf; + size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); + + for (size_t j = 0; j < num_symbols; ++j, ++symbol) { + if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS) + continue; + + report_elf_if(symbol->st_size > 1u << 29); + + struct ac_rtld_symbol s = {}; + s.name = elf_strptr(part->elf, strtabidx, symbol->st_name); + s.size = symbol->st_size; + s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); + s.part_idx = part_idx; + + if (!strcmp(s.name, "__lds_end")) { + report_elf_if(s.size != 0); + *lds_end_align = MAX2(*lds_end_align, s.align); + continue; + } + + const struct ac_rtld_symbol *shared = + 
find_symbol(&binary->lds_symbols, s.name, part_idx); + if (shared) { + report_elf_if(s.align > shared->align); + report_elf_if(s.size > shared->size); + continue; + } + + util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s); + } + + return true; + +#undef report_elf_if +} + +/** * Open a binary consisting of one or more shader parts. * * \param binary the uninitialized struct - * \param num_parts number of shader parts - * \param elf_ptrs pointers to the in-memory ELF objects for each shader part - * \param elf_sizes sizes (in bytes) of the in-memory ELF objects + * \param i binary opening parameters */ -bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts, - const char * const *elf_ptrs, - const size_t *elf_sizes) +bool ac_rtld_open(struct ac_rtld_binary *binary, + struct ac_rtld_open_info i) { /* One of the libelf implementations * (http://www.mr511.de/software/english.htm) requires calling @@ -123,8 +243,8 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts, elf_version(EV_CURRENT); memset(binary, 0, sizeof(*binary)); - binary->num_parts = num_parts; - binary->parts = calloc(sizeof(*binary->parts), num_parts); + binary->num_parts = i.num_parts; + binary->parts = calloc(sizeof(*binary->parts), i.num_parts); if (!binary->parts) return false; @@ -147,11 +267,35 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts, } \ } while (false) - /* First pass over all parts: open ELFs and determine the placement of - * sections in the memory image. */ - for (unsigned i = 0; i < num_parts; ++i) { - struct ac_rtld_part *part = &binary->parts[i]; - part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]); + /* Copy and layout shared LDS symbols. 
*/ + if (i.num_shared_lds_symbols) { + if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol, + i.num_shared_lds_symbols)) + goto fail; + + memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size); + } + + util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol) + symbol->part_idx = ~0u; + + unsigned max_lds_size = i.info->chip_class >= GFX7 ? 64 * 1024 : 32 * 1024; + uint64_t shared_lds_size = 0; + if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size)) + goto fail; + report_if(shared_lds_size > max_lds_size); + binary->lds_size = shared_lds_size; + + /* First pass over all parts: open ELFs, pre-determine the placement of + * sections in the memory image, and collect and layout private LDS symbols. */ + uint32_t lds_end_align = 0; + + for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { + struct ac_rtld_part *part = &binary->parts[part_idx]; + unsigned part_lds_symbols_begin = + util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol); + + part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]); report_elf_if(!part->elf); const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf); @@ -203,19 +347,48 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts, s->offset = rx_size; rx_size += shdr->sh_size; } + } else if (shdr->sh_type == SHT_SYMTAB) { + if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align)) + goto fail; } } + + uint64_t part_lds_size = shared_lds_size; + if (!layout_symbols( + util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin), + util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin, + &part_lds_size)) + goto fail; + binary->lds_size = MAX2(binary->lds_size, part_lds_size); } binary->rx_end_markers = pasted_text_size; pasted_text_size += 4 * DEBUGGER_NUM_MARKERS; + /* __lds_end is a special symbol that 
points at the end of the memory + * occupied by other LDS symbols. Its alignment is taken as the + * maximum of its alignment over all shader parts where it occurs. + */ + if (lds_end_align) { + binary->lds_size = align(binary->lds_size, lds_end_align); + + struct ac_rtld_symbol *lds_end = + util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1); + lds_end->name = "__lds_end"; + lds_end->size = 0; + lds_end->align = lds_end_align; + lds_end->offset = binary->lds_size; + lds_end->part_idx = ~0u; + } + + report_elf_if(binary->lds_size > max_lds_size); + /* Second pass: Adjust offsets of non-pasted text sections. */ binary->rx_size = pasted_text_size; binary->rx_size = align(binary->rx_size, rx_align); - for (unsigned i = 0; i < num_parts; ++i) { - struct ac_rtld_part *part = &binary->parts[i]; + for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { + struct ac_rtld_part *part = &binary->parts[part_idx]; size_t num_shdrs; elf_getshdrnum(part->elf, &num_shdrs); @@ -246,6 +419,7 @@ void ac_rtld_close(struct ac_rtld_binary *binary) elf_end(part->elf); } + util_dynarray_fini(&binary->lds_symbols); free(binary->parts); binary->parts = NULL; binary->num_parts = 0; @@ -330,6 +504,14 @@ static bool resolve_symbol(const struct ac_rtld_upload_info *u, const char *name, uint64_t *value) { if (sym->st_shndx == SHN_UNDEF) { + const struct ac_rtld_symbol *lds_sym = + find_symbol(&u->binary->lds_symbols, name, part_idx); + + if (lds_sym) { + *value = lds_sym->offset; + return true; + } + /* TODO: resolve from other parts */ if (u->get_external_symbol(u->cb_data, name, value)) @@ -510,9 +692,10 @@ bool ac_rtld_upload(struct ac_rtld_upload_info *u) } \ } while (false) - /* First pass: upload raw section data. */ + /* First pass: upload raw section data and lay out private LDS symbols. 
*/ for (unsigned i = 0; i < u->binary->num_parts; ++i) { struct ac_rtld_part *part = &u->binary->parts[i]; + Elf_Scn *section = NULL; while ((section = elf_nextscn(part->elf, section))) { Elf64_Shdr *shdr = elf64_getshdr(section); |