summary | refs | log | tree | commit | diff | stats
path: root/src/amd
diff options
context:
space:
mode:
author Nicolai Hähnle <[email protected]> 2019-05-03 21:18:51 +0200
committer Marek Olšák <[email protected]> 2019-06-12 20:28:23 -0400
commit f8315ae04b304bbdb47680654238edd107c5a129 (patch)
tree f6b0c284ea97df378392b5b48811397ce32c88d6 /src/amd
parent dc99a8cd9bf743746926c062a7c921f272e1648f (diff)
amd/rtld: layout and relocate LDS symbols
Upcoming changes to LLVM will emit LDS objects as symbols in the ELF symbol table, with relocations that will be resolved with this change.

Callers will also be able to define LDS symbols that are shared between shader parts. This will be used by radeonsi for the ESGS ring in gfx9+ merged shaders.

Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/common/ac_rtld.c 215
-rw-r--r-- src/amd/common/ac_rtld.h 39
2 files changed, 235 insertions, 19 deletions
diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
index 996ff045b16..dc9cc04705b 100644
--- a/src/amd/common/ac_rtld.c
+++ b/src/amd/common/ac_rtld.c
@@ -31,11 +31,17 @@
#include <string.h>
#include "ac_binary.h"
+#include "ac_gpu_info.h"
+#include "util/u_dynarray.h"
#include "util/u_math.h"
// Old distributions may not have this enum constant
#define MY_EM_AMDGPU 224
+#ifndef STT_AMDGPU_LDS
+#define STT_AMDGPU_LDS 13
+#endif
+
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1
@@ -105,16 +111,130 @@ static void report_elf_errorf(const char *fmt, ...)
}
/**
+ * Look up an LDS symbol by name in a dynarray of struct ac_rtld_symbol.
+ *
+ * An entry matches when its name equals \p name and it either belongs to
+ * shader part \p part_idx or is not tied to a single part (part_idx == ~0u).
+ *
+ * \return the first matching entry, or NULL if there is none
+ */
+static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
+						const char *name, unsigned part_idx)
+{
+	util_dynarray_foreach(symbols, struct ac_rtld_symbol, entry) {
+		const bool visible = entry->part_idx == ~0u ||
+				     entry->part_idx == part_idx;
+
+		/* Only compare names of entries this part is allowed to see. */
+		if (visible && strcmp(name, entry->name) == 0)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * qsort() comparator for struct ac_rtld_symbol that orders entries by
+ * decreasing alignment, which is the order layout_symbols() documents.
+ *
+ * \return negative if \p lhsp must come first (larger alignment), positive if
+ *         \p rhsp must come first, 0 if both alignments are equal
+ */
+static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
+{
+	const struct ac_rtld_symbol *lhs = lhsp;
+	const struct ac_rtld_symbol *rhs = rhsp;
+
+	/* The larger alignment sorts first. */
+	if (lhs->align > rhs->align)
+		return -1;
+	if (lhs->align < rhs->align)
+		return 1;
+	return 0;
+}
+
+/**
+ * Sort the given symbol list with compare_symbol_by_align and assign offsets.
+ *
+ * Placement starts at \p *ptotal_size. Each symbol is placed at the running
+ * size rounded up to the symbol's alignment, and the running size then grows
+ * by the symbol's size. On success the final running size is written back to
+ * \p *ptotal_size.
+ *
+ * \return false, after reporting an error and without updating
+ *         \p *ptotal_size, if adding a symbol's size wraps around;
+ *         true otherwise
+ */
+static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
+			   uint64_t *ptotal_size)
+{
+	uint64_t cursor;
+
+	qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
+	cursor = *ptotal_size;
+
+	for (unsigned idx = 0; idx != num_symbols; idx++) {
+		struct ac_rtld_symbol *sym = symbols + idx;
+		assert(util_is_power_of_two_nonzero(sym->align));
+
+		const uint64_t start = align64(cursor, sym->align);
+		const uint64_t end = start + sym->size;
+
+		sym->offset = start;
+
+		/* Unsigned wrap-around means the layout no longer fits. */
+		if (end < start) {
+			report_errorf("%s: size overflow", __FUNCTION__);
+			return false;
+		}
+
+		cursor = end;
+	}
+
+	*ptotal_size = cursor;
+	return true;
+}
+
+/**
+ * Read LDS symbols from the given \p section of the ELF of \p part and append
+ * them to the LDS symbols list.
+ *
+ * Only symbols of type STT_AMDGPU_LDS are considered. "__lds_end" is never
+ * appended; its alignment is merged into \p *lds_end_align (maximum) instead.
+ * Symbols that find_symbol() already finds in the list for this part (shared
+ * LDS symbols) are filtered out after checking that their size and alignment
+ * do not exceed those of the existing entry.
+ *
+ * \param binary         binary whose lds_symbols list is extended
+ * \param part_idx       index of the shader part that \p section belongs to
+ * \param section        symbol table section to scan
+ * \param lds_end_align  in/out: largest "__lds_end" alignment seen so far
+ * \return false, after reporting an error, if the section cannot be read or
+ *         a symbol is rejected; true otherwise
+ */
+static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
+				     unsigned part_idx,
+				     Elf_Scn *section,
+				     uint32_t *lds_end_align)
+{
+#define report_elf_if(cond) \
+	do { \
+		if ((cond)) { \
+			report_errorf(#cond); \
+			return false; \
+		} \
+	} while (false)
+
+	struct ac_rtld_part *part = &binary->parts[part_idx];
+	Elf64_Shdr *shdr = elf64_getshdr(section);
+	/* elf64_getshdr() returns NULL on failure; do not dereference it. */
+	report_elf_if(!shdr);
+
+	uint32_t strtabidx = shdr->sh_link;
+	Elf_Data *symbols_data = elf_getdata(section, NULL);
+	report_elf_if(!symbols_data);
+
+	const Elf64_Sym *symbol = symbols_data->d_buf;
+	size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
+
+	for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
+		if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
+			continue;
+
+		report_elf_if(symbol->st_size > 1u << 29);
+
+		struct ac_rtld_symbol s = {0};
+		s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
+		/* elf_strptr() returns NULL for a bad string table reference;
+		 * the name is passed to strcmp() below, so reject it here. */
+		report_elf_if(!s.name);
+		s.size = symbol->st_size;
+		/* The alignment is 1 << (st_other >> 3), clamped to 1 << 16. */
+		s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
+		s.part_idx = part_idx;
+
+		if (!strcmp(s.name, "__lds_end")) {
+			report_elf_if(s.size != 0);
+			*lds_end_align = MAX2(*lds_end_align, s.align);
+			continue;
+		}
+
+		const struct ac_rtld_symbol *shared =
+			find_symbol(&binary->lds_symbols, s.name, part_idx);
+		if (shared) {
+			/* The existing entry must be able to hold this use. */
+			report_elf_if(s.align > shared->align);
+			report_elf_if(s.size > shared->size);
+			continue;
+		}
+
+		util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
+	}
+
+	return true;
+
+#undef report_elf_if
+}
+
+/**
* Open a binary consisting of one or more shader parts.
*
* \param binary the uninitialized struct
- * \param num_parts number of shader parts
- * \param elf_ptrs pointers to the in-memory ELF objects for each shader part
- * \param elf_sizes sizes (in bytes) of the in-memory ELF objects
+ * \param i binary opening parameters
*/
-bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
- const char * const *elf_ptrs,
- const size_t *elf_sizes)
+bool ac_rtld_open(struct ac_rtld_binary *binary,
+ struct ac_rtld_open_info i)
{
/* One of the libelf implementations
* (http://www.mr511.de/software/english.htm) requires calling
@@ -123,8 +243,8 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
elf_version(EV_CURRENT);
memset(binary, 0, sizeof(*binary));
- binary->num_parts = num_parts;
- binary->parts = calloc(sizeof(*binary->parts), num_parts);
+ binary->num_parts = i.num_parts;
+ binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
if (!binary->parts)
return false;
@@ -147,11 +267,35 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
} \
} while (false)
- /* First pass over all parts: open ELFs and determine the placement of
- * sections in the memory image. */
- for (unsigned i = 0; i < num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
- part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]);
+ /* Copy and layout shared LDS symbols. */
+ if (i.num_shared_lds_symbols) {
+ if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
+ i.num_shared_lds_symbols))
+ goto fail;
+
+ memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
+ }
+
+ util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
+ symbol->part_idx = ~0u;
+
+ unsigned max_lds_size = i.info->chip_class >= GFX7 ? 64 * 1024 : 32 * 1024;
+ uint64_t shared_lds_size = 0;
+ if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
+ goto fail;
+ report_if(shared_lds_size > max_lds_size);
+ binary->lds_size = shared_lds_size;
+
+ /* First pass over all parts: open ELFs, pre-determine the placement of
+ * sections in the memory image, and collect and layout private LDS symbols. */
+ uint32_t lds_end_align = 0;
+
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ unsigned part_lds_symbols_begin =
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
+
+ part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
report_elf_if(!part->elf);
const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
@@ -203,19 +347,48 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
s->offset = rx_size;
rx_size += shdr->sh_size;
}
+ } else if (shdr->sh_type == SHT_SYMTAB) {
+ if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
+ goto fail;
}
}
+
+ uint64_t part_lds_size = shared_lds_size;
+ if (!layout_symbols(
+ util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
+ &part_lds_size))
+ goto fail;
+ binary->lds_size = MAX2(binary->lds_size, part_lds_size);
}
binary->rx_end_markers = pasted_text_size;
pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
+ /* __lds_end is a special symbol that points at the end of the memory
+ * occupied by other LDS symbols. Its alignment is taken as the
+ * maximum of its alignment over all shader parts where it occurs.
+ */
+ if (lds_end_align) {
+ binary->lds_size = align(binary->lds_size, lds_end_align);
+
+ struct ac_rtld_symbol *lds_end =
+ util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
+ lds_end->name = "__lds_end";
+ lds_end->size = 0;
+ lds_end->align = lds_end_align;
+ lds_end->offset = binary->lds_size;
+ lds_end->part_idx = ~0u;
+ }
+
+ report_elf_if(binary->lds_size > max_lds_size);
+
/* Second pass: Adjust offsets of non-pasted text sections. */
binary->rx_size = pasted_text_size;
binary->rx_size = align(binary->rx_size, rx_align);
- for (unsigned i = 0; i < num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
size_t num_shdrs;
elf_getshdrnum(part->elf, &num_shdrs);
@@ -246,6 +419,7 @@ void ac_rtld_close(struct ac_rtld_binary *binary)
elf_end(part->elf);
}
+ util_dynarray_fini(&binary->lds_symbols);
free(binary->parts);
binary->parts = NULL;
binary->num_parts = 0;
@@ -330,6 +504,14 @@ static bool resolve_symbol(const struct ac_rtld_upload_info *u,
const char *name, uint64_t *value)
{
if (sym->st_shndx == SHN_UNDEF) {
+ const struct ac_rtld_symbol *lds_sym =
+ find_symbol(&u->binary->lds_symbols, name, part_idx);
+
+ if (lds_sym) {
+ *value = lds_sym->offset;
+ return true;
+ }
+
/* TODO: resolve from other parts */
if (u->get_external_symbol(u->cb_data, name, value))
@@ -510,9 +692,10 @@ bool ac_rtld_upload(struct ac_rtld_upload_info *u)
} \
} while (false)
- /* First pass: upload raw section data. */
+ /* First pass: upload raw section data and lay out private LDS symbols. */
for (unsigned i = 0; i < u->binary->num_parts; ++i) {
struct ac_rtld_part *part = &u->binary->parts[i];
+
Elf_Scn *section = NULL;
while ((section = elf_nextscn(part->elf, section))) {
Elf64_Shdr *shdr = elf64_getshdr(section);
diff --git a/src/amd/common/ac_rtld.h b/src/amd/common/ac_rtld.h
index 0d93488fbbb..01c29b50817 100644
--- a/src/amd/common/ac_rtld.h
+++ b/src/amd/common/ac_rtld.h
@@ -28,8 +28,19 @@
#include <stdint.h>
#include <stddef.h>
+#include "util/u_dynarray.h"
+
struct ac_rtld_part;
struct ac_shader_config;
+struct radeon_info;
+
+struct ac_rtld_symbol { /* one LDS symbol and its placement */
+ const char *name; /* compared with strcmp() when symbols are looked up */
+ uint32_t size;
+ uint32_t align; /* asserted to be a non-zero power of two during layout */
+ uint64_t offset; /* filled in by ac_rtld_open */
+ unsigned part_idx; /* shader part in which this symbol appears; ~0u matches every part */
+};
/* Lightweight wrapper around underlying ELF objects. */
struct ac_rtld_binary {
@@ -40,6 +51,9 @@ struct ac_rtld_binary {
unsigned num_parts;
struct ac_rtld_part *parts;
+
+ struct util_dynarray lds_symbols;
+ uint32_t lds_size;
};
/**
@@ -54,9 +68,28 @@ struct ac_rtld_binary {
typedef bool (*ac_rtld_get_external_symbol_cb)(
void *cb_data, const char *symbol, uint64_t *value);
-bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
- const char * const *elf_ptrs,
- const size_t *elf_sizes);
+/**
+ * Parameters for \ref ac_rtld_open.
+ *
+ * Lifetimes of \ref info, in-memory ELF objects, and the names of
+ * \ref shared_lds_symbols must extend until \ref ac_rtld_close is called on
+ * the opened binary.
+ */
+struct ac_rtld_open_info {
+ const struct radeon_info *info; /* chip_class is read to pick the maximum LDS size */
+
+ unsigned num_parts; /* number of shader parts, i.e. entries in elf_ptrs and elf_sizes */
+ const char * const *elf_ptrs; /* in-memory ELF objects of each part */
+ const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
+
+ /* Shared LDS symbols are laid out such that they are accessible from
+ * all shader parts. Non-shared (private) LDS symbols of one part may
+ * overlap private LDS symbols of another shader part.
+ */
+ unsigned num_shared_lds_symbols; /* number of entries in shared_lds_symbols */
+ const struct ac_rtld_symbol *shared_lds_symbols; /* copied into the binary by ac_rtld_open */
+};
+
+bool ac_rtld_open(struct ac_rtld_binary *binary,
+ struct ac_rtld_open_info i);
void ac_rtld_close(struct ac_rtld_binary *binary);