diff options
author | Rhys Perry <[email protected]> | 2019-09-10 18:11:13 +0100 |
---|---|---|
committer | Timur Kristóf <[email protected]> | 2019-10-10 09:57:53 +0200 |
commit | 83993f535eb90874ca2256ddbd35bce4e407c13a (patch) | |
tree | 7bf5e29a5ae9ce3271f00f70e8028069ff7fc8e6 | |
parent | 0be1dd856445cf30acc0a7ca74b662f6c21512b8 (diff) |
aco: workaround GFX10 0x3f branch bug
According to LLVM, branches with an offset of 0x3f are buggy.
v2: (by Timur Kristóf)
- extract the GFX10 specific part to its own function
Signed-off-by: Rhys Perry <[email protected]>
Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
-rw-r--r-- | src/amd/compiler/aco_assembler.cpp | 44 |
1 files changed, 39 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 34eb9fb730f..5a82d44d74b 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -1,4 +1,5 @@
-#include <map>
+#include <vector>
+#include <algorithm>
 
 #include "aco_ir.h"
 #include "common/sid.h"
@@ -9,7 +10,7 @@ namespace aco {
 struct asm_context {
    Program *program;
    enum chip_class chip_class;
-   std::map<int, SOPP_instruction*> branches;
+   std::vector<std::pair<int, SOPP_instruction*>> branches;
    std::vector<unsigned> constaddrs;
    const int16_t* opcode;
    // TODO: keep track of branch instructions referring blocks
@@ -135,7 +136,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       encoding |= opcode << 16;
       encoding |= (uint16_t) sopp->imm;
       if (sopp->block != -1)
-         ctx.branches.insert({out.size(), sopp});
+         ctx.branches.emplace_back(out.size(), sopp);
       out.push_back(encoding);
       break;
    }
@@ -605,10 +606,43 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
    }
 }
 
+static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
+{
+   /* Branches with an offset of 0x3f are buggy on GFX10, we workaround by inserting NOPs if needed. */
+   bool gfx10_3f_bug = false;
+
+   do {
+      auto buggy_branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(), [&ctx](const auto &branch) -> bool {
+         return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) == 0x3f;
+      });
+
+      gfx10_3f_bug = buggy_branch_it != ctx.branches.end();
+
+      if (gfx10_3f_bug) {
+         /* Insert an s_nop after the branch */
+         constexpr uint32_t s_nop_0 = 0xbf800000u;
+         auto out_pos = std::next(out.begin(), buggy_branch_it->first + 1);
+         out.insert(out_pos, s_nop_0);
+
+         /* Update the offset of each affected block */
+         for (Block& block : ctx.program->blocks) {
+            if (block.offset > (unsigned)buggy_branch_it->first)
+               block.offset++;
+         }
+
+         /* Update the branches following the current one */
+         for (auto branch_it = std::next(buggy_branch_it); branch_it != ctx.branches.end(); ++branch_it)
+            branch_it->first++;
+      }
+   } while (gfx10_3f_bug);
+}
+
 void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
 {
-   for (std::pair<int, SOPP_instruction*> branch : ctx.branches)
-   {
+   if (ctx.chip_class >= GFX10)
+      fix_branches_gfx10(ctx, out);
+
+   for (std::pair<int, SOPP_instruction*> &branch : ctx.branches) {
       int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
       out[branch.first] |= (uint16_t) offset;
    } |