diff options
author | Timur Kristóf <[email protected]> | 2019-10-23 11:24:53 +0200 |
---|---|---|
committer | Timur Kristóf <[email protected]> | 2019-10-25 10:10:42 +0200 |
commit | e5a8616973d45909a75a708f88dfb9c3cbb5ded8 (patch) | |
tree | 58abe54a28de4e468db324b5836cbcb3d3f04cb0 | |
parent | 99aed688d346e26b23e2e00cb6f7df9be3415ccb (diff) |
aco/gfx10: Mitigate VcmpxPermlaneHazard.
Any permlane instruction that follows any VOPC instruction can cause a hazard.
This commit implements a workaround that inserts a v_mov between the two instructions to avoid it.
Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
-rw-r--r-- | src/amd/compiler/README | 8 | ||||
-rw-r--r-- | src/amd/compiler/aco_insert_NOPs.cpp | 20 |
2 files changed, 28 insertions, 0 deletions
diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 630f1fcd42a..d3ecc896bc4 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -173,3 +173,11 @@ The 12-bit immediate OFFSET field of FLAT instructions must always be 0. GLOBAL and SCRATCH are unaffected. ACO doesn't use FLAT load/store on GFX10, so is unaffected. + +### VcmpxPermlaneHazard + +Triggered by: +Any permlane instruction that follows any VOPC instruction. +Confirmed by AMD devs that despite the name, this doesn't only affect v_cmpx. + +Mitigated by: any VALU instruction except `v_nop`. diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index eb325c940ba..a80dd0c04bc 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -39,6 +39,7 @@ struct NOP_ctx { /* GFX10 */ int last_VMEM_since_scalar_write = -1; + bool has_VOPC = false; NOP_ctx(Program* program) : chip_class(program->chip_class) { vcc_physical = program->config->num_sgprs - 2; @@ -283,6 +284,25 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>& ctx.last_VMEM_since_scalar_write = -1; } + /* VcmpxPermlaneHazard + * Handle any permlane following a VOPC instruction, insert v_mov between them. 
+ */ + if (instr->format == Format::VOPC) { + ctx.has_VOPC = true; + } else if (ctx.has_VOPC && + (instr->opcode == aco_opcode::v_permlane16_b32 || + instr->opcode == aco_opcode::v_permlanex16_b32)) { + ctx.has_VOPC = false; + + /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */ + aco_ptr<VOP1_instruction> v_mov{create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)}; + v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1); + v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1); + new_instructions.emplace_back(std::move(v_mov)); + } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) { + ctx.has_VOPC = false; + } + return std::make_pair(sNOPs, vNOPs); } |