summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Timur Kristóf <[email protected]> 2019-10-23 11:24:53 +0200
committer: Timur Kristóf <[email protected]> 2019-10-25 10:10:42 +0200
commit: e5a8616973d45909a75a708f88dfb9c3cbb5ded8 (patch)
tree: 58abe54a28de4e468db324b5836cbcb3d3f04cb0
parent: 99aed688d346e26b23e2e00cb6f7df9be3415ccb (diff)
aco/gfx10: Mitigate VcmpxPermlaneHazard.
Any permlane instruction that follows any VOPC instruction can cause a hazard. This commit implements a workaround that prevents the hazard by inserting a VALU instruction between them. Signed-off-by: Timur Kristóf <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]>
-rw-r--r-- src/amd/compiler/README 8
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp 20
2 files changed, 28 insertions, 0 deletions
diff --git a/src/amd/compiler/README b/src/amd/compiler/README
index 630f1fcd42a..d3ecc896bc4 100644
--- a/src/amd/compiler/README
+++ b/src/amd/compiler/README
@@ -173,3 +173,11 @@ The 12-bit immediate OFFSET field of FLAT instructions must always be 0.
GLOBAL and SCRATCH are unaffected.
ACO doesn't use FLAT load/store on GFX10, so is unaffected.
+
+### VcmpxPermlaneHazard
+
+Triggered by:
+Any permlane instruction that follows any VOPC instruction.
+AMD developers confirmed that, despite the name, this hazard is not limited to v_cmpx.
+
+Mitigated by: any VALU instruction except `v_nop`.
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index eb325c940ba..a80dd0c04bc 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -39,6 +39,7 @@ struct NOP_ctx {
/* GFX10 */
int last_VMEM_since_scalar_write = -1;
+ bool has_VOPC = false;
NOP_ctx(Program* program) : chip_class(program->chip_class) {
vcc_physical = program->config->num_sgprs - 2;
@@ -283,6 +284,25 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
ctx.last_VMEM_since_scalar_write = -1;
}
+ /* VcmpxPermlaneHazard
+ * Handle any permlane following a VOPC instruction, insert v_mov between them.
+ */
+ if (instr->format == Format::VOPC) {
+ ctx.has_VOPC = true;
+ } else if (ctx.has_VOPC &&
+ (instr->opcode == aco_opcode::v_permlane16_b32 ||
+ instr->opcode == aco_opcode::v_permlanex16_b32)) {
+ ctx.has_VOPC = false;
+
+ /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
+ aco_ptr<VOP1_instruction> v_mov{create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
+ v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1);
+ v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1);
+ new_instructions.emplace_back(std::move(v_mov));
+ } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
+ ctx.has_VOPC = false;
+ }
+
return std::make_pair(sNOPs, vNOPs);
}