aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2020-01-21 16:49:22 +0100
committer Marge Bot <[email protected]> 2020-01-24 18:34:27 +0000
commit 1ac49ba908acf70a8ae4aad71dc715bf625aea1e (patch)
tree 8881182bad162dbe6bd4bb766e8eadc21d1b9a06 /src
parent b9cc50fbce4c5aeab035d855c9368e24e2ed2d20 (diff)
aco: fix a hazard with v_interp_* and v_{read,readfirst}lane_* on GFX6
It's required to insert 1 wait state if the dst VGPR of any v_interp_* is followed by a read with v_readfirstlane or v_readlane to fix GPU hangs on GFX6. Note that v_writelane_* is apparently not affected. This hazard isn't documented anywhere but AMD confirmed it. This fixes a GPU hang with the texturemipmapgen Sascha demo on GFX6. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3533>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/compiler/README.md 9
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp 18
2 files changed, 27 insertions, 0 deletions
diff --git a/src/amd/compiler/README.md b/src/amd/compiler/README.md
index 7c7e68f458e..9c47c07bc80 100644
--- a/src/amd/compiler/README.md
+++ b/src/amd/compiler/README.md
@@ -131,6 +131,15 @@ finish and then write to vcc (for example, `s_mov_b64 vcc, vcc`) to correct vccz
Currently, we don't do this.
+## GCN / GFX6 hazards
+
+### VINTRP followed by a read with v_readfirstlane or v_readlane
+
+It's required to insert 1 wait state if the dst VGPR of any v_interp_* is
+followed by a read with v_readfirstlane or v_readlane to fix GPU hangs on GFX6.
+Note that v_writelane_* is apparently not affected. This hazard isn't
+documented anywhere but AMD confirmed it.
+
## RDNA / GFX10 hazards
### SMEM store followed by a load with the same address
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 689d5e25acc..8c032bb699c 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -353,6 +353,24 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
ctx.VALU_wrsgpr = NOPs ? new_idx : new_idx + 1;
}
}
+
+ /* It's required to insert 1 wait state if the dst VGPR of any v_interp_*
+ * is followed by a read with v_readfirstlane or v_readlane to fix GPU
+ * hangs on GFX6. Note that v_writelane_* is apparently not affected.
+ * This hazard isn't documented anywhere but AMD confirmed it.
+ */
+ if (ctx.chip_class == GFX6 &&
+ !new_instructions.empty() &&
+ (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
+ instr->opcode == aco_opcode::v_readlane_b32)) {
+ aco_ptr<Instruction>& pred = new_instructions.back();
+ if (pred->format == Format::VINTRP) {
+ Definition pred_def = pred->definitions[0];
+ Operand& op = instr->operands[0];
+ if (regs_intersect(pred_def.physReg(), pred_def.size(), op.physReg(), op.size()))
+ NOPs = std::max(NOPs, 1);
+ }
+ }
return NOPs;
} else if (instr->isVMEM() && ctx.VALU_wrsgpr + 5 >= new_idx) {
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */