aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/compiler/aco_insert_NOPs.cpp77
-rw-r--r--src/amd/compiler/aco_instruction_selection_setup.cpp20
-rw-r--r--src/amd/compiler/aco_ir.h3
-rw-r--r--src/amd/compiler/aco_live_var_analysis.cpp6
4 files changed, 75 insertions, 31 deletions
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 75dbe852174..bb703d7481e 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -274,6 +274,41 @@ bool test_bitset_range(BITSET_WORD *words, unsigned start, unsigned size) {
}
}
+/* A SMEM clause is any group of consecutive SMEM instructions. The
+ * instructions in this group may return out of order and/or may be replayed.
+ *
+ * To fix this potential hazard correctly, we have to make sure that when a
+ * clause has more than one instruction, no instruction in the clause writes
+ * to a register that is read by another instruction in the clause (including
+ * itself). In this case, we have to break the SMEM clause by inserting non
+ * SMEM instructions.
+ *
+ * SMEM clauses are only present on GFX8+, and only matter when XNACK is set.
+ */
+void handle_smem_clause_hazards(Program *program, NOP_ctx_gfx6 &ctx,
+ aco_ptr<Instruction>& instr, int *NOPs)
+{
+ /* break off from previous SMEM clause if needed */
+ if (!*NOPs & (ctx.smem_clause || ctx.smem_write)) {
+ /* Don't allow clauses with store instructions since the clause's
+ * instructions may use the same address. */
+ if (ctx.smem_write || instr->definitions.empty() || instr_info.is_atomic[(unsigned)instr->opcode]) {
+ *NOPs = 1;
+ } else if (program->xnack_enabled) {
+ for (Operand op : instr->operands) {
+ if (!op.isConstant() && test_bitset_range(ctx.smem_clause_write, op.physReg(), op.size())) {
+ *NOPs = 1;
+ break;
+ }
+ }
+
+ Definition def = instr->definitions[0];
+ if (!*NOPs && test_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size()))
+ *NOPs = 1;
+ }
+ }
+}
+
/* TODO: we don't handle accessing VCC using the actual SGPR instead of using the alias */
void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &ctx,
aco_ptr<Instruction>& instr, std::vector<aco_ptr<Instruction>>& new_instructions)
@@ -300,24 +335,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
}
}
- /* break off from prevous SMEM clause if needed */
- if (!NOPs & (ctx.smem_clause || ctx.smem_write)) {
- /* Don't allow clauses with store instructions since the clause's
- * instructions may use the same address. */
- if (ctx.smem_write || instr->definitions.empty() || instr_info.is_atomic[(unsigned)instr->opcode]) {
- NOPs = 1;
- } else {
- for (Operand op : instr->operands) {
- if (!op.isConstant() && test_bitset_range(ctx.smem_clause_write, op.physReg(), op.size())) {
- NOPs = 1;
- break;
- }
- }
- Definition def = instr->definitions[0];
- if (!NOPs && test_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size()))
- NOPs = 1;
- }
- }
+ handle_smem_clause_hazards(program, ctx, instr, &NOPs);
} else if (instr->isSALU()) {
if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32 ||
instr->opcode == aco_opcode::s_getreg_b32) {
@@ -414,8 +432,11 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
if ((ctx.smem_clause || ctx.smem_write) && (NOPs || instr->format != Format::SMEM)) {
ctx.smem_clause = false;
ctx.smem_write = false;
- BITSET_ZERO(ctx.smem_clause_read_write);
- BITSET_ZERO(ctx.smem_clause_write);
+
+ if (program->xnack_enabled) {
+ BITSET_ZERO(ctx.smem_clause_read_write);
+ BITSET_ZERO(ctx.smem_clause_write);
+ }
}
if (instr->format == Format::SMEM) {
@@ -424,15 +445,17 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
} else {
ctx.smem_clause = true;
- for (Operand op : instr->operands) {
- if (!op.isConstant()) {
- set_bitset_range(ctx.smem_clause_read_write, op.physReg(), op.size());
+ if (program->xnack_enabled) {
+ for (Operand op : instr->operands) {
+ if (!op.isConstant()) {
+ set_bitset_range(ctx.smem_clause_read_write, op.physReg(), op.size());
+ }
}
- }
- Definition def = instr->definitions[0];
- set_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size());
- set_bitset_range(ctx.smem_clause_write, def.physReg(), def.size());
+ Definition def = instr->definitions[0];
+ set_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size());
+ set_bitset_range(ctx.smem_clause_write, def.physReg(), def.size());
+ }
}
} else if (instr->isVALU()) {
for (Definition def : instr->definitions) {
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index d365f79698a..462cd48d960 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -1150,6 +1150,24 @@ setup_nir(isel_context *ctx, nir_shader *nir)
nir_index_ssa_defs(func);
}
+void
+setup_xnack(Program *program)
+{
+ switch (program->family) {
+ /* GFX8 APUs */
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* GFX9 APUS */
+ case CHIP_RAVEN:
+ case CHIP_RAVEN2:
+ case CHIP_RENOIR:
+ program->xnack_enabled = true;
+ break;
+ default:
+ break;
+ }
+}
+
isel_context
setup_isel_context(Program* program,
unsigned shader_count,
@@ -1308,6 +1326,8 @@ setup_isel_context(Program* program,
ctx.block->loop_nest_depth = 0;
ctx.block->kind = block_kind_top_level;
+ setup_xnack(program);
+
return ctx;
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 73a1d394eff..ace84db1018 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1252,8 +1252,9 @@ public:
uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
unsigned workgroup_size; /* if known; otherwise UINT_MAX */
+ bool xnack_enabled = false;
+
bool needs_vcc = false;
- bool needs_xnack_mask = false;
bool needs_flat_scr = false;
uint32_t allocateId()
diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp
index e223d6d5f84..106c5eb3166 100644
--- a/src/amd/compiler/aco_live_var_analysis.cpp
+++ b/src/amd/compiler/aco_live_var_analysis.cpp
@@ -302,19 +302,19 @@ uint16_t get_extra_sgprs(Program *program)
{
if (program->chip_class >= GFX10) {
assert(!program->needs_flat_scr);
- assert(!program->needs_xnack_mask);
+ assert(!program->xnack_enabled);
return 2;
} else if (program->chip_class >= GFX8) {
if (program->needs_flat_scr)
return 6;
- else if (program->needs_xnack_mask)
+ else if (program->xnack_enabled)
return 4;
else if (program->needs_vcc)
return 2;
else
return 0;
} else {
- assert(!program->needs_xnack_mask);
+ assert(!program->xnack_enabled);
if (program->needs_flat_scr)
return 4;
else if (program->needs_vcc)