aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp76
-rw-r--r--src/amd/compiler/aco_ir.h17
2 files changed, 92 insertions, 1 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 3ffa6153d46..52db7f4a6b3 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8235,7 +8235,7 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
std::vector<unsigned>& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds;
unsigned num_operands = 0;
- Operand operands[std::max(exec_list_length(&instr->srcs), (unsigned)preds.size())];
+ Operand operands[std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1];
unsigned num_defined = 0;
unsigned cur_pred_idx = 0;
for (std::pair<unsigned, nir_ssa_def *> src : phi_src) {
@@ -8266,6 +8266,17 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
while (cur_pred_idx++ < preds.size())
operands[num_operands++] = Operand(dst.regClass());
+ /* If the loop ends with a break, still add a linear continue edge in case
+ * that break is divergent or continue_or_break is used. We'll either remove
+ * this operand later in visit_loop() if it's not necessary or replace the
+ * undef with something correct. */
+ if (!logical && ctx->block->kind & block_kind_loop_header) {
+ nir_loop *loop = nir_cf_node_as_loop(instr->instr.block->cf_node.parent);
+ nir_block *last = nir_loop_last_block(loop);
+ if (last->successors[0] != instr->instr.block)
+ operands[num_operands++] = Operand(RegClass());
+ }
+
if (num_defined == 0) {
Builder bld(ctx->program, ctx->block);
if (dst.regClass() == s1) {
@@ -8487,6 +8498,51 @@ void visit_block(isel_context *ctx, nir_block *block)
+static Operand create_continue_phis(isel_context *ctx, unsigned first, unsigned last,
+ aco_ptr<Instruction>& header_phi, Operand *vals)
+{
+ vals[0] = Operand(header_phi->definitions[0].getTemp());
+ RegClass rc = vals[0].regClass();
+
+ unsigned loop_nest_depth = ctx->program->blocks[first].loop_nest_depth;
+
+ unsigned next_pred = 1;
+
+ for (unsigned idx = first + 1; idx <= last; idx++) {
+ Block& block = ctx->program->blocks[idx];
+ if (block.loop_nest_depth != loop_nest_depth) {
+ vals[idx - first] = vals[idx - 1 - first];
+ continue;
+ }
+
+ if (block.kind & block_kind_continue) {
+ vals[idx - first] = header_phi->operands[next_pred];
+ next_pred++;
+ continue;
+ }
+
+ bool all_same = true;
+ for (unsigned i = 1; all_same && (i < block.linear_preds.size()); i++)
+ all_same = vals[block.linear_preds[i] - first] == vals[block.linear_preds[0] - first];
+
+ Operand val;
+ if (all_same) {
+ val = vals[block.linear_preds[0] - first];
+ } else {
+ aco_ptr<Instruction> phi(create_instruction<Pseudo_instruction>(
+ aco_opcode::p_linear_phi, Format::PSEUDO, block.linear_preds.size(), 1));
+ for (unsigned i = 0; i < block.linear_preds.size(); i++)
+ phi->operands[i] = vals[block.linear_preds[i] - first];
+ val = Operand(Temp(ctx->program->allocateId(), rc));
+ phi->definitions[0] = Definition(val.getTemp());
+ block.instructions.emplace(block.instructions.begin(), std::move(phi));
+ }
+ vals[idx - first] = val;
+ }
+
+ return vals[last - first];
+}
+
static void visit_loop(isel_context *ctx, nir_loop *loop)
{
//TODO: we might want to wrap the loop around a branch if exec_potentially_empty=true
@@ -8570,6 +8626,24 @@ static void visit_loop(isel_context *ctx, nir_loop *loop)
}
}
+ /* Fixup linear phis in loop header from expecting a continue. Both this fixup
+ * and the previous one shouldn't both happen at once because a break in the
+ * merge block would get CSE'd */
+ if (nir_loop_last_block(loop)->successors[0] != nir_loop_first_block(loop)) {
+ unsigned num_vals = ctx->cf_info.has_branch ? 1 : (ctx->block->index - loop_header_idx + 1);
+ Operand vals[num_vals];
+ for (aco_ptr<Instruction>& instr : ctx->program->blocks[loop_header_idx].instructions) {
+ if (instr->opcode == aco_opcode::p_linear_phi) {
+ if (ctx->cf_info.has_branch)
+ instr->operands.pop_back();
+ else
+ instr->operands.back() = create_continue_phis(ctx, loop_header_idx, ctx->block->index, instr, vals);
+ } else if (!is_phi(instr)) {
+ break;
+ }
+ }
+ }
+
ctx->cf_info.has_branch = false;
// TODO: if the loop has not a single exit, we must add one °°
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 5bbe337fe17..1eae6c5d0cc 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -521,6 +521,23 @@ public:
return isFirstKill() && !isLateKill();
}
+ constexpr bool operator == (Operand other) const noexcept
+ {
+ if (other.size() != size())
+ return false;
+ if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
+ return false;
+ if (isFixed() && other.isFixed() && physReg() != other.physReg())
+ return false;
+ if (isLiteral())
+ return other.isLiteral() && other.constantValue() == constantValue();
+ else if (isConstant())
+ return other.isConstant() && other.physReg() == physReg();
+ else if (isUndefined())
+ return other.isUndefined() && other.regClass() == regClass();
+ else
+ return other.isTemp() && other.getTemp() == getTemp();
+ }
private:
union {
uint32_t i;