aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorRhys Perry <[email protected]>2019-11-18 17:26:38 +0000
committerRhys Perry <[email protected]>2019-12-02 16:56:19 +0000
commit0e8da9f60718520d1c3abd335a047282bb760b78 (patch)
tree382105e6021ad6d25c5071deefd0da0ab470e496 /src
parent06fc83989c04368f14e004ba5543c6b5daa6c098 (diff)
aco: handle loop exit and IF merge phis with break/discard
ACO considers discards jumps and creates edges in the CFG for them but NIR does neither of these. This can be fixed instead by keeping track of whether a side of an IF had a break/discard, but this doesn't solve the issue with discards affecting loop exit phis. So this reworks phi handling a bit. Fixes these tests: dEQP-VK.graphicsfuzz.disc-and-add-in-func-in-loop dEQP-VK.graphicsfuzz.loop-call-discard dEQP-VK.graphicsfuzz.complex-nested-loops-and-call Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp129
-rw-r--r--src/amd/compiler/aco_instruction_selection_setup.cpp5
2 files changed, 80 insertions, 54 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 51e22f2a822..807cc02264c 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6912,20 +6912,48 @@ Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa)
void visit_phi(isel_context *ctx, nir_phi_instr *instr)
{
aco_ptr<Pseudo_instruction> phi;
- unsigned num_src = exec_list_length(&instr->srcs);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == s2);
- aco_opcode opcode = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index] ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
+ bool logical = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index];
+ logical |= ctx->block->kind & block_kind_merge;
+ aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
+ /* we want a sorted list of sources, since the predecessor list is also sorted */
std::map<unsigned, nir_ssa_def*> phi_src;
- bool all_undef = true;
- nir_foreach_phi_src(src, instr) {
+ nir_foreach_phi_src(src, instr)
phi_src[src->pred->index] = src->src.ssa;
- if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
- all_undef = false;
+
+ std::vector<unsigned>& preds = logical ? ctx->block->logical_preds : ctx->block->linear_preds;
+ unsigned num_operands = 0;
+ Operand operands[std::max(exec_list_length(&instr->srcs), (unsigned)preds.size())];
+ unsigned num_defined = 0;
+ unsigned cur_pred_idx = 0;
+ for (std::pair<unsigned, nir_ssa_def *> src : phi_src) {
+ if (cur_pred_idx < preds.size()) {
+ /* handle missing preds (IF merges with discard/break) and extra preds (loop exit with discard) */
+ unsigned block = ctx->cf_info.nir_to_aco[src.first];
+ unsigned skipped = 0;
+ while (cur_pred_idx + skipped < preds.size() && preds[cur_pred_idx + skipped] != block)
+ skipped++;
+ if (cur_pred_idx + skipped < preds.size()) {
+ for (unsigned i = 0; i < skipped; i++)
+ operands[num_operands++] = Operand(dst.regClass());
+ cur_pred_idx += skipped;
+ } else {
+ continue;
+ }
+ }
+ cur_pred_idx++;
+ Operand op = get_phi_operand(ctx, src.second);
+ operands[num_operands++] = op;
+ num_defined += !op.isUndefined();
}
- if (all_undef) {
+ /* handle block_kind_continue_or_break at loop exit blocks */
+ while (cur_pred_idx++ < preds.size())
+ operands[num_operands++] = Operand(dst.regClass());
+
+ if (num_defined == 0) {
Builder bld(ctx->program, ctx->block);
if (dst.regClass() == s1) {
bld.sop1(aco_opcode::s_mov_b32, Definition(dst), Operand(0u));
@@ -6941,17 +6969,41 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
return;
}
+ /* we can use a linear phi in some cases if one src is undef */
+ if (dst.is_linear() && ctx->block->kind & block_kind_merge && num_defined == 1) {
+ phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, num_operands, 1));
+
+ Block *linear_else = &ctx->program->blocks[ctx->block->linear_preds[1]];
+ Block *invert = &ctx->program->blocks[linear_else->linear_preds[0]];
+ assert(invert->kind & block_kind_invert);
+
+ unsigned then_block = invert->linear_preds[0];
+
+ Block* insert_block = NULL;
+ for (unsigned i = 0; i < num_operands; i++) {
+ Operand op = operands[i];
+ if (op.isUndefined())
+ continue;
+ insert_block = ctx->block->logical_preds[i] == then_block ? invert : ctx->block;
+ phi->operands[0] = op;
+ break;
+ }
+ assert(insert_block); /* should be handled by the "num_defined == 0" case above */
+ phi->operands[1] = Operand(dst.regClass());
+ phi->definitions[0] = Definition(dst);
+ insert_block->instructions.emplace(insert_block->instructions.begin(), std::move(phi));
+ return;
+ }
+
/* try to scalarize vector phis */
if (instr->dest.ssa.bit_size != 1 && dst.size() > 1) {
// TODO: scalarize linear phis on divergent ifs
bool can_scalarize = (opcode == aco_opcode::p_phi || !(ctx->block->kind & block_kind_merge));
std::array<Temp, 4> new_vec;
- for (std::pair<const unsigned, nir_ssa_def*>& pair : phi_src) {
- Operand src = get_phi_operand(ctx, pair.second);
- if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end()) {
+ for (unsigned i = 0; can_scalarize && (i < num_operands); i++) {
+ Operand src = operands[i];
+ if (src.isTemp() && ctx->allocated_vec.find(src.tempId()) == ctx->allocated_vec.end())
can_scalarize = false;
- break;
- }
}
if (can_scalarize) {
unsigned num_components = instr->dest.ssa.num_components;
@@ -6960,12 +7012,10 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)};
for (unsigned k = 0; k < num_components; k++) {
- phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_src, 1));
- std::map<unsigned, nir_ssa_def*>::iterator it = phi_src.begin();
- for (unsigned i = 0; i < num_src; i++) {
- Operand src = get_phi_operand(ctx, it->second);
+ phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_operands, 1));
+ for (unsigned i = 0; i < num_operands; i++) {
+ Operand src = operands[i];
phi->operands[i] = src.isTemp() ? Operand(ctx->allocated_vec[src.tempId()][k]) : Operand(rc);
- ++it;
}
Temp phi_dst = {ctx->program->allocateId(), rc};
phi->definitions[0] = Definition(phi_dst);
@@ -6980,43 +7030,9 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
}
}
- unsigned extra_src = 0;
- if (opcode == aco_opcode::p_linear_phi && (ctx->block->kind & block_kind_loop_exit) &&
- ctx->program->blocks[ctx->block->index-2].kind & block_kind_continue_or_break) {
- extra_src++;
- }
-
- phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_src + extra_src, 1));
-
- /* if we have a linear phi on a divergent if, we know that one src is undef */
- if (opcode == aco_opcode::p_linear_phi && ctx->block->kind & block_kind_merge) {
- assert(extra_src == 0);
- Block* block;
- /* we place the phi either in the invert-block or in the current block */
- if (phi_src.begin()->second->parent_instr->type != nir_instr_type_ssa_undef) {
- assert((++phi_src.begin())->second->parent_instr->type == nir_instr_type_ssa_undef);
- Block& linear_else = ctx->program->blocks[ctx->block->linear_preds[1]];
- block = &ctx->program->blocks[linear_else.linear_preds[0]];
- assert(block->kind & block_kind_invert);
- phi->operands[0] = get_phi_operand(ctx, phi_src.begin()->second);
- } else {
- assert((++phi_src.begin())->second->parent_instr->type != nir_instr_type_ssa_undef);
- block = ctx->block;
- phi->operands[0] = get_phi_operand(ctx, (++phi_src.begin())->second);
- }
- phi->operands[1] = Operand(dst.regClass());
- phi->definitions[0] = Definition(dst);
- block->instructions.emplace(block->instructions.begin(), std::move(phi));
- return;
- }
-
- std::map<unsigned, nir_ssa_def*>::iterator it = phi_src.begin();
- for (unsigned i = 0; i < num_src; i++) {
- phi->operands[i] = get_phi_operand(ctx, it->second);
- ++it;
- }
- for (unsigned i = 0; i < extra_src; i++)
- phi->operands[num_src + i] = Operand(dst.regClass());
+ phi.reset(create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, num_operands, 1));
+ for (unsigned i = 0; i < num_operands; i++)
+ phi->operands[i] = operands[i];
phi->definitions[0] = Definition(dst);
ctx->block->instructions.emplace(ctx->block->instructions.begin(), std::move(phi));
}
@@ -7062,6 +7078,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
return;
}
ctx->cf_info.parent_loop.has_divergent_branch = true;
+ ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index;
break;
case nir_jump_continue:
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
@@ -7073,6 +7090,7 @@ void visit_jump(isel_context *ctx, nir_jump_instr *instr)
we must ensure that they are handled correctly */
ctx->cf_info.parent_loop.has_divergent_continue = true;
ctx->cf_info.parent_loop.has_divergent_branch = true;
+ ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index;
} else {
/* uniform continue - directly jump to the loop header */
ctx->block->kind |= block_kind_uniform;
@@ -7144,6 +7162,9 @@ void visit_block(isel_context *ctx, nir_block *block)
//abort();
}
}
+
+ if (!ctx->cf_info.parent_loop.has_divergent_branch)
+ ctx->cf_info.nir_to_aco[block->index] = ctx->block->index;
}
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index ba7c7c12a52..ab96a4507cf 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -69,6 +69,7 @@ struct isel_context {
bool is_divergent = false;
} parent_if;
bool exec_potentially_empty = false;
+ std::unique_ptr<unsigned[]> nir_to_aco; /* NIR block index to ACO block index */
} cf_info;
Temp arg_temps[AC_MAX_ARGS];
@@ -133,6 +134,8 @@ void init_context(isel_context *ctx, nir_shader *shader)
unsigned spi_ps_inputs = 0;
+ std::unique_ptr<unsigned[]> nir_to_aco{new unsigned[impl->num_blocks]()};
+
bool done = false;
while (!done) {
done = true;
@@ -538,6 +541,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
allocated[i] = Temp(ctx->program->allocateId(), allocated[i].regClass());
ctx->allocated.reset(allocated.release());
+ ctx->cf_info.nir_to_aco.reset(nir_to_aco.release());
}
Pseudo_instruction *add_startpgm(struct isel_context *ctx)
@@ -899,6 +903,7 @@ setup_isel_context(Program* program,
nir_function_impl *func = nir_shader_get_entrypoint(nir);
nir_index_ssa_defs(func);
+ nir_metadata_require(func, nir_metadata_block_index);
if (args->options->dump_preoptir) {
fprintf(stderr, "NIR shader before instruction selection:\n");