diff options
-rw-r--r-- | src/freedreno/ir3/ir3.h | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_legalize.c | 69 |
2 files changed, 71 insertions, 0 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 6cb882002f0..44175888200 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1168,6 +1168,8 @@ static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) list_for_each_entry(struct ir3_block, __block, __list, node) #define foreach_block_safe(__block, __list) \ list_for_each_entry_safe(struct ir3_block, __block, __list, node) +#define foreach_block_rev(__block, __list) \ + list_for_each_entry_rev(struct ir3_block, __block, __list, node) /* iterators for arrays: */ #define foreach_array(__array, __list) \ diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index bed8cd1c03a..d0b0dd83b46 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -592,6 +592,73 @@ block_sched(struct ir3 *ir) } } +/* Here we workaround the fact that kill doesn't actually kill the thread as + * GL expects. The last instruction always needs to be an end instruction, + * which means that if we're stuck in a loop where kill is the only way out, + * then we may have to jump out to the end. kill may also have the d3d + * semantics of converting the thread to a helper thread, rather than setting + * the exec mask to 0, in which case the helper thread could get stuck in an + * infinite loop. + * + * We do this late, both to give the scheduler the opportunity to reschedule + * kill instructions earlier and to avoid having to create a separate basic + * block. + * + * TODO: Assuming that the wavefront doesn't stop as soon as all threads are + * killed, we might benefit by doing this more aggressively when the remaining + * part of the program after the kill is large, since that would let us + * skip over the instructions when there are no non-killed threads left. + */ +static void +kill_sched(struct ir3 *ir, struct ir3_shader_variant *so) +{ + /* True if we know that this block will always eventually lead to the end + * block: + */ + bool always_ends = true; + bool added = false; + struct ir3_block *last_block = + list_last_entry(&ir->block_list, struct ir3_block, node); + + foreach_block_rev (block, &ir->block_list) { + for (unsigned i = 0; i < 2 && block->successors[i]; i++) { + if (block->successors[i]->start_ip <= block->end_ip) + always_ends = false; + } + + if (always_ends) + continue; + + foreach_instr_safe (instr, &block->instr_list) { + if (instr->opc != OPC_KILL) + continue; + + struct ir3_instruction *br = ir3_instr_create(block, OPC_BR); + br->regs[1] = instr->regs[1]; + br->cat0.target = + list_last_entry(&ir->block_list, struct ir3_block, node); + + list_del(&br->node); + list_add(&br->node, &instr->node); + + added = true; + } + } + + if (added) { + /* I'm not entirely sure how the branchstack works, but we probably + * need to add at least one entry for the divergence which is resolved + * at the end: + */ + so->branchstack++; + + /* We don't update predecessors/successors, so we have to do this + * manually: + */ + mark_jp(last_block); + } +} + /* Insert nop's required to make this a legal/valid shader program: */ static void nop_sched(struct ir3 *ir) @@ -669,6 +736,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) *max_bary = ctx->max_bary; block_sched(ir); + if (so->type == MESA_SHADER_FRAGMENT) + kill_sched(ir, so); nop_sched(ir); do { |