diff options
Diffstat (limited to 'src/freedreno/ir3/ir3_a6xx.c')
-rw-r--r-- | src/freedreno/ir3/ir3_a6xx.c | 54 |
1 files changed, 31 insertions, 23 deletions
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index fd18fc3aa3c..6fed98d7673 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -330,35 +330,35 @@ const struct ir3_context_funcs ir3_a6xx_funcs = { * extra mov from src1.x to dst. This way the other compiler passes * can ignore this quirk of the new instruction encoding. * - * This might cause extra complication in the future when we support - * spilling, as I think we'd want to re-run the scheduling pass. One - * possible alternative might be to do this in the RA pass after - * ra_allocate() but before destroying the SSA links. (Ie. we do - * want to know if anything consumes the result of the atomic instr, - * if there is no consumer then inserting the extra mov is pointless. + * This should run after RA. */ static struct ir3_instruction * get_atomic_dest_mov(struct ir3_instruction *atomic) { + struct ir3_instruction *mov; + /* if we've already created the mov-out, then re-use it: */ if (atomic->data) return atomic->data; + /* We are already out of SSA here, so we can't use the nice builders: */ + mov = ir3_instr_create(atomic->block, OPC_MOV); + ir3_reg_create(mov, 0, 0); /* dst */ + ir3_reg_create(mov, 0, 0); /* src */ + + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + /* extract back out the 'dummy' which serves as stand-in for dest: */ - struct ir3_instruction *src = ssa(atomic->regs[3]); + struct ir3_instruction *src = atomic->regs[3]->instr; debug_assert(src->opc == OPC_META_COLLECT); - struct ir3_instruction *dummy = ssa(src->regs[1]); - struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32); + *mov->regs[0] = *atomic->regs[0]; + *mov->regs[1] = *src->regs[1]->instr->regs[0]; mov->flags |= IR3_INSTR_SY; - if (atomic->regs[0]->flags & IR3_REG_ARRAY) { - mov->regs[0]->flags |= IR3_REG_ARRAY; - mov->regs[0]->array = atomic->regs[0]->array; - } - /* it will have already been appended to the end of the block, which * isn't where we want it, so fix-up the location: */ @@ -368,11 +368,13 @@ get_atomic_dest_mov(struct ir3_instruction *atomic) return atomic->data = mov; } -void +bool ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) { + bool progress = false; + if (ir3_shader_nibo(so) == 0) - return; + return false; foreach_block (block, &ir->block_list) { foreach_instr (instr, &block->instr_list) { @@ -385,21 +387,27 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so) struct ir3_register *reg; foreach_src(reg, instr) { - struct ir3_instruction *src = ssa(reg); + struct ir3_instruction *src = reg->instr; if (!src) continue; - if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) + if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) { reg->instr = get_atomic_dest_mov(src); + progress = true; + } } } + } - /* we also need to fixup shader outputs: */ - struct ir3_instruction *out; - foreach_output_n(out, n, ir) - if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) - ir->outputs[n] = get_atomic_dest_mov(out); + /* we also need to fixup shader outputs: */ + struct ir3_instruction *out; + foreach_output_n (out, n, ir) { + if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) { + ir->outputs[n] = get_atomic_dest_mov(out); + progress = true; + } } + return progress; } |