1 files changed, 31 insertions, 23 deletions
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index fd18fc3aa3c..6fed98d7673 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -330,35 +330,35 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
  * extra mov from src1.x to dst.  This way the other compiler passes
  * can ignore this quirk of the new instruction encoding.
  *
- * This might cause extra complication in the future when we support
- * spilling, as I think we'd want to re-run the scheduling pass.  One
- * possible alternative might be to do this in the RA pass after
- * ra_allocate() but before destroying the SSA links.  (Ie. we do
- * want to know if anything consumes the result of the atomic instr,
- * if there is no consumer then inserting the extra mov is pointless.
+ * This fixup pass should run after register allocation (RA).
  */
 
 static struct ir3_instruction *
 get_atomic_dest_mov(struct ir3_instruction *atomic)
 {
+	struct ir3_instruction *mov;
+
 	/* if we've already created the mov-out, then re-use it: */
 	if (atomic->data)
 		return atomic->data;
 
+	/* We are already out of SSA here, so we can't use the nice builders: */
+	mov = ir3_instr_create(atomic->block, OPC_MOV);
+	ir3_reg_create(mov, 0, 0);    /* dst */
+	ir3_reg_create(mov, 0, 0);    /* src */
+
+	mov->cat1.src_type = TYPE_U32;
+	mov->cat1.dst_type = TYPE_U32;
+
 	/* extract back out the 'dummy' which serves as stand-in for dest: */
-	struct ir3_instruction *src = ssa(atomic->regs[3]);
+	struct ir3_instruction *src = atomic->regs[3]->instr;
 	debug_assert(src->opc == OPC_META_COLLECT);
-	struct ir3_instruction *dummy = ssa(src->regs[1]);
 
-	struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32);
+	*mov->regs[0] = *atomic->regs[0];
+	*mov->regs[1] = *src->regs[1]->instr->regs[0];
 
 	mov->flags |= IR3_INSTR_SY;
 
-	if (atomic->regs[0]->flags & IR3_REG_ARRAY) {
-		mov->regs[0]->flags |= IR3_REG_ARRAY;
-		mov->regs[0]->array = atomic->regs[0]->array;
-	}
-
 	/* it will have already been appended to the end of the block, which
 	 * isn't where we want it, so fix-up the location:
 	 */
@@ -368,11 +368,13 @@ get_atomic_dest_mov(struct ir3_instruction *atomic)
 	return atomic->data = mov;
 }
 
-void
+bool
 ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so)
 {
+	bool progress = false;
+
 	if (ir3_shader_nibo(so) == 0)
-		return;
+		return false;
 
 	foreach_block (block, &ir->block_list) {
 		foreach_instr (instr, &block->instr_list) {
@@ -385,21 +387,27 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so)
 			struct ir3_register *reg;
 
 			foreach_src(reg, instr) {
-				struct ir3_instruction *src = ssa(reg);
+				struct ir3_instruction *src = reg->instr;
 
 				if (!src)
 					continue;
 
-				if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G))
+				if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) {
 					reg->instr = get_atomic_dest_mov(src);
+					progress = true;
+				}
 			}
 		}
+	}
 
-		/* we also need to fixup shader outputs: */
-		struct ir3_instruction *out;
-		foreach_output_n(out, n, ir)
-			if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G))
-				ir->outputs[n] = get_atomic_dest_mov(out);
+	/* we also need to fixup shader outputs: */
+	struct ir3_instruction *out;
+	foreach_output_n (out, n, ir) {
+		if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) {
+			ir->outputs[n] = get_atomic_dest_mov(out);
+			progress = true;
+		}
 	}
 
+	return progress;
 }