aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Rob Clark <[email protected]> 2020-01-10 14:07:03 -0800
committer: Marge Bot <[email protected]> 2020-02-01 02:40:22 +0000
commit: 300d1181c72043afe045a155079fc152fcd1283e (patch)
tree: 44e62fdeb54af7f78b0fd7de5410905ad2a7689f /src
parent: 304b50c9f8f57a115ac251f022093c8adfb0823d (diff)
freedreno/ir3: move atomic fixup after RA
A post-RA sched pass will move the extra movs to the wrong place, so rework the fixup so that it can run after RA (and therefore after postsched).

Signed-off-by: Rob Clark <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3569>
Diffstat (limited to 'src')
-rw-r--r-- src/freedreno/ir3/ir3.h 2
-rw-r--r-- src/freedreno/ir3/ir3_a6xx.c 54
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 10
3 files changed, 38 insertions, 28 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index bf0f92a478a..b9cf06e636d 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1138,7 +1138,7 @@ void ir3_sun(struct ir3 *ir);
void ir3_sched_add_deps(struct ir3 *ir);
int ir3_sched(struct ir3 *ir);
-void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);
+bool ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);
/* register assignment: */
struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index fd18fc3aa3c..6fed98d7673 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -330,35 +330,35 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
* extra mov from src1.x to dst. This way the other compiler passes
* can ignore this quirk of the new instruction encoding.
*
- * This might cause extra complication in the future when we support
- * spilling, as I think we'd want to re-run the scheduling pass. One
- * possible alternative might be to do this in the RA pass after
- * ra_allocate() but before destroying the SSA links. (Ie. we do
- * want to know if anything consumes the result of the atomic instr,
- * if there is no consumer then inserting the extra mov is pointless.
+ * This should run after RA.
*/
static struct ir3_instruction *
get_atomic_dest_mov(struct ir3_instruction *atomic)
{
+ struct ir3_instruction *mov;
+
/* if we've already created the mov-out, then re-use it: */
if (atomic->data)
return atomic->data;
+ /* We are already out of SSA here, so we can't use the nice builders: */
+ mov = ir3_instr_create(atomic->block, OPC_MOV);
+ ir3_reg_create(mov, 0, 0); /* dst */
+ ir3_reg_create(mov, 0, 0); /* src */
+
+ mov->cat1.src_type = TYPE_U32;
+ mov->cat1.dst_type = TYPE_U32;
+
/* extract back out the 'dummy' which serves as stand-in for dest: */
- struct ir3_instruction *src = ssa(atomic->regs[3]);
+ struct ir3_instruction *src = atomic->regs[3]->instr;
debug_assert(src->opc == OPC_META_COLLECT);
- struct ir3_instruction *dummy = ssa(src->regs[1]);
- struct ir3_instruction *mov = ir3_MOV(atomic->block, dummy, TYPE_U32);
+ *mov->regs[0] = *atomic->regs[0];
+ *mov->regs[1] = *src->regs[1]->instr->regs[0];
mov->flags |= IR3_INSTR_SY;
- if (atomic->regs[0]->flags & IR3_REG_ARRAY) {
- mov->regs[0]->flags |= IR3_REG_ARRAY;
- mov->regs[0]->array = atomic->regs[0]->array;
- }
-
/* it will have already been appended to the end of the block, which
* isn't where we want it, so fix-up the location:
*/
@@ -368,11 +368,13 @@ get_atomic_dest_mov(struct ir3_instruction *atomic)
return atomic->data = mov;
}
-void
+bool
ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so)
{
+ bool progress = false;
+
if (ir3_shader_nibo(so) == 0)
- return;
+ return false;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
@@ -385,21 +387,27 @@ ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so)
struct ir3_register *reg;
foreach_src(reg, instr) {
- struct ir3_instruction *src = ssa(reg);
+ struct ir3_instruction *src = reg->instr;
if (!src)
continue;
- if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G))
+ if (is_atomic(src->opc) && (src->flags & IR3_INSTR_G)) {
reg->instr = get_atomic_dest_mov(src);
+ progress = true;
+ }
}
}
+ }
- /* we also need to fixup shader outputs: */
- struct ir3_instruction *out;
- foreach_output_n(out, n, ir)
- if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G))
- ir->outputs[n] = get_atomic_dest_mov(out);
+ /* we also need to fixup shader outputs: */
+ struct ir3_instruction *out;
+ foreach_output_n (out, n, ir) {
+ if (is_atomic(out->opc) && (out->flags & IR3_INSTR_G)) {
+ ir->outputs[n] = get_atomic_dest_mov(out);
+ progress = true;
+ }
}
+ return progress;
}
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 1ae1dfd8e63..c5a1f915b9c 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -3406,10 +3406,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
goto out;
}
- if (compiler->gpu_id >= 600) {
- ir3_a6xx_fixup_atomic_dests(ir, so);
- }
-
ir3_debug_print(ir, "AFTER SCHED");
/* Pre-assign VS inputs on a6xx+ binning pass shader, to align
@@ -3484,6 +3480,12 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ir3_debug_print(ir, "AFTER RA");
+ if (compiler->gpu_id >= 600) {
+ if (ir3_a6xx_fixup_atomic_dests(ir, so)) {
+ ir3_debug_print(ir, "AFTER ATOMIC FIXUP");
+ }
+ }
+
if (so->type == MESA_SHADER_FRAGMENT)
pack_inlocs(ctx);