panfrost/midgard: Always break up fragment writeout

In a fragment shader, r0 is written out with a special branch sequence. r0 is not a real register here, but essentially a pipeline register -- as such, it needs to be written out in full and on time, with hanging dependencies in the bundle. Otherwise, we break up the bundle, which costs an extra ALU cycle and adds a move. When the scheduler ran last thing, we could do this analysis within the scheduler. Now that RA can run after scheduling, that's no longer valid, so we remove the analysis and always break it up (at a performance penalty). Future work can add a post-RA/post-schedule pass to merge writeout blocks if possible. It's a bit of a low-priority next to fixing conformance regressions, of course. Signed-off-by: Alyssa Rosenzweig <[email protected]>
author: Alyssa Rosenzweig <[email protected]> 2019-06-05 14:48:57 +0000
committer: Alyssa Rosenzweig <[email protected]> 2019-06-05 18:06:49 +0000
commit: 1ea987576d167db7d762e8d99ea05efbb519281b (patch)
tree: 20cade1559d8a246bdb62d497419d1918615b4df /src/gallium/drivers
parent: 3d11b075f0af363f8c8c433508b379454d8db9a8 (diff)
1 files changed, 21 insertions, 68 deletions
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
index 1a562af8142..9d4ce2a97d5 100644
--- a/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_schedule.c
@@ -288,75 +288,28 @@ schedule_bundle(compiler_context *ctx, midgard_block *block, midgard_instruction
                                  * the branch writeout */
 
                                 if (ains->writeout) {
-                                        if (index == 0) {
-                                                /* Inject a move */
-                                                midgard_instruction ins = v_fmov(0, blank_alu_src, SSA_FIXED_REGISTER(0));
-                                                ins.unit = UNIT_VMUL;
-                                                control |= ins.unit;
-
-                                                /* TODO don't leak */
-                                                midgard_instruction *move =
-                                                        mem_dup(&ins, sizeof(midgard_instruction));
-                                                bytes_emitted += sizeof(midgard_reg_info);
-                                                bytes_emitted += sizeof(midgard_vector_alu);
-                                                bundle.instructions[packed_idx++] = move;
-                                        } else {
-                                                /* Analyse the group to see if r0 is written in full, on-time, without hanging dependencies */
-                                                bool written_late = false;
-                                                bool components[4] = { 0 };
-                                                uint16_t register_dep_mask = 0;
-                                                uint16_t written_mask = 0;
-
-                                                midgard_instruction *qins = ins;
-                                                for (int t = 0; t < index; ++t) {
-                                                        if (qins->registers.out_reg != 0) {
-                                                                /* Mark down writes */
-
-                                                                written_mask |= (1 << qins->registers.out_reg);
-                                                        } else {
-                                                                /* Mark down the register dependencies for errata check */
-
-                                                                if (qins->registers.src1_reg < 16)
-                                                                        register_dep_mask |= (1 << qins->registers.src1_reg);
-
-                                                                if (qins->registers.src2_reg < 16)
-                                                                        register_dep_mask |= (1 << qins->registers.src2_reg);
-
-                                                                int mask = qins->alu.mask;
-
-                                                                for (int c = 0; c < 4; ++c)
-                                                                        if (mask & (0x3 << (2 * c)))
-                                                                                components[c] = true;
-
-                                                                /* ..but if the writeout is too late, we have to break up anyway... for some reason */
-
-                                                                if (qins->unit == UNIT_VLUT)
-                                                                        written_late = true;
-                                                        }
-
-                                                        /* Advance instruction pointer */
-                                                        qins = mir_next_op(qins);
-                                                }
-
-                                                /* Register dependencies of r0 must be out of fragment writeout bundle */
-                                                if (register_dep_mask & written_mask)
-                                                        break;
-
-                                                if (written_late)
-                                                        break;
-
-                                                /* If even a single component is not written, break it up (conservative check). */
-                                                bool breakup = false;
-
-                                                for (int c = 0; c < 4; ++c)
-                                                        if (!components[c])
-                                                                breakup = true;
-
-                                                if (breakup)
-                                                        break;
+                                        /* The rules for when "bare" writeout
+                                         * is safe are when all components are
+                                         * r0 are written out in the final
+                                         * bundle, earlier than VLUT, where any
+                                         * register dependencies of r0 are from
+                                         * an earlier bundle. We can't verify
+                                         * this before RA, so we don't try. */
+
+                                        if (index != 0)
+                                                break;
 
-                                                /* Otherwise, we're free to proceed */
-                                        }
+                                        /* Inject a move */
+                                        midgard_instruction ins = v_fmov(0, blank_alu_src, SSA_FIXED_REGISTER(0));
+                                        ins.unit = UNIT_VMUL;
+                                        control |= ins.unit;
+
+                                        /* TODO don't leak */
+                                        midgard_instruction *move =
+                                                mem_dup(&ins, sizeof(midgard_instruction));
+                                        bytes_emitted += sizeof(midgard_reg_info);
+                                        bytes_emitted += sizeof(midgard_vector_alu);
+                                        bundle.instructions[packed_idx++] = move;
                                 }
 
                                 if (ains->unit == ALU_ENAB_BRANCH) {
author	Alyssa Rosenzweig <[email protected]>	2019-06-05 14:48:57 +0000
committer	Alyssa Rosenzweig <[email protected]>	2019-06-05 18:06:49 +0000
commit	1ea987576d167db7d762e8d99ea05efbb519281b (patch)
tree	20cade1559d8a246bdb62d497419d1918615b4df /src/gallium/drivers
parent	3d11b075f0af363f8c8c433508b379454d8db9a8 (diff)