summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2016-04-20 14:22:53 -0700
committerMatt Turner <[email protected]>2016-04-21 10:32:40 -0700
commit33565d67641142a68d537023e181b6dcd587e551 (patch)
tree45511efd04452dc6e996890c165259eb8fe60290 /src
parent0020ca3c9269a8baa54501cb824af14fa497ac14 (diff)
i965/fs: Readd opt_drop_redundant_mov_to_flags().
This reverts commit b449366587b5f3f64c6fb45fe22c39e4bc8a4309. I removed the pass thinking that it was now not useful, but that was not true. I believe I ran shader-db on HSW and saw no results, but HSW does not use the unlit centroid workaround code and as a result does not emit redundant MOV_DISPATCH_TO_FLAGS instructions. On IVB, the shader-db results are: total instructions in shared programs: 6650806 -> 6646303 (-0.07%) instructions in affected programs: 106893 -> 102390 (-4.21%) helped: 793 total cycles in shared programs: 56195538 -> 56103720 (-0.16%) cycles in affected programs: 873048 -> 781230 (-10.52%) helped: 553 HURT: 209 On SNB, the shader-db results are: total instructions in shared programs: 7173074 -> 7168541 (-0.06%) instructions in affected programs: 119757 -> 115224 (-3.79%) helped: 799 total cycles in shared programs: 98128032 -> 98072938 (-0.06%) cycles in affected programs: 1437104 -> 1382010 (-3.83%) helped: 454 HURT: 237 Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp40
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h1
2 files changed, 41 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index fc72cb36788..5d6a10773b3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5186,6 +5186,44 @@ fs_visitor::calculate_register_pressure()
}
}
+/**
+ * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones.
+ *
+ * The needs_unlit_centroid_workaround ends up producing one of these per
+ * channel of centroid input, so it's good to clean them up.
+ *
+ * An assumption here is that nothing ever modifies the dispatched pixels
+ * value that FS_OPCODE_MOV_DISPATCH_TO_FLAGS reads from, but the hardware
+ * dictates that anyway.
+ */
+bool
+fs_visitor::opt_drop_redundant_mov_to_flags()
+{
+ bool flag_mov_found[2] = {false};
+ bool progress = false;
+
+ /* Instructions removed by this pass can only be added if this were true */
+ if (!devinfo->needs_unlit_centroid_workaround)
+ return false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ if (inst->is_control_flow()) {
+ memset(flag_mov_found, 0, sizeof(flag_mov_found));
+ } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
+ if (!flag_mov_found[inst->flag_subreg]) {
+ flag_mov_found[inst->flag_subreg] = true;
+ } else {
+ inst->remove(block);
+ progress = true;
+ }
+ } else if (inst->writes_flag()) {
+ flag_mov_found[inst->flag_subreg] = false;
+ }
+ }
+
+ return progress;
+}
+
void
fs_visitor::optimize()
{
@@ -5243,6 +5281,8 @@ fs_visitor::optimize()
int iteration = 0;
int pass_num = 0;
+ OPT(opt_drop_redundant_mov_to_flags);
+
OPT(lower_simd_width);
OPT(lower_logical_sends);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c7f20ca5646..bcd2e3e9b12 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -153,6 +153,7 @@ public:
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
exec_list *acp);
+ bool opt_drop_redundant_mov_to_flags();
bool opt_register_renaming();
bool register_coalesce();
bool compute_to_mrf();