summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2014-02-14 15:29:01 -0800
committer: Eric Anholt <[email protected]> 2014-02-20 10:15:13 -0800
commit: 9e3cab8881626edd72d222f35c5d2a5fd9661bce (patch)
tree: 4f0ffe861cb912bd57c4a015e886807193d5daee
parent: b2b2a2c06c20f3ca592af6e96222deab67ea239c (diff)
i965/fs: Add an optimization pass to remove redundant flags movs.
We generate steaming piles of these for the centroid workaround, and this
quickly cleans them up.

total instructions in shared programs: 1591228 -> 1590047 (-0.07%)
instructions in affected programs: 26111 -> 24930 (-4.52%)
GAINED: 0
LOST: 0

(Improved apps are l4d2, csgo, and dolphin)

Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 33
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h | 1
2 files changed, 34 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0fd9e908f58..65f2c808626 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3304,6 +3304,37 @@ fs_visitor::calculate_register_pressure()
}
}
+/**
+ * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones.
+ *
+ * The needs_unlit_centroid_workaround ends up producing one of these per
+ * channel of centroid input, so it's good to clean them up.
+ *
+ * An assumption here is that nothing ever modifies the dispatched pixels
+ * value that FS_OPCODE_MOV_DISPATCH_TO_FLAGS reads from, but the hardware
+ * dictates that anyway.
+ */
+void
+fs_visitor::opt_drop_redundant_mov_to_flags()
+{
+ bool flag_mov_found[2] = {false}; /* indexed by inst->flag_subreg: true once a dispatch-to-flags MOV has been kept for it */
+
+ foreach_list_safe(node, &this->instructions) { /* NOTE(review): presumably the _safe iterator is what allows inst->remove() below -- confirm */
+ fs_inst *inst = (fs_inst *)node;
+
+ if (inst->is_control_flow()) {
+ memset(flag_mov_found, 0, sizeof(flag_mov_found)); /* forget every recorded MOV: tracking is never carried across a control-flow instruction */
+ } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
+ if (!flag_mov_found[inst->flag_subreg])
+ flag_mov_found[inst->flag_subreg] = true; /* first MOV seen for this subregister since the last reset: keep it and remember it */
+ else
+ inst->remove(); /* an earlier MOV to the same subregister is still recorded, so this one is the redundant repeat */
+ } else if (inst->writes_flag()) {
+ flag_mov_found[inst->flag_subreg] = false; /* some other flag write targets this subregister: a later MOV must be kept again */
+ }
+ }
+}
+
bool
fs_visitor::run()
{
@@ -3369,6 +3400,8 @@ fs_visitor::run()
remove_dead_constants();
setup_pull_constants();
+ opt_drop_redundant_mov_to_flags();
+
bool progress;
do {
progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8a596bcaa0e..5058c48bdc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -367,6 +367,7 @@ public:
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
exec_list *acp);
+ void opt_drop_redundant_mov_to_flags();
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();