diff options
author | Matt Turner <[email protected]> | 2016-04-20 14:22:53 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2016-04-21 10:32:40 -0700 |
commit | 33565d67641142a68d537023e181b6dcd587e551 (patch) | |
tree | 45511efd04452dc6e996890c165259eb8fe60290 /src | |
parent | 0020ca3c9269a8baa54501cb824af14fa497ac14 (diff) |
i965/fs: Readd opt_drop_redundant_mov_to_flags().
This reverts commit b449366587b5f3f64c6fb45fe22c39e4bc8a4309.
I removed the pass thinking that it was now not useful, but that was not
true. I believe I ran shader-db on HSW and saw no results, but HSW does
not use the unlit centroid workaround code and as a result does not emit
redundant MOV_DISPATCH_TO_FLAGS instructions.
On IVB, the shader-db results are:
total instructions in shared programs: 6650806 -> 6646303 (-0.07%)
instructions in affected programs: 106893 -> 102390 (-4.21%)
helped: 793
total cycles in shared programs: 56195538 -> 56103720 (-0.16%)
cycles in affected programs: 873048 -> 781230 (-10.52%)
helped: 553
HURT: 209
On SNB, the shader-db results are:
total instructions in shared programs: 7173074 -> 7168541 (-0.06%)
instructions in affected programs: 119757 -> 115224 (-3.79%)
helped: 799
total cycles in shared programs: 98128032 -> 98072938 (-0.06%)
cycles in affected programs: 1437104 -> 1382010 (-3.83%)
helped: 454
HURT: 237
Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 40 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 |
2 files changed, 41 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index fc72cb36788..5d6a10773b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5186,6 +5186,44 @@ fs_visitor::calculate_register_pressure() } } +/** + * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones. + * + * The needs_unlit_centroid_workaround ends up producing one of these per + * channel of centroid input, so it's good to clean them up. + * + * An assumption here is that nothing ever modifies the dispatched pixels + * value that FS_OPCODE_MOV_DISPATCH_TO_FLAGS reads from, but the hardware + * dictates that anyway. + */ +bool +fs_visitor::opt_drop_redundant_mov_to_flags() +{ + bool flag_mov_found[2] = {false}; + bool progress = false; + + /* Instructions removed by this pass can only be added if this were true */ + if (!devinfo->needs_unlit_centroid_workaround) + return false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->is_control_flow()) { + memset(flag_mov_found, 0, sizeof(flag_mov_found)); + } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) { + if (!flag_mov_found[inst->flag_subreg]) { + flag_mov_found[inst->flag_subreg] = true; + } else { + inst->remove(block); + progress = true; + } + } else if (inst->writes_flag()) { + flag_mov_found[inst->flag_subreg] = false; + } + } + + return progress; +} + void fs_visitor::optimize() { @@ -5243,6 +5281,8 @@ fs_visitor::optimize() int iteration = 0; int pass_num = 0; + OPT(opt_drop_redundant_mov_to_flags); + OPT(lower_simd_width); OPT(lower_logical_sends); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index c7f20ca5646..bcd2e3e9b12 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -153,6 +153,7 @@ public: bool try_constant_propagate(fs_inst *inst, acp_entry *entry); bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block, exec_list *acp); + bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool register_coalesce(); bool compute_to_mrf(); |