diff options
author | Eric Anholt <[email protected]> | 2013-03-15 14:43:28 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-04-01 16:17:25 -0700 |
commit | 9f43b8492818bab47ef9cc489b91c2618446a3e9 (patch) | |
tree | 8bdf704ad0fc4a8eb9f3c8c7fc15cab30e4e3829 /src/mesa/drivers | |
parent | dca5fc14358a8b267b3854c39c976a822885898f (diff) |
i965/fs: Do CSE on gen7's varying-index pull constant loads.
This is our first CSE on a regs_written() > 1 instruction, so it takes a
bit of extra fixup. Reduces the number of loads on kwin's Lanczos shader
from 12 to 2.
v2: Fix compiler warning (false positive on possibly-uninitialized variable)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554
Reviewed-by: Kenneth Graunke <[email protected]> (v1)
NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 43 |
1 files changed, 32 insertions, 11 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 02642c91a61..5a50d45ddc9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst)
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
       return true;
@@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
             if (no_existing_temp) {
-               entry->tmp = fs_reg(this, glsl_type::float_type);
-               entry->tmp.type = inst->dst.type;
-
-               fs_inst *copy = new(ralloc_parent(inst))
-                  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
-               entry->generator->insert_after(copy);
-               entry->generator->dst = entry->tmp;
+               int written = entry->generator->regs_written();
+
+               fs_reg orig_dst = entry->generator->dst;
+               fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+                                   orig_dst.type);
+               entry->tmp = tmp;
+               entry->generator->dst = tmp;
+
+               for (int i = 0; i < written; i++) {
+                  fs_inst *copy = MOV(orig_dst, tmp);
+                  copy->force_writemask_all =
+                     entry->generator->force_writemask_all;
+                  entry->generator->insert_after(copy);
+
+                  orig_dst.reg_offset++;
+                  tmp.reg_offset++;
+               }
             }
 
             /* dest <- temp */
+            int written = inst->regs_written();
+            assert(written == entry->generator->regs_written());
             assert(inst->dst.type == entry->tmp.type);
-            fs_inst *copy = new(ralloc_parent(inst))
-               fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
-            copy->force_writemask_all = inst->force_writemask_all;
-            inst->replace_with(copy);
+            fs_reg dst = inst->dst;
+            fs_reg tmp = entry->tmp;
+            fs_inst *copy = NULL;
+            for (int i = 0; i < written; i++) {
+               copy = MOV(dst, tmp);
+               copy->force_writemask_all = inst->force_writemask_all;
+               inst->insert_before(copy);
+
+               dst.reg_offset++;
+               tmp.reg_offset++;
+            }
+            inst->remove();
 
             /* Appending an instruction may have changed our bblock end. */
             if (inst == block->end) {