diff options
author | Matt Turner <[email protected]> | 2014-06-11 13:49:34 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2014-07-06 18:19:15 -0700 |
commit | 6f7c4a8d05df0337a992ff485c9de86f8bf81e0a (patch) | |
tree | afd824f97b4f78cf6d556da86e3ef6df4f5d831c /src/mesa/drivers | |
parent | 7921bf0062e82eab732f788146464f0f0ed39b20 (diff) |
i965/vec4: Perform CSE on CMP(N) instructions.
Port of commit b16b3c87 to the vec4 code.
No shader-db improvements, but might as well. The fs backend saw an
improvement because it's scalar and multiple identical CMP instructions
were generated by the SEL peepholes.
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 17 |
1 file changed, 16 insertions, 1 deletion
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index f3a69ed9fb6..eeaa743e15f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -56,6 +56,8 @@ is_expression(const vec4_instruction *const inst)
    case BRW_OPCODE_SHR:
    case BRW_OPCODE_SHL:
    case BRW_OPCODE_ASR:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
    case BRW_OPCODE_FRC:
@@ -135,7 +137,7 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 
       /* Skip some cases. */
       if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
-          !inst->conditional_mod)
+          (inst->dst.file != HW_REG || inst->dst.is_null()))
       {
          bool found = false;
 
@@ -195,6 +197,19 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
       }
 
       foreach_in_list_safe(aeb_entry, entry, aeb) {
+         /* Kill all AEB entries that write a different value to or read from
+          * the flag register if we just wrote it.
+          */
+         if (inst->writes_flag()) {
+            if (entry->generator->reads_flag() ||
+                (entry->generator->writes_flag() &&
+                 !instructions_match(inst, entry->generator))) {
+               entry->remove();
+               ralloc_free(entry);
+               continue;
+            }
+         }
+
          for (int i = 0; i < 3; i++) {
             src_reg *src = &entry->generator->src[i];
 
```