summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2014-06-11 13:49:34 -0700
committerMatt Turner <[email protected]>2014-07-06 18:19:15 -0700
commit6f7c4a8d05df0337a992ff485c9de86f8bf81e0a (patch)
treeafd824f97b4f78cf6d556da86e3ef6df4f5d831c /src/mesa
parent7921bf0062e82eab732f788146464f0f0ed39b20 (diff)
i965/vec4: Perform CSE on CMP(N) instructions.
Port of commit b16b3c87 to the vec4 code. No shader-db improvements, but might as well. The fs backend saw an improvement because it's scalar and multiple identical CMP instructions were generated by the SEL peepholes.
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index f3a69ed9fb6..eeaa743e15f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -56,6 +56,8 @@ is_expression(const vec4_instruction *const inst)
case BRW_OPCODE_SHR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_ASR:
+ case BRW_OPCODE_CMP:
+ case BRW_OPCODE_CMPN:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
case BRW_OPCODE_FRC:
@@ -135,7 +137,7 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
/* Skip some cases. */
if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
- !inst->conditional_mod)
+ (inst->dst.file != HW_REG || inst->dst.is_null()))
{
bool found = false;
@@ -195,6 +197,19 @@ vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
}
foreach_in_list_safe(aeb_entry, entry, aeb) {
+ /* Kill all AEB entries that write a different value to or read from
+ * the flag register if we just wrote it.
+ */
+ if (inst->writes_flag()) {
+ if (entry->generator->reads_flag() ||
+ (entry->generator->writes_flag() &&
+ !instructions_match(inst, entry->generator))) {
+ entry->remove();
+ ralloc_free(entry);
+ continue;
+ }
+ }
+
for (int i = 0; i < 3; i++) {
src_reg *src = &entry->generator->src[i];