summary refs log tree commit diff stats
path: root/src/gallium/drivers/i915/i915_fpc_optimize.c
diff options
context:
space:
mode:
author Stéphane Marchesin <[email protected]> 2011-09-21 17:54:24 -0700
committer Stéphane Marchesin <[email protected]> 2011-09-21 18:01:43 -0700
commit 51f641291d19d05acf04f08dd51215c2702a1695 (patch)
tree 9d97d4119f3dd7c94c96453d624bd0d71d56e81f /src/gallium/drivers/i915/i915_fpc_optimize.c
parent fc1fbb2dc183f979d30b8abc8b235c3a8ff32839 (diff)
i915g: Add an optimization to get rid of useless movs, in particular at the end of some shaders.
Diffstat (limited to 'src/gallium/drivers/i915/i915_fpc_optimize.c')
-rw-r--r-- src/gallium/drivers/i915/i915_fpc_optimize.c 41
1 files changed, 38 insertions, 3 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index 654a8f81a09..50b3a28573c 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -36,6 +36,14 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
+static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
+{
+ return (s1->Register.File == d1->Register.File &&
+ s1->Register.Indirect == d1->Register.Indirect &&
+ s1->Register.Dimension == d1->Register.Dimension &&
+ s1->Register.Index == d1->Register.Index);
+}
+
static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
{
return (d1->Register.File == d2->Register.File &&
@@ -119,7 +127,7 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r,
* Optimize away things like:
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
* MOV OUT[0].w, TEMP[2]
- * into:
+ * into:
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
* This is useful for optimizing texenv.
*/
@@ -172,6 +180,32 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
}
}
+/*
+ * Optimize away things like:
+ * *** TEMP[0], TEMP[1], TEMP[2]
+ * MOV OUT[0], TEMP[0]
+ * into:
+ * *** OUT[0], TEMP[1], TEMP[2]
+ */
+static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next)
+{
+ if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+ next->FullInstruction.Src[0].Register.Absolute == 0 &&
+ next->FullInstruction.Src[0].Register.Negate == 0 &&
+ is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
+ current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
+ same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
+ {
+ next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
+
+ current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
+ return;
+ }
+}
+
static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
{
o->File = i->File;
@@ -240,9 +274,10 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
tgsi_parse_token( &parse );
copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
- if (i > 0)
+ if (i > 0) {
+ i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
-
+ }
i++;
}
tgsi_parse_free (&parse);