diff options
author | Stéphane Marchesin <[email protected]> | 2011-09-21 17:54:24 -0700 |
---|---|---|
committer | Stéphane Marchesin <[email protected]> | 2011-09-21 18:01:43 -0700 |
commit | 51f641291d19d05acf04f08dd51215c2702a1695 (patch) | |
tree | 9d97d4119f3dd7c94c96453d624bd0d71d56e81f /src/gallium/drivers/i915/i915_fpc_optimize.c | |
parent | fc1fbb2dc183f979d30b8abc8b235c3a8ff32839 (diff) |
i915g: Add an optimization to get rid of useless movs, in particular at the end of some shaders.
Diffstat (limited to 'src/gallium/drivers/i915/i915_fpc_optimize.c')
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_optimize.c | 41 |
1 files changed, 38 insertions, 3 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 654a8f81a09..50b3a28573c 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -36,6 +36,14 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" +static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1) +{ + return (s1->Register.File == d1->Register.File && + s1->Register.Indirect == d1->Register.Indirect && + s1->Register.Dimension == d1->Register.Dimension && + s1->Register.Index == d1->Register.Index); +} + static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) { return (d1->Register.File == d2->Register.File && @@ -119,7 +127,7 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r, * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] * MOV OUT[0].w, TEMP[2] - * into: + * into: * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] * This is useful for optimizing texenv. 
*/ @@ -172,6 +180,32 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio } } +/* + * Optimize away things like: + * *** TEMP[0], TEMP[1], TEMP[2] + * MOV OUT[0] TEMP[0] + * into: + * *** OUT[0], TEMP[1], TEMP[2] + */ +static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next) +{ + if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && + next->FullInstruction.Src[0].Register.Absolute == 0 && + next->FullInstruction.Src[0].Register.Negate == 0 && + is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && + current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && + same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) ) + { + next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; + + current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; + return; + } +} + static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) { o->File = i->File; @@ -240,9 +274,10 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) tgsi_parse_token( &parse ); copy_token(&out_tokens->Tokens[i] , &parse.FullToken); - if (i > 0) + if (i > 0) { + i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); - + } i++; } tgsi_parse_free (&parse); |