aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/i915/i915_fpc_optimize.c
diff options
context:
space:
mode:
author: Stéphane Marchesin <[email protected]> 2012-01-11 02:24:34 -0800
committer: Stéphane Marchesin <[email protected]> 2012-01-11 02:25:42 -0800
commit: 3235435e801122fb6d28c0bcc232ddaeb6b9c568 (patch)
tree: 1d5bf9bc508cfeabb85ede309e18ad1efc174748 /src/gallium/drivers/i915/i915_fpc_optimize.c
parent: 5840ec2d41d09895bec7074a90521978fb933695 (diff)
i915g: Fix bugs in the shader optimizer.
Diffstat (limited to 'src/gallium/drivers/i915/i915_fpc_optimize.c')
-rw-r--r-- src/gallium/drivers/i915/i915_fpc_optimize.c 126
1 files changed, 80 insertions, 46 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index d9b4c46b909..b09f18b01ee 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -66,6 +66,8 @@ static boolean has_destination(unsigned opcode)
{
return (opcode != TGSI_OPCODE_NOP &&
opcode != TGSI_OPCODE_KIL &&
+ opcode != TGSI_OPCODE_KILP &&
+ opcode != TGSI_OPCODE_END &&
opcode != TGSI_OPCODE_RET);
}
@@ -130,6 +132,50 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r,
r->Register.SwizzleW = TGSI_SWIZZLE_W;
}
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+ o->File = i->File;
+ o->Indirect = i->Indirect;
+ o->Dimension = i->Dimension;
+ o->Index = i->Index;
+ o->SwizzleX = i->SwizzleX;
+ o->SwizzleY = i->SwizzleY;
+ o->SwizzleZ = i->SwizzleZ;
+ o->SwizzleW = i->SwizzleW;
+ o->Absolute = i->Absolute;
+ o->Negate = i->Negate;
+}
+
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+ o->File = i->File;
+ o->WriteMask = i->WriteMask;
+ o->Indirect = i->Indirect;
+ o->Dimension = i->Dimension;
+ o->Index = i->Index;
+}
+
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+ memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
+ memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
+
+ copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+ copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
+ copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
+ copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
+}
+
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+ if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+ memcpy(o, i, sizeof(*o));
+ else
+ copy_instruction(&o->FullInstruction, &i->FullInstruction);
+
+}
+
/*
* Optimize away things like:
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
@@ -147,6 +193,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
+ !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
@@ -170,6 +217,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
+ !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
@@ -189,12 +237,36 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio
/*
* Optimize away things like:
+ * MOV TEMP[0].xyz TEMP[0].xyzx
+ * into:
+ * NOP
+ */
+static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
+{
+ union i915_full_token current;
+ copy_token(&current , tgsi_current);
+ if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
+ current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
+ has_destination(current.FullInstruction.Instruction.Opcode) &&
+ current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+ current.FullInstruction.Src[0].Register.Absolute == 0 &&
+ current.FullInstruction.Src[0].Register.Negate == 0 &&
+ is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
+ same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
+ {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * Optimize away things like:
* *** TEMP[0], TEMP[1], TEMP[2]
* MOV OUT[0] TEMP[0]
* into:
* *** OUT[0], TEMP[1], TEMP[2]
*/
-static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next)
+static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
{
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
@@ -215,50 +287,6 @@ static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union
}
}
-static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
-{
- o->File = i->File;
- o->Indirect = i->Indirect;
- o->Dimension = i->Dimension;
- o->Index = i->Index;
- o->SwizzleX = i->SwizzleX;
- o->SwizzleY = i->SwizzleY;
- o->SwizzleZ = i->SwizzleZ;
- o->SwizzleW = i->SwizzleW;
- o->Absolute = i->Absolute;
- o->Negate = i->Negate;
-}
-
-static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
-{
- o->File = i->File;
- o->WriteMask = i->WriteMask;
- o->Indirect = i->Indirect;
- o->Dimension = i->Dimension;
- o->Index = i->Index;
-}
-
-static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
-{
- memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
- memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
-
- copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
-
- copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
- copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
- copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
-}
-
-static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
-{
- if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
- memcpy(o, i, sizeof(*o));
- else
- copy_instruction(&o->FullInstruction, &i->FullInstruction);
-
-}
-
struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
{
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
@@ -281,10 +309,16 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
tgsi_parse_init( &parse, tokens );
while( !tgsi_parse_end_of_tokens( &parse ) ) {
tgsi_parse_token( &parse );
+
+ if (i915_fpc_useless_mov(&parse.FullToken)) {
+ out_tokens->NumTokens--;
+ continue;
+ }
+
copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
if (i > 0) {
- i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+ i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
}
i++;