diff options
author | Stéphane Marchesin <[email protected]> | 2012-01-11 02:24:34 -0800 |
---|---|---|
committer | Stéphane Marchesin <[email protected]> | 2012-01-11 02:25:42 -0800 |
commit | 3235435e801122fb6d28c0bcc232ddaeb6b9c568 (patch) | |
tree | 1d5bf9bc508cfeabb85ede309e18ad1efc174748 | |
parent | 5840ec2d41d09895bec7074a90521978fb933695 (diff) |
i915g: Fix bugs in the shader optimizer.
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_optimize.c | 126 |
1 files changed, 80 insertions, 46 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index d9b4c46b909..b09f18b01ee 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -66,6 +66,8 @@ static boolean has_destination(unsigned opcode) { return (opcode != TGSI_OPCODE_NOP && opcode != TGSI_OPCODE_KIL && + opcode != TGSI_OPCODE_KILP && + opcode != TGSI_OPCODE_END && opcode != TGSI_OPCODE_RET); } @@ -130,6 +132,50 @@ static void set_neutral_element_swizzle(struct i915_full_src_register* r, r->Register.SwizzleW = TGSI_SWIZZLE_W; } +static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) +{ + o->File = i->File; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; + o->SwizzleX = i->SwizzleX; + o->SwizzleY = i->SwizzleY; + o->SwizzleZ = i->SwizzleZ; + o->SwizzleW = i->SwizzleW; + o->Absolute = i->Absolute; + o->Negate = i->Negate; +} + +static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) +{ + o->File = i->File; + o->WriteMask = i->WriteMask; + o->Indirect = i->Indirect; + o->Dimension = i->Dimension; + o->Index = i->Index; +} + +static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) +{ + memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); + memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); + + copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); + + copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); + copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); + copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); +} + +static void copy_token(union i915_full_token* o, union tgsi_full_token* i) +{ + if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + memcpy(o, i, sizeof(*o)); + else + copy_instruction(&o->FullInstruction, &i->FullInstruction); + +} + /* * Optimize away things like: * MUL OUT[0].xyz, TEMP[1], TEMP[2] @@ -147,6 
+193,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) && + !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) && is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) @@ -170,6 +217,7 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) && same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) && + !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) && is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) @@ -189,12 +237,36 @@ static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, unio /* * Optimize away things like: + * MOV TEMP[0].xyz TEMP[0].xyzx + * into: + * NOP + */ +static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) +{ + union i915_full_token current; + copy_token(&current , tgsi_current); + if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && + current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && + has_destination(current.FullInstruction.Instruction.Opcode) && + current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && + 
current.FullInstruction.Src[0].Register.Absolute == 0 && + current.FullInstruction.Src[0].Register.Negate == 0 && + is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && + same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) ) + { + return TRUE; + } + return FALSE; +} + +/* + * Optimize away things like: * *** TEMP[0], TEMP[1], TEMP[2] * MOV OUT[0] TEMP[0] * into: * *** OUT[0], TEMP[1], TEMP[2] */ -static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union i915_full_token* next) +static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next) { if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && @@ -215,50 +287,6 @@ static void i915_fpc_optimize_useless_mov(union i915_full_token* current, union } } -static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) -{ - o->File = i->File; - o->Indirect = i->Indirect; - o->Dimension = i->Dimension; - o->Index = i->Index; - o->SwizzleX = i->SwizzleX; - o->SwizzleY = i->SwizzleY; - o->SwizzleZ = i->SwizzleZ; - o->SwizzleW = i->SwizzleW; - o->Absolute = i->Absolute; - o->Negate = i->Negate; -} - -static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) -{ - o->File = i->File; - o->WriteMask = i->WriteMask; - o->Indirect = i->Indirect; - o->Dimension = i->Dimension; - o->Index = i->Index; -} - -static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) -{ - memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); - memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); - - copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); - - copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); - copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); - copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); -} - -static void 
copy_token(union i915_full_token* o, union tgsi_full_token* i) -{ - if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) - memcpy(o, i, sizeof(*o)); - else - copy_instruction(&o->FullInstruction, &i->FullInstruction); - -} - struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) { struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); @@ -281,10 +309,16 @@ struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); + + if (i915_fpc_useless_mov(&parse.FullToken)) { + out_tokens->NumTokens--; + continue; + } + copy_token(&out_tokens->Tokens[i] , &parse.FullToken); if (i > 0) { - i915_fpc_optimize_useless_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); + i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); } i++; |