From 3f965556b49e58c6cce3e58ab4fecefabab71216 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 21 Jun 2013 14:39:54 -0400 Subject: freedreno/a3xx/compiler: fix for replicating instructions If we are accumulating result into tmp.x, and need a mov to final destination, we want to move the .x component into all of the components enabled from the real dest's writemask, ie. we want: MOV dst.xyzw tmp.xxxx rather than: MOV dst.xyzw tmp.xyzw Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 42 ++++++++++------------- 1 file changed, 18 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers/freedreno/a3xx') diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index d844cc0f164..3f8abf077fa 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -304,6 +304,19 @@ get_internal_temp(struct fd3_compile_context *ctx, src_from_dst(tmp_src, tmp_dst); } +/* same as get_internal_temp, but w/ src.xxxx (for instructions that + * replicate their results) + */ +static void +get_internal_temp_repl(struct fd3_compile_context *ctx, + struct tgsi_dst_register *tmp_dst, + struct tgsi_src_register *tmp_src) +{ + get_internal_temp(ctx, tmp_dst, tmp_src); + tmp_src->SwizzleX = tmp_src->SwizzleY = + tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; +} + static void get_immediate(struct fd3_compile_context *ctx, struct tgsi_src_register *reg, uint32_t val) @@ -498,10 +511,7 @@ trans_dotp(const struct instr_translater *t, opc_t opc_mad = ctx->so->half_precision ? 
OPC_MAD_F16 : OPC_MAD_F32; unsigned i; - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - - get_internal_temp(ctx, &tmp_dst, &tmp_src); + get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); /* Blob compiler never seems to use a const in src1 position for * mad.*, although there does seem (according to disassembler @@ -628,10 +638,7 @@ trans_pow(const struct instr_translater *t, struct tgsi_src_register *src0 = &inst->Src[0].Register; struct tgsi_src_register *src1 = &inst->Src[1].Register; - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - - get_internal_temp(ctx, &tmp_dst, &tmp_src); + get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); /* log2 Rtmp, Rsrc0 */ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; @@ -697,9 +704,6 @@ trans_samp(const struct instr_translater *t, if (tex == TGSI_TEXTURE_3D) flags |= IR3_INSTR_3D; - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - /* The texture sample instructions need to coord in successive * registers/components (ie. src.xy but not src.yx). And TXP * needs the .w component in .z for 2D.. 
so in some cases we @@ -707,7 +711,7 @@ trans_samp(const struct instr_translater *t, * around: */ for (i = 1; (i < 4) && (order[i] >= 0); i++) { - if (src_swiz(coord, 0) != (src_swiz(coord, i) + order[i])) { + if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { type_t type_mov = get_type(ctx); /* need to move things around: */ @@ -882,9 +886,6 @@ instr_cat0(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - assert(inst->Instruction.NumSrcRegs == 0); - assert(inst->Instruction.NumDstRegs == 0); - ir3_instr_create(ctx->ir, 0, t->opc); } @@ -896,9 +897,6 @@ instr_cat1(const struct instr_translater *t, struct tgsi_dst_register *dst = get_dst(ctx, inst); struct tgsi_src_register *src = &inst->Src[0].Register; - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - /* mov instructions can't handle a negate on src: */ if (src->Negate) { struct tgsi_src_register constval; @@ -975,9 +973,6 @@ instr_cat3(const struct instr_translater *t, struct tgsi_src_register tmp_src; struct ir3_instruction *instr; - assert(inst->Instruction.NumSrcRegs == 3); - assert(inst->Instruction.NumDstRegs == 1); - /* Blob compiler never seems to use a const in src1 position.. 
* although there does seem (according to disassembler hidden * in libllvm-a3xx.so) to be a bit to indicate that src1 is a @@ -1008,9 +1003,6 @@ instr_cat4(const struct instr_translater *t, struct tgsi_dst_register *dst = get_dst(ctx, inst); struct ir3_instruction *instr; - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; instr = ir3_instr_create(ctx->ir, 4, t->opc); @@ -1126,6 +1118,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) break; case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_TEXCOORD: for (i = decl->Range.First; i <= decl->Range.Last; i++) so->outputs[so->outputs_count++].regid = regid(i + base, 0); break; -- cgit v1.2.3