diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_lowering.h | 2 |
2 files changed, 149 insertions, 20 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index 0ffd855793a..b2dd37e66c8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx, * dst.w = 1.0 * * ; needs: 1 tmp, imm{1.0} - * FLR tmpA.x, src.x + * if (lowering FLR) { + * FRC tmpA.x, src.x + * SUB tmpA.x, src.x, tmpA.x + * } else { + * FLR tmpA.x, src.x + * } * EX2 tmpA.y, src.x * SUB dst.y, src.x, tmpA.x * EX2 dst.x, tmpA.x * MOV dst.z, tmpA.y * MOV dst.w, imm{1.0} */ -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ NINST(1)+ NINST(1) - OINST(1)) #define EXP_TMP 1 static void @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx, struct tgsi_full_instruction new_inst; if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.x, src.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpA.x, src.x, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } else { + /* FLR tmpA.x, src.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx, * * ; needs: 1 tmp, imm{1.0} * LG2 tmpA.x, |src.x| - * FLR tmpA.y, tmpA.x + * if (lowering FLR) { + * FRC tmpA.y, tmpA.x + * SUB tmpA.y, tmpA.x, tmpA.y + * } else { + * FLR tmpA.y, tmpA.x + * } * EX2 tmpA.z, tmpA.y * RCP tmpA.z, tmpA.z * MUL dst.y, |src.x|, tmpA.z * MOV dst.xz, tmpA.yx * MOV dst.w, imm{1.0} */ -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \ +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ NINST(2) + NINST(1) + NINST(1) - OINST(1)) #define LOG_TMP 1 static void @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx, } if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { - /* FLR tmpA.y, tmpA.x */ - new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; - new_inst.Instruction.NumDstRegs = 1; - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); - new_inst.Instruction.NumSrcRegs = 1; - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); - tctx->emit_instruction(tctx, &new_inst); + if (ctx->config->lower_FLR) { + /* FRC tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + tctx->emit_instruction(tctx, &new_inst); + + /* SUB tmpA.y, tmpA.x, tmpA.y */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } else { + /* FLR tmpA.y, tmpA.x */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + tctx->emit_instruction(tctx, &new_inst); + } } if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { @@ -1005,6 +1057,58 @@ transform_dotp(struct tgsi_transform_context *tctx, } } +/* FLR - floor, CEIL - ceil + * ; needs: 1 tmp + * if (CEIL) { + * FRC tmpA, -src + * ADD dst, src, tmpA + * } else { + * FRC tmpA, src + * SUB dst, src, tmpA + * } + */ +#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) +#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) +#define FLR_TMP 1 +#define CEIL_TMP 1 +static void +transform_flr_ceil(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); + struct tgsi_full_dst_register *dst = &inst->Dst[0]; + struct tgsi_full_src_register *src0 = &inst->Src[0]; + struct tgsi_full_instruction new_inst; + unsigned opcode = inst->Instruction.Opcode; + + if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { + /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 1; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + + if (opcode == TGSI_OPCODE_CEIL) + new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; + tctx->emit_instruction(tctx, &new_inst); + + /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ + new_inst = tgsi_default_full_instruction(); + if (opcode == TGSI_OPCODE_CEIL) + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; + else + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.NumDstRegs = 1; + reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); + new_inst.Instruction.NumSrcRegs = 2; + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &new_inst); + } +} + /* Inserts a MOV_SAT for the needed components of tex coord. Note that * in the case of TXP, the clamping must happen *after* projection, so * we need to lower TXP to TEX. @@ -1401,6 +1505,16 @@ transform_instr(struct tgsi_transform_context *tctx, goto skip; transform_dotp(tctx, inst); break; + case TGSI_OPCODE_FLR: + if (!ctx->config->lower_FLR) + goto skip; + transform_flr_ceil(tctx, inst); + break; + case TGSI_OPCODE_CEIL: + if (!ctx->config->lower_CEIL) + goto skip; + transform_flr_ceil(tctx, inst); + break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXB: @@ -1432,6 +1546,9 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, /* sanity check in case limit is ever increased: */ STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); + /* sanity check the lowering */ + assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); + memset(&ctx, 0, sizeof(ctx)); ctx.base.transform_instruction = transform_instr; ctx.info = info; @@ -1473,6 +1590,8 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, OPCS(DPH) || OPCS(DP2) || OPCS(DP2A) || + OPCS(FLR) || + OPCS(CEIL) || OPCS(TXP) || ctx.two_side_colors || ctx.saturate)) @@ -1541,6 +1660,14 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += DP2A_GROW * OPCS(DP2A); numtmp = MAX2(numtmp, DOTP_TMP); } + if (OPCS(FLR)) { + newlen += FLR_GROW * OPCS(FLR); + numtmp = MAX2(numtmp, FLR_TMP); + } + if (OPCS(CEIL)) { + newlen += CEIL_GROW * OPCS(CEIL); + numtmp = MAX2(numtmp, CEIL_TMP); + } if (ctx.saturate || config->lower_TXP) { int n = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h index 52c204fc55e..a96d85dd155 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h @@ -68,6 +68,8 @@ struct tgsi_lowering_config unsigned lower_DPH:1; unsigned lower_DP2:1; unsigned lower_DP2A:1; + unsigned lower_FLR:1; + unsigned lower_CEIL:1; /* bitmask of (1 << TGSI_TEXTURE_type): */ unsigned lower_TXP; |