From aee1ed708ac5fa4a5db47dc84be4aae00af9d0f0 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Tue, 20 Aug 2013 13:51:35 -0400
Subject: freedreno/a3xx/compiler: handle saturate on dst

Sometimes things other than color dst need saturating, like if there is
a 'clamp(foo, 0.0, 1.0)'. So for saturated dst add the extra
instructions to fix up dst.

Signed-off-by: Rob Clark
---
 src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'src')

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 07bede4a4c8..e2c78531d1e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -131,6 +131,11 @@ struct fd3_compile_context {
 	struct tgsi_src_register tmp_src;
 };
 
+/* forward declaration: create_clamp() further down calls vectorize() */
+static void vectorize(struct fd3_compile_context *ctx,
+		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
+		int nsrcs, ...);
+
 static unsigned
 compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 		const struct tgsi_token *tokens)
@@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 		flags |= IR3_REG_CONST;
 		num = src->Index + ctx->base_reg[src->File];
 		break;
+	case TGSI_FILE_OUTPUT:
+		/* NOTE: we should only end up w/ OUTPUT file for things like
+		 * clamp()'ing saturated dst instructions
+		 */
 	case TGSI_FILE_INPUT:
 	case TGSI_FILE_TEMPORARY:
 		num = src->Index + ctx->base_reg[src->File];
@@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
 
 }
 
+static void
+create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
+{
+	struct ir3_instruction *instr;
+	struct tgsi_src_register src;
+	/* clamp dst in place: emit a MAX_F against minval, then a MIN_F against maxval */
+	src_from_dst(&src, dst);
+	/* lower bound: OPC_MAX_F with sources (dst read back as src, minval), written to dst */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+	vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
+	/* upper bound: OPC_MIN_F with sources (dst read back as src, maxval), written to dst */
+	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+	vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
+}
+/* like create_clamp(), but takes the bounds as uint32_t values passed through get_immediate() */
+static void
+create_clamp_imm(struct fd3_compile_context *ctx,
+		struct tgsi_dst_register *dst,
+		uint32_t minval, uint32_t maxval)
+{
+	struct tgsi_src_register minconst, maxconst;
+
+	get_immediate(ctx, &minconst, minval);
+	get_immediate(ctx, &maxconst, maxval);
+
+	create_clamp(ctx, dst, &minconst, &maxconst);
+}
+
 static struct tgsi_dst_register *
 get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
 {
@@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx)
 				assert(0);
 			}
 
+			switch (inst->Instruction.Saturate) {
+			case TGSI_SAT_ZERO_ONE:
+				create_clamp_imm(ctx, &inst->Dst[0].Register,
+						fui(0.0), fui(1.0));
+				break;
+			case TGSI_SAT_MINUS_PLUS_ONE:
+				create_clamp_imm(ctx, &inst->Dst[0].Register,
+						fui(-1.0), fui(1.0));
+				break;
+			}
+
 			break;
 		}
 		default:
-- 
cgit v1.2.3