diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 124 |
1 files changed, 123 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index b05b27278f8..352afcf39aa 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -96,6 +96,9 @@ struct fd3_compile_context { /* last input dst (for setting (ei) flag): */ struct ir3_register *last_input; + /* last instruction with relative addressing: */ + struct ir3_instruction *last_rel; + unsigned next_inloc; unsigned num_internal_temps; struct tgsi_src_register internal_temps[6]; @@ -156,6 +159,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, ctx->ir = so->ir; ctx->so = so; ctx->last_input = NULL; + ctx->last_rel = NULL; ctx->next_inloc = 8; ctx->num_internal_temps = 0; ctx->branch_count = 0; @@ -222,11 +226,23 @@ struct instr_translater { unsigned arg; }; +static void +handle_last_rel(struct fd3_compile_context *ctx) +{ + if (ctx->last_rel) { + ctx->last_rel->flags |= IR3_INSTR_UL; + ctx->last_rel = NULL; + } +} + static unsigned src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) { unsigned flags = 0; + if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return flags; + if (regmask_get(ctx->needs_ss, reg)) { flags |= IR3_INSTR_SS; memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); @@ -251,6 +267,9 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, case TGSI_FILE_TEMPORARY: num = dst->Index + ctx->base_reg[dst->File]; break; + case TGSI_FILE_ADDRESS: + num = REG_A0; + break; default: compile_error(ctx, "unsupported dst register file: %s\n", tgsi_file_name(dst->File)); @@ -270,6 +289,11 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, unsigned flags = 0, num = 0; struct ir3_register *reg; + /* TODO we need to use a mov to temp for const >= 64.. or maybe + * we could use relative addressing.. + */ + compile_assert(ctx, src->Index < 64); + switch (src->File) { case TGSI_FILE_IMMEDIATE: /* TODO if possible, use actual immediate instead of const.. but @@ -298,11 +322,16 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, flags |= IR3_REG_ABS; if (src->Negate) flags |= IR3_REG_NEGATE; + if (src->Indirect) + flags |= IR3_REG_RELATIV; if (ctx->so->half_precision) flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); + if (src->Indirect) + ctx->last_rel = instr; + instr->flags |= src_flags(ctx, reg); return reg; @@ -350,6 +379,39 @@ get_internal_temp(struct fd3_compile_context *ctx, return tmp_src; } +/* Get internal half-precision temp src/dst to use for a sequence of + * instructions generated by a single TGSI op. + */ +static struct tgsi_src_register * +get_internal_temp_hr(struct fd3_compile_context *ctx, + struct tgsi_dst_register *tmp_dst) +{ + struct tgsi_src_register *tmp_src; + int n; + + if (ctx->so->half_precision) + return get_internal_temp(ctx, tmp_dst); + + tmp_dst->File = TGSI_FILE_TEMPORARY; + tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; + tmp_dst->Indirect = 0; + tmp_dst->Dimension = 0; + + /* assign next temporary: */ + n = ctx->num_internal_temps++; + compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); + tmp_src = &ctx->internal_temps[n]; + + /* just use hr0 because no one else should be using half- + * precision regs: + */ + tmp_dst->Index = 0; + + src_from_dst(tmp_src, tmp_dst); + + return tmp_src; +} + /* same as get_internal_temp, but w/ src.xxxx (for instructions that * replicate their results) */ @@ -520,6 +582,11 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_src_register *src = &inst->Src[i].Register; if ((src->File == dst->File) && (src->Index == dst->Index)) { + if ((src->SwizzleX == TGSI_SWIZZLE_X) && + (src->SwizzleY == TGSI_SWIZZLE_Y) && + (src->SwizzleZ == TGSI_SWIZZLE_Z) && + (src->SwizzleW == TGSI_SWIZZLE_W)) + continue; ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); ctx->tmp_dst.WriteMask = dst->WriteMask; dst = &ctx->tmp_dst; @@ -548,6 +615,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, { va_list ap; int i, j, n = 0; + bool indirect = dst->Indirect; add_dst_reg(ctx, instr, dst, 0); @@ -560,6 +628,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, reg->flags |= flags & ~IR3_REG_NEGATE; if (flags & IR3_REG_NEGATE) reg->flags ^= IR3_REG_NEGATE; + indirect |= src->Indirect; } va_end(ap); @@ -589,6 +658,9 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, cur->flags |= src_flags(ctx, cur->regs[j+1]); } va_end(ap); + + if (indirect) + ctx->last_rel = cur; } } @@ -890,6 +962,53 @@ trans_frac(const struct instr_translater *t, tmp_src, IR3_REG_NEGATE); } +/* ARL(x) = x, but mova from hrN.x to a0.. */ +static void +trans_arl(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src = &inst->Src[0].Register; + unsigned chan = src->SwizzleX; + compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS); + + handle_last_rel(ctx); + + tmp_src = get_internal_temp_hr(ctx, &tmp_dst); + + + /* cov.{f32,f16}s16 Rtmp, Rsrc */ + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = get_ftype(ctx); + instr->cat1.dst_type = TYPE_S16; + add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; + add_src_reg(ctx, instr, src, chan); + + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; + + /* shl.b Rtmp, Rtmp, 2 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B); + add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; + add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; + + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; + + /* mova a0, Rtmp */ + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = TYPE_S16; + instr->cat1.dst_type = TYPE_S16; + add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; + add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; + + /* need to ensure 5 instr slots before a0 is used: */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; +} + /* POW(a,b) = EXP2(b * LOG2(a)) */ static void trans_pow(const struct instr_translater *t, @@ -1425,10 +1544,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), + INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), INSTR(LRP, trans_lrp), INSTR(FRC, trans_frac), INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F), + INSTR(ARL, trans_arl), INSTR(EX2, instr_cat4, .opc = OPC_EXP2), INSTR(LG2, instr_cat4, .opc = OPC_LOG2), INSTR(POW, trans_pow), @@ -1626,6 +1746,8 @@ compile_instructions(struct fd3_compile_context *ctx) if (ctx->last_input) ctx->last_input->flags |= IR3_REG_EI; + + handle_last_rel(ctx); } int |