summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2013-10-24 17:53:31 -0400
committerRob Clark <[email protected]>2013-10-24 20:21:08 -0400
commita453242fda0a57012a790e67053545211c0486ed (patch)
tree209fe8ac3df5ed8bb06b1bfc8275bf785f3aff14
parent4317c4e6e05f84a985ff76a7f66e506681d8e37f (diff)
freedreno/a3xx/compiler: relative addressing
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c124
1 files changed, 123 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index b05b27278f8..352afcf39aa 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -96,6 +96,9 @@ struct fd3_compile_context {
/* last input dst (for setting (ei) flag): */
struct ir3_register *last_input;
+ /* last instruction with relative addressing: */
+ struct ir3_instruction *last_rel;
+
unsigned next_inloc;
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[6];
@@ -156,6 +159,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
ctx->ir = so->ir;
ctx->so = so;
ctx->last_input = NULL;
+ ctx->last_rel = NULL;
ctx->next_inloc = 8;
ctx->num_internal_temps = 0;
ctx->branch_count = 0;
@@ -222,11 +226,23 @@ struct instr_translater {
unsigned arg;
};
+static void
+handle_last_rel(struct fd3_compile_context *ctx)
+{
+ if (ctx->last_rel) {
+ ctx->last_rel->flags |= IR3_INSTR_UL;
+ ctx->last_rel = NULL;
+ }
+}
+
static unsigned
src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
{
unsigned flags = 0;
+ if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
+ return flags;
+
if (regmask_get(ctx->needs_ss, reg)) {
flags |= IR3_INSTR_SS;
memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
@@ -251,6 +267,9 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
case TGSI_FILE_TEMPORARY:
num = dst->Index + ctx->base_reg[dst->File];
break;
+ case TGSI_FILE_ADDRESS:
+ num = REG_A0;
+ break;
default:
compile_error(ctx, "unsupported dst register file: %s\n",
tgsi_file_name(dst->File));
@@ -270,6 +289,11 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
unsigned flags = 0, num = 0;
struct ir3_register *reg;
+ /* TODO we need to use a mov to temp for const >= 64.. or maybe
+ * we could use relative addressing..
+ */
+ compile_assert(ctx, src->Index < 64);
+
switch (src->File) {
case TGSI_FILE_IMMEDIATE:
/* TODO if possible, use actual immediate instead of const.. but
@@ -298,11 +322,16 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
flags |= IR3_REG_ABS;
if (src->Negate)
flags |= IR3_REG_NEGATE;
+ if (src->Indirect)
+ flags |= IR3_REG_RELATIV;
if (ctx->so->half_precision)
flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
+ if (src->Indirect)
+ ctx->last_rel = instr;
+
instr->flags |= src_flags(ctx, reg);
return reg;
@@ -350,6 +379,39 @@ get_internal_temp(struct fd3_compile_context *ctx,
return tmp_src;
}
+/* Get internal half-precision temp src/dst to use for a sequence of
+ * instructions generated by a single TGSI op.
+ */
+static struct tgsi_src_register *
+get_internal_temp_hr(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *tmp_dst)
+{
+ struct tgsi_src_register *tmp_src;
+ int n;
+
+ if (ctx->so->half_precision)
+ return get_internal_temp(ctx, tmp_dst);
+
+ tmp_dst->File = TGSI_FILE_TEMPORARY;
+ tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
+ tmp_dst->Indirect = 0;
+ tmp_dst->Dimension = 0;
+
+ /* assign next temporary: */
+ n = ctx->num_internal_temps++;
+ compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
+ tmp_src = &ctx->internal_temps[n];
+
+ /* just use hr0 because no one else should be using half-
+ * precision regs:
+ */
+ tmp_dst->Index = 0;
+
+ src_from_dst(tmp_src, tmp_dst);
+
+ return tmp_src;
+}
+
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
* replicate their results)
*/
@@ -520,6 +582,11 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_src_register *src = &inst->Src[i].Register;
if ((src->File == dst->File) && (src->Index == dst->Index)) {
+ if ((src->SwizzleX == TGSI_SWIZZLE_X) &&
+ (src->SwizzleY == TGSI_SWIZZLE_Y) &&
+ (src->SwizzleZ == TGSI_SWIZZLE_Z) &&
+ (src->SwizzleW == TGSI_SWIZZLE_W))
+ continue;
ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
ctx->tmp_dst.WriteMask = dst->WriteMask;
dst = &ctx->tmp_dst;
@@ -548,6 +615,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
{
va_list ap;
int i, j, n = 0;
+ bool indirect = dst->Indirect;
add_dst_reg(ctx, instr, dst, 0);
@@ -560,6 +628,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
reg->flags |= flags & ~IR3_REG_NEGATE;
if (flags & IR3_REG_NEGATE)
reg->flags ^= IR3_REG_NEGATE;
+ indirect |= src->Indirect;
}
va_end(ap);
@@ -589,6 +658,9 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
cur->flags |= src_flags(ctx, cur->regs[j+1]);
}
va_end(ap);
+
+ if (indirect)
+ ctx->last_rel = cur;
}
}
@@ -890,6 +962,53 @@ trans_frac(const struct instr_translater *t,
tmp_src, IR3_REG_NEGATE);
}
+/* ARL(x) = x, but mova from hrN.x to a0.. */
+static void
+trans_arl(const struct instr_translater *t,
+ struct fd3_compile_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+ struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+ struct tgsi_src_register *src = &inst->Src[0].Register;
+ unsigned chan = src->SwizzleX;
+ compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
+
+ handle_last_rel(ctx);
+
+ tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
+
+
+ /* cov.{f32,f16}s16 Rtmp, Rsrc */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_ftype(ctx);
+ instr->cat1.dst_type = TYPE_S16;
+ add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
+ add_src_reg(ctx, instr, src, chan);
+
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
+
+ /* shl.b Rtmp, Rtmp, 2 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
+ add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
+ add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
+
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
+
+ /* mova a0, Rtmp */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = TYPE_S16;
+ instr->cat1.dst_type = TYPE_S16;
+ add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
+ add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
+
+ /* need to ensure 5 instr slots before a0 is used: */
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+}
+
/* POW(a,b) = EXP2(b * LOG2(a)) */
static void
trans_pow(const struct instr_translater *t,
@@ -1425,10 +1544,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
+ INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
INSTR(LRP, trans_lrp),
INSTR(FRC, trans_frac),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
- INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F),
+ INSTR(ARL, trans_arl),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
INSTR(POW, trans_pow),
@@ -1626,6 +1746,8 @@ compile_instructions(struct fd3_compile_context *ctx)
if (ctx->last_input)
ctx->last_input->flags |= IR3_REG_EI;
+
+ handle_last_rel(ctx);
}
int