summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_compiler.c 100
1 files changed, 52 insertions, 48 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c
index 3d7f7c970d6..a26f8cf562c 100644
--- a/src/gallium/drivers/freedreno/freedreno_compiler.c
+++ b/src/gallium/drivers/freedreno/freedreno_compiler.c
@@ -57,11 +57,13 @@ struct fd_compile_context {
/* Internal-Temporary and Predicate register assignment:
*
* Some TGSI instructions which translate into multiple actual
- * instructions need one or more temporary registers (which are not
+ * instructions need one or more temporary registers, which are not
* assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
- * Whenever possible, the dst register is used as the first temporary,
- * but this is not possible when the dst register is in an export (ie.
- * in TGSI_FILE_OUTPUT).
+ * And some instructions (texture fetch) cannot write directly to
+ * output registers. We could be more clever and re-use dst or a
+ * src register in some cases. But for now don't try to be clever.
+ * Eventually we should implement an optimization pass that re-
+ * juggles the register usage and gets rid of unneeded temporaries.
*
* The predicate register must be valid across multiple TGSI
* instructions, but internal temporaries need not be. For this reason,
@@ -513,6 +515,21 @@ add_regs_scalar_1(struct fd_compile_context *ctx,
* Helpers for TGSI instructions that don't map to a single shader instr:
*/
+static void
+src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
+{
+ src->File = dst->File;
+ src->Indirect = dst->Indirect;
+ src->Dimension = dst->Dimension;
+ src->Index = dst->Index;
+ src->Absolute = 0;
+ src->Negate = 0;
+ src->SwizzleX = TGSI_SWIZZLE_X;
+ src->SwizzleY = TGSI_SWIZZLE_Y;
+ src->SwizzleZ = TGSI_SWIZZLE_Z;
+ src->SwizzleW = TGSI_SWIZZLE_W;
+}
+
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.. if possible, use the final dst
* register as the temporary to avoid allocating a new register, but
@@ -521,44 +538,26 @@ add_regs_scalar_1(struct fd_compile_context *ctx,
* so that you don't end up using the same register for all your
* internal temps.
*/
-static bool
+static void
get_internal_temp(struct fd_compile_context *ctx,
- struct tgsi_dst_register *orig_dst,
struct tgsi_dst_register *tmp_dst,
struct tgsi_src_register *tmp_src)
{
- bool using_temp = false;
+ int n;
tmp_dst->File = TGSI_FILE_TEMPORARY;
tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
tmp_dst->Indirect = 0;
tmp_dst->Dimension = 0;
- if (orig_dst && (orig_dst->File != TGSI_FILE_OUTPUT)) {
- /* if possible, use orig dst register for the temporary: */
- tmp_dst->Index = orig_dst->Index;
- } else {
- /* otherwise assign one: */
- int n = ctx->num_internal_temps++;
- if (ctx->pred_reg != -1)
- n++;
- tmp_dst->Index = get_temp_gpr(ctx,
- ctx->num_regs[TGSI_FILE_TEMPORARY] + n);
- using_temp = true;
- }
+ /* assign next temporary: */
+ n = ctx->num_internal_temps++;
+ if (ctx->pred_reg != -1)
+ n++;
+
+ tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n;
- tmp_src->File = tmp_dst->File;
- tmp_src->Indirect = tmp_dst->Indirect;
- tmp_src->Dimension = tmp_dst->Dimension;
- tmp_src->Index = tmp_dst->Index;
- tmp_src->Absolute = 0;
- tmp_src->Negate = 0;
- tmp_src->SwizzleX = TGSI_SWIZZLE_X;
- tmp_src->SwizzleY = TGSI_SWIZZLE_Y;
- tmp_src->SwizzleZ = TGSI_SWIZZLE_Z;
- tmp_src->SwizzleW = TGSI_SWIZZLE_W;
-
- return using_temp;
+ src_from_dst(tmp_src, tmp_dst);
}
static void
@@ -574,12 +573,7 @@ get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst,
dst->Index = get_temp_gpr(ctx, ctx->pred_reg);
if (src) {
- src->File = dst->File;
- src->Indirect = dst->Indirect;
- src->Dimension = dst->Dimension;
- src->Index = dst->Index;
- src->Absolute = 0;
- src->Negate = 0;
+ src_from_dst(src, dst);
src->SwizzleX = TGSI_SWIZZLE_W;
src->SwizzleY = TGSI_SWIZZLE_W;
src->SwizzleZ = TGSI_SWIZZLE_W;
@@ -717,7 +711,7 @@ translate_pow(struct fd_compile_context *ctx,
struct tgsi_src_register tmp_src;
struct ir2_instruction *alu;
- get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
add_regs_dummy_vector(alu);
@@ -763,16 +757,25 @@ translate_tex(struct fd_compile_context *ctx,
struct tgsi_full_instruction *inst, unsigned opc)
{
struct ir2_instruction *instr;
+ struct ir2_register *reg;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register tmp_src;
const struct tgsi_src_register *coord;
- bool using_temp;
+ bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
+ (inst->Instruction.Saturate != TGSI_SAT_NONE);
int idx;
- using_temp = get_internal_temp(ctx,
- &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+ if (using_temp || (opc == TGSI_OPCODE_TXP))
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
if (opc == TGSI_OPCODE_TXP) {
+ static const char *swiz[] = {
+ [TGSI_SWIZZLE_X] = "xxxx",
+ [TGSI_SWIZZLE_Y] = "yyyy",
+ [TGSI_SWIZZLE_Z] = "zzzz",
+ [TGSI_SWIZZLE_W] = "wwww",
+ };
+
/* TXP - Projective Texture Lookup:
*
* coord.x = src0.x / src.w
@@ -792,7 +795,8 @@ translate_tex(struct fd_compile_context *ctx,
/* RECIP_IEEE: */
add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
- add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww";
+ add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle =
+ swiz[inst->Src[0].Register.SwizzleW];
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
@@ -813,8 +817,8 @@ translate_tex(struct fd_compile_context *ctx,
ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
ctx->so->tfetch_instrs[idx].instr = instr;
- add_dst_reg(ctx, instr, &tmp_dst);
- add_src_reg(ctx, instr, coord);
+ add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
+ reg = add_src_reg(ctx, instr, coord);
/* dst register needs to be marked for sync: */
ctx->need_sync |= 1 << instr->regs[0]->num;
@@ -862,7 +866,7 @@ translate_sge_slt(struct fd_compile_context *ctx,
break;
}
- get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
add_dst_reg(ctx, instr, &tmp_dst);
@@ -893,8 +897,8 @@ translate_lrp(struct fd_compile_context *ctx,
struct tgsi_src_register tmp_src1, tmp_src2;
struct tgsi_src_register tmp_const;
- get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1);
- get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2);
+ get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
+ get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
get_immediate(ctx, &tmp_const, fui(1.0));
@@ -945,7 +949,7 @@ translate_trig(struct fd_compile_context *ctx,
break;
}
- get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+ get_internal_temp(ctx, &tmp_dst, &tmp_src);
tmp_dst.WriteMask = TGSI_WRITEMASK_X;
tmp_src.SwizzleX = tmp_src.SwizzleY =