diff options
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_compiler.c | 100 |
1 files changed, 52 insertions, 48 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c index 3d7f7c970d6..a26f8cf562c 100644 --- a/src/gallium/drivers/freedreno/freedreno_compiler.c +++ b/src/gallium/drivers/freedreno/freedreno_compiler.c @@ -57,11 +57,13 @@ struct fd_compile_context { /* Internal-Temporary and Predicate register assignment: * * Some TGSI instructions which translate into multiple actual - * instructions need one or more temporary registers (which are not + * instructions need one or more temporary registers, which are not * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). - * Whenever possible, the dst register is used as the first temporary, - * but this is not possible when the dst register is in an export (ie. - * in TGSI_FILE_OUTPUT). + * And some instructions (texture fetch) cannot write directly to + * output registers. We could be more clever and re-use dst or a + * src register in some cases. But for now don't try to be clever. + * Eventually we should implement an optimization pass that re- + * juggles the register usage and gets rid of unneeded temporaries. * * The predicate register must be valid across multiple TGSI * instructions, but internal temporary's do not. For this reason, @@ -513,6 +515,21 @@ add_regs_scalar_1(struct fd_compile_context *ctx, * Helpers for TGSI instructions that don't map to a single shader instr: */ +static void +src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) +{ + src->File = dst->File; + src->Indirect = dst->Indirect; + src->Dimension = dst->Dimension; + src->Index = dst->Index; + src->Absolute = 0; + src->Negate = 0; + src->SwizzleX = TGSI_SWIZZLE_X; + src->SwizzleY = TGSI_SWIZZLE_Y; + src->SwizzleZ = TGSI_SWIZZLE_Z; + src->SwizzleW = TGSI_SWIZZLE_W; +} + /* Get internal-temp src/dst to use for a sequence of instructions * generated by a single TGSI op.. if possible, use the final dst * register as the temporary to avoid allocating a new register, but @@ -521,44 +538,26 @@ add_regs_scalar_1(struct fd_compile_context *ctx, * so that you don't end up using the same register for all your * internal temps. */ -static bool +static void get_internal_temp(struct fd_compile_context *ctx, - struct tgsi_dst_register *orig_dst, struct tgsi_dst_register *tmp_dst, struct tgsi_src_register *tmp_src) { - bool using_temp = false; + int n; tmp_dst->File = TGSI_FILE_TEMPORARY; tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; tmp_dst->Indirect = 0; tmp_dst->Dimension = 0; - if (orig_dst && (orig_dst->File != TGSI_FILE_OUTPUT)) { - /* if possible, use orig dst register for the temporary: */ - tmp_dst->Index = orig_dst->Index; - } else { - /* otherwise assign one: */ - int n = ctx->num_internal_temps++; - if (ctx->pred_reg != -1) - n++; - tmp_dst->Index = get_temp_gpr(ctx, - ctx->num_regs[TGSI_FILE_TEMPORARY] + n); - using_temp = true; - } + /* assign next temporary: */ + n = ctx->num_internal_temps++; + if (ctx->pred_reg != -1) + n++; + + tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; - tmp_src->File = tmp_dst->File; - tmp_src->Indirect = tmp_dst->Indirect; - tmp_src->Dimension = tmp_dst->Dimension; - tmp_src->Index = tmp_dst->Index; - tmp_src->Absolute = 0; - tmp_src->Negate = 0; - tmp_src->SwizzleX = TGSI_SWIZZLE_X; - tmp_src->SwizzleY = TGSI_SWIZZLE_Y; - tmp_src->SwizzleZ = TGSI_SWIZZLE_Z; - tmp_src->SwizzleW = TGSI_SWIZZLE_W; - - return using_temp; + src_from_dst(tmp_src, tmp_dst); } static void @@ -574,12 +573,7 @@ get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst, dst->Index = get_temp_gpr(ctx, ctx->pred_reg); if (src) { - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; + src_from_dst(src, dst); src->SwizzleX = TGSI_SWIZZLE_W; src->SwizzleY = TGSI_SWIZZLE_W; src->SwizzleZ = TGSI_SWIZZLE_W; @@ -717,7 +711,7 @@ translate_pow(struct fd_compile_context *ctx, struct tgsi_src_register tmp_src; struct ir2_instruction *alu; - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); add_regs_dummy_vector(alu); @@ -763,16 +757,25 @@ translate_tex(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { struct ir2_instruction *instr; + struct ir2_register *reg; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; const struct tgsi_src_register *coord; - bool using_temp; + bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || + (inst->Instruction.Saturate != TGSI_SAT_NONE); int idx; - using_temp = get_internal_temp(ctx, - &inst->Dst[0].Register, &tmp_dst, &tmp_src); + if (using_temp || (opc == TGSI_OPCODE_TXP)) + get_internal_temp(ctx, &tmp_dst, &tmp_src); if (opc == TGSI_OPCODE_TXP) { + static const char *swiz[] = { + [TGSI_SWIZZLE_X] = "xxxx", + [TGSI_SWIZZLE_Y] = "yyyy", + [TGSI_SWIZZLE_Z] = "zzzz", + [TGSI_SWIZZLE_W] = "wwww", + }; + /* TXP - Projective Texture Lookup: * * coord.x = src0.x / src.w @@ -792,7 +795,8 @@ translate_tex(struct fd_compile_context *ctx, /* RECIP_IEEE: */ add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; - add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww"; + add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = + swiz[inst->Src[0].Register.SwizzleW]; instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; @@ -813,8 +817,8 @@ translate_tex(struct fd_compile_context *ctx, ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; ctx->so->tfetch_instrs[idx].instr = instr; - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, coord); + add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); + reg = add_src_reg(ctx, instr, coord); /* dst register needs to be marked for sync: */ ctx->need_sync |= 1 << instr->regs[0]->num; @@ -862,7 +866,7 @@ translate_sge_slt(struct fd_compile_context *ctx, break; } - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst); @@ -893,8 +897,8 @@ translate_lrp(struct fd_compile_context *ctx, struct tgsi_src_register tmp_src1, tmp_src2; struct tgsi_src_register tmp_const; - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2); + get_internal_temp(ctx, &tmp_dst1, &tmp_src1); + get_internal_temp(ctx, &tmp_dst2, &tmp_src2); get_immediate(ctx, &tmp_const, fui(1.0)); @@ -945,7 +949,7 @@ translate_trig(struct fd_compile_context *ctx, break; } - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); tmp_dst.WriteMask = TGSI_WRITEMASK_X; tmp_src.SwizzleX = tmp_src.SwizzleY = |