summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2014-09-01 12:37:26 -0400
committer Rob Clark <[email protected]> 2014-09-01 18:08:21 -0400
commit ef858ac77055eb3c6bebd01719cee681959123a0 (patch)
tree 0d50ea93a3b1fe803705a2cbd93e25ff8c9e6259
parent 5e5604cc28a9542d3f21f63b1c3e04f7650dc0d6 (diff)
freedreno/ir3: add DDX/DDY
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c 57
1 files changed, 53 insertions, 4 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 371ab547881..472efd89266 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -1172,6 +1172,15 @@ get_tex_info(struct ir3_compile_context *ctx,
return NULL;
}
+/*
+ * Check whether the swizzle of 'src' follows the component layout
+ * described by 'order', relative to the swizzle of component 0:
+ * for each i in 1..3, component i must select
+ * (swizzle of component 0) + order[i].  Scanning stops at the first
+ * negative entry in 'order'.
+ *
+ * Returns true if every checked component matches, false otherwise.
+ *
+ * 'order' is only read here, so it is const-qualified; that way a
+ * 'static const' table can be passed without discarding the qualifier.
+ */
+static bool check_swiz(struct tgsi_src_register *src, const int8_t order[4])
+{
+	unsigned i;
+	for (i = 1; (i < 4) && order[i] >= 0; i++)
+		if (src_swiz(src, i) != (src_swiz(src, 0) + order[i]))
+			return false;
+	return true;
+}
+
static struct tgsi_src_register *
get_tex_coord(struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst,
@@ -1181,7 +1190,6 @@ get_tex_coord(struct ir3_compile_context *ctx,
struct ir3_instruction *instr;
unsigned tex = inst->Texture.Texture;
bool needs_mov = false;
- unsigned i;
/* cat5 instruction cannot seem to handle const or relative: */
if (is_rel_or_const(coord))
@@ -1197,9 +1205,8 @@ get_tex_coord(struct ir3_compile_context *ctx,
* might need to emit some mov instructions to shuffle things
* around:
*/
- for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++)
- if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i]))
- needs_mov = true;
+ if (!needs_mov)
+ needs_mov = !check_swiz(coord, tinf->order);
if (needs_mov) {
struct tgsi_dst_register tmp_dst;
@@ -1265,6 +1272,46 @@ trans_samp(const struct instr_translater *t,
add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleW, 0x1);
}
+/*
+ * DDX/DDY: translate a TGSI derivative instruction into a pair of
+ * instructions using the opcode taken from the translater entry
+ * (t->opc), one per pair of components.
+ */
+static void
+trans_deriv(const struct instr_translater *t,
+		struct ir3_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	static const int8_t order[4] = {0, 1, 2, 3};
+	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
+	struct tgsi_src_register *src = &inst->Src[0].Register;
+	unsigned first_chan;
+
+	/* If the source swizzle does not select consecutive components
+	 * in the order given above, mov the source into an internal
+	 * temporary and read from that instead:
+	 */
+	if (!check_swiz(src, order)) {
+		struct tgsi_dst_register tmp_dst;
+		struct tgsi_src_register *tmp_src = get_internal_temp(ctx, &tmp_dst);
+
+		create_mov(ctx, &tmp_dst, src);
+		src = tmp_src;
+	}
+
+	/* Possibly a workaround for a hw bug?  The blob compiler always
+	 * seems to handle dsx/dsy two components at a time.  Doing all
+	 * four components at once does appear to work in some cases (or
+	 * at least in some piglit tests), but splitting into two
+	 * instructions, as the blob compiler does, seems more reliable.
+	 *
+	 * So emit one instruction for the first component pair (starting
+	 * at component 0) and then one for the second pair (starting at
+	 * component 2), each using the matching two bits of the
+	 * destination write mask:
+	 */
+	for (first_chan = 0; first_chan < 4; first_chan += 2) {
+		struct ir3_instruction *instr = instr_create(ctx, 5, t->opc);
+
+		instr->cat5.type = get_ftype(ctx);
+		add_dst_reg_wrmask(ctx, instr, dst, first_chan,
+				(dst->WriteMask >> first_chan) & 0x3);
+		add_src_reg_wrmask(ctx, instr, src, first_chan,
+				(dst->WriteMask >> first_chan) & 0x3);
+	}
+}
+
/*
* SEQ(a,b) = (a == b) ? 1.0 : 0.0
* cmps.f.eq tmp0, a, b
@@ -2010,6 +2057,8 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
INSTR(TXB, trans_samp, .opc = OPC_SAMB, .arg = TGSI_OPCODE_TXB),
+ INSTR(DDX, trans_deriv, .opc = OPC_DSX),
+ INSTR(DDY, trans_deriv, .opc = OPC_DSY),
INSTR(SGT, trans_cmp),
INSTR(SLT, trans_cmp),
INSTR(FSLT, trans_cmp),