summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-01-05 17:18:09 -0800
committer: Eric Anholt <[email protected]> 2016-01-06 12:43:09 -0800
commit: 25aa436e8690dea181049e312e3e7f5f0c9d45da (patch)
tree: c04cef53161f4b0869f49df66f47fb22a460dfc9 /src/gallium/drivers/vc4
parent: 7a9eb76786ea5534d395eef199974f8221e047d8 (diff)
vc4: Optimize out a comparison for bcsel based on an ALU comparison
We routinely have code like:

    vec1 ssa_220 = fge ssa_104, ssa_61
    vec1 ssa_199 = bcsel ssa_220, ssa_106, ssa_105

and we would compare fge's args and choose between ~0 and 0 to generate ssa_220, then compare ssa_220 to 0 and choose between bcsel's args. Instead, try to notice the pattern and compare between fge's args to select between bcsel's args.

total instructions in shared programs: 88019 -> 87574 (-0.51%)
instructions in affected programs: 9985 -> 9540 (-4.46%)
total estimated cycles in shared programs: 245752 -> 245237 (-0.21%)
estimated cycles in affected programs: 17232 -> 16717 (-2.99%)
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c | 73
1 files changed, 59 insertions, 14 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index e04cca1c09d..ede14ab9b1d 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -845,13 +845,19 @@ ntq_emit_ubfe(struct vc4_compile *c, struct qreg base, struct qreg offset,
return qir_UNPACK_8_I(c, base, offset_bit / 8);
}
-static struct qreg
-ntq_emit_comparison(struct vc4_compile *c, nir_alu_instr *instr,
- struct qreg src0, struct qreg src1)
+/**
+ * If compare_instr is a valid comparison instruction, emits the
+ * compare_instr's comparison and returns the sel_instr's return value based
+ * on the compare_instr's result.
+ */
+static bool
+ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
+ nir_alu_instr *compare_instr,
+ nir_alu_instr *sel_instr)
{
enum qpu_cond cond;
- switch (instr->op) {
+ switch (compare_instr->op) {
case nir_op_feq:
case nir_op_ieq:
case nir_op_seq:
@@ -874,25 +880,63 @@ ntq_emit_comparison(struct vc4_compile *c, nir_alu_instr *instr,
cond = QPU_COND_NS;
break;
default:
- unreachable("bad ALU op for comparison");
+ return false;
}
- if (nir_op_infos[instr->op].input_types[0] == nir_type_float)
+ struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
+ struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
+
+ if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
qir_SF(c, qir_FSUB(c, src0, src1));
else
qir_SF(c, qir_SUB(c, src0, src1));
- switch (instr->op) {
+ switch (sel_instr->op) {
case nir_op_seq:
case nir_op_sne:
case nir_op_sge:
case nir_op_slt:
- return qir_SEL(c, cond,
- qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0));
+ *dest = qir_SEL(c, cond,
+ qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0));
+ break;
+
+ case nir_op_bcsel:
+ *dest = qir_SEL(c, cond,
+ ntq_get_alu_src(c, sel_instr, 1),
+ ntq_get_alu_src(c, sel_instr, 2));
+ break;
+
default:
- return qir_SEL(c, cond,
- qir_uniform_ui(c, ~0), qir_uniform_ui(c, 0.0));
+ *dest = qir_SEL(c, cond,
+ qir_uniform_ui(c, ~0), qir_uniform_ui(c, 0));
+ break;
}
+
+ return true;
+}
+
+/**
+ * Attempts to fold a comparison generating a boolean result into the
+ * condition code for selecting between two values, instead of comparing the
+ * boolean result against 0 (the fallback when the condition cannot be folded).
+ */
+static struct qreg ntq_emit_bcsel(struct vc4_compile *c, nir_alu_instr *instr,
+                                  struct qreg *src)
+{
+        /* nir_instr_as_alu() does not type-check, so reject non-ALU producers first. */
+        if (!instr->src[0].src.is_ssa ||
+            instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+                goto out;
+        nir_alu_instr *compare =
+                nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+
+        struct qreg dest;
+        if (ntq_emit_comparison(c, &dest, compare, instr))
+                return dest;
+
+out:
+        qir_SF(c, src[0]);
+        return qir_SEL(c, QPU_COND_NS, src[1], src[2]);
 }
static void
@@ -1037,12 +1081,13 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
case nir_op_ige:
case nir_op_uge:
case nir_op_ilt:
- *dest = ntq_emit_comparison(c, instr, src[0], src[1]);
+ if (!ntq_emit_comparison(c, dest, instr, instr)) {
+ fprintf(stderr, "Bad comparison instruction\n");
+ }
break;
case nir_op_bcsel:
- qir_SF(c, src[0]);
- *dest = qir_SEL(c, QPU_COND_NS, src[1], src[2]);
+ *dest = ntq_emit_bcsel(c, instr, src);
break;
case nir_op_fcsel:
qir_SF(c, src[0]);