summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2016-11-15 14:48:43 -0800
committer Eric Anholt <[email protected]> 2016-11-29 08:52:50 -0800
commit 4690a93b123a64f8730a870a336ae9756d11fd18 (patch)
tree 13ed6e2582fbbfe75ee85c2ef36f0ffbf3f15592
parent f4baf809937c98dabee90ea57b9d7e6135bdb0df (diff)
vc4: Add support for coalescing ALU ops into tex_[srtb] MOVs.
This isn't as complete as I would like (can't merge interpolation because of the implicit r5 dependency, doesn't work with control flow), but this was cheap and easy.

Improves 3DMMES Taiji performance by 1.15353% +/- 0.299896% (n=29, 16)

total instructions in shared programs: 99810 -> 99059 (-0.75%)
instructions in affected programs: 10705 -> 9954 (-7.02%)
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c 36
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 11
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c 18
4 files changed, 37 insertions, 29 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
index c08c02619f7..b247c690d82 100644
--- a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c
@@ -24,8 +24,8 @@
/**
* @file vc4_opt_coalesce_ff_writes.c
*
- * This modifies instructions that generate the value consumed by a VPM write
- * to write directly into the VPM.
+ * This modifies instructions that generate the value consumed by a VPM or TMU
+ * coordinate write to write directly into the VPM or TMU.
*/
#include "vc4_qir.h"
@@ -33,9 +33,6 @@
bool
qir_opt_coalesce_ff_writes(struct vc4_compile *c)
{
- if (c->stage == QSTAGE_FRAG)
- return false;
-
/* For now, only do this pass when we don't have control flow. */
struct qblock *block = qir_entry_block(c);
if (block != qir_exit_block(c))
@@ -60,7 +57,7 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
if (mov_inst->src[0].file != QFILE_TEMP)
continue;
- if (mov_inst->dst.file != QFILE_VPM)
+ if (!(mov_inst->dst.file == QFILE_VPM || qir_is_tex(mov_inst)))
continue;
uint32_t temp = mov_inst->src[0].index;
@@ -71,24 +68,37 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
if (!inst)
continue;
+ /* Don't bother trying to fold in an ALU op using a uniform to
+ * a texture op, as we'll just have to lower the uniform back
+ * out.
+ */
+ if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst))
+ continue;
+
if (qir_depends_on_flags(inst) || inst->sf)
continue;
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst)) {
+ qir_has_side_effect_reads(c, inst) ||
+ inst->op == QOP_VARY_ADD_C) {
continue;
}
- /* Move the generating instruction to the end of the program
- * to maintain the order of the VPM writes.
+ /* Move the generating instruction into the position of the FF
+ * write.
*/
+ c->defs[inst->dst.index] = NULL;
+ inst->dst.file = mov_inst->dst.file;
+ inst->dst.index = mov_inst->dst.index;
+ if (qir_has_implicit_tex_uniform(mov_inst)) {
+ inst->src[qir_get_tex_uniform_src(inst)] =
+ mov_inst->src[qir_get_tex_uniform_src(mov_inst)];
+ }
+
list_del(&inst->link);
list_addtail(&inst->link, &mov_inst->link);
- qir_remove_instruction(c, mov_inst);
- c->defs[inst->dst.index] = NULL;
- inst->dst.file = QFILE_VPM;
- inst->dst.index = 0;
+ qir_remove_instruction(c, mov_inst);
progress = true;
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index a082c41dfe0..d4f35d8f01a 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -180,6 +180,17 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
}
bool
+qir_has_uniform_read(struct qinst *inst)
+{
+ for (int i = 0; i < qir_get_nsrc(inst); i++) {
+ if (inst->src[i].file == QFILE_UNIF)
+ return true;
+ }
+
+ return false;
+}
+
+bool
qir_is_mul(struct qinst *inst)
{
switch (inst->op) {
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 28d33449391..e189bc32d94 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -577,6 +577,7 @@ int qir_get_tex_uniform_src(struct qinst *inst);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
+bool qir_has_uniform_read(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
index 23ae8ebfa6f..443682a4670 100644
--- a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
+++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
@@ -36,24 +36,10 @@
#include "util/u_math.h"
static bool
-inst_reads_a_uniform(struct qinst *inst)
-{
- if (qir_is_tex(inst))
- return true;
-
- for (int i = 0; i < qir_get_nsrc(inst); i++) {
- if (inst->src[i].file == QFILE_UNIF)
- return true;
- }
-
- return false;
-}
-
-static bool
block_reads_any_uniform(struct qblock *block)
{
qir_for_each_inst(inst, block) {
- if (inst_reads_a_uniform(inst))
+ if (qir_has_uniform_read(inst))
return true;
}
@@ -94,7 +80,7 @@ qir_emit_uniform_stream_resets(struct vc4_compile *c)
}
qir_for_each_inst(inst, block) {
- if (inst_reads_a_uniform(inst))
+ if (qir_has_uniform_read(inst))
uniform_count++;
}
}