summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2016-06-03 17:09:14 -0700
committer: Eric Anholt <[email protected]> 2016-07-04 16:33:22 -0700
commit: 8f2af4763a60c5ea5f64829321ae97bbfa51a4ce (patch)
tree: 216912fdc1cffcd6ac62bfd7b80a69389d7ed9b5 /src/gallium/drivers
parent: 200b4e4bd5e87fea91193e3d1976b9cf0eabf8ba (diff)
vc4: Optimize out redundant SF updates.
Tiny change on shader-db currently, but it will be important when we start emitting a lot of SFs from the same variable as part of control flow support.

total instructions in shared programs: 89463 -> 89430 (-0.04%)
instructions in affected programs: 1522 -> 1489 (-2.17%)
total estimated cycles in shared programs: 250060 -> 250015 (-0.02%)
estimated cycles in affected programs: 8568 -> 8523 (-0.53%)
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_peephole_sf.c 82
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 2
2 files changed, 78 insertions, 6 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c
index 0bc3e67acb2..5536f8dd204 100644
--- a/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c
+++ b/src/gallium/drivers/vc4/vc4_opt_peephole_sf.c
@@ -24,7 +24,7 @@
/**
* @file vc4_opt_peephole_sf.c
*
- * Quick optimization to eliminate unused SF updates.
+ * Quick optimization to eliminate unused or identical SF updates.
*/
#include "vc4_qir.h"
@@ -33,12 +33,12 @@
static bool debug;
static void
-dump_from(struct vc4_compile *c, struct qinst *inst)
+dump_from(struct vc4_compile *c, struct qinst *inst, const char *type)
{
if (!debug)
return;
- fprintf(stderr, "optimizing: ");
+ fprintf(stderr, "optimizing %s: ", type);
qir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
@@ -54,26 +54,98 @@ dump_to(struct vc4_compile *c, struct qinst *inst)
fprintf(stderr, "\n");
}
+static bool
+inst_srcs_updated(struct qinst *inst, struct qinst *writer)
+{
+ /* If the sources get overwritten, stop tracking the
+ * last instruction writing SF.
+ */
+ switch (writer->dst.file) {
+ case QFILE_TEMP:
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP &&
+ inst->src[i].index == writer->dst.index) {
+ return true;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
+}
+
+static bool
+src_file_varies_on_reread(struct qreg reg)
+{
+ switch (reg.file) {
+ case QFILE_VARY:
+ case QFILE_VPM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+inst_result_equals(struct qinst *a, struct qinst *b)
+{
+ if (a->op != b->op ||
+ qir_depends_on_flags(a) ||
+ qir_depends_on_flags(b)) {
+ return false;
+ }
+
+ for (int i = 0; i < qir_get_op_nsrc(a->op); i++) {
+ if (!qir_reg_equals(a->src[i], b->src[i]) ||
+ src_file_varies_on_reread(a->src[i]) ||
+ src_file_varies_on_reread(b->src[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool
qir_opt_peephole_sf(struct vc4_compile *c)
{
bool progress = false;
bool sf_live = false;
+ struct qinst *last_sf = NULL;
/* Walk the block from bottom to top, tracking if the SF is used, and
- * removing unused ones.
+ * removing unused or repeated ones.
*/
list_for_each_entry_rev(struct qinst, inst, &c->instructions, link) {
if (inst->sf) {
if (!sf_live) {
- dump_from(c, inst);
+ /* Our instruction's SF isn't read, so drop it.
+ */
+ dump_from(c, inst, "dead SF");
inst->sf = false;
dump_to(c, inst);
progress = true;
+ } else if (last_sf &&
+ inst_result_equals(last_sf, inst)) {
+ /* The last_sf sets up same value as inst, so
+ * just drop the later one.
+ */
+ dump_from(c, last_sf, "repeated SF");
+ last_sf->sf = false;
+ dump_to(c, last_sf);
+ progress = true;
+ last_sf = inst;
+ } else {
+ last_sf = inst;
}
sf_live = false;
}
+ if (last_sf) {
+ if (inst_srcs_updated(last_sf, inst))
+ last_sf = NULL;
+ }
+
if (qir_depends_on_flags(inst))
sf_live = true;
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index b36c0d934cc..526e3a179aa 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -388,7 +388,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst)
bool
qir_reg_equals(struct qreg a, struct qreg b)
{
- return a.file == b.file && a.index == b.index;
+ return a.file == b.file && a.index == b.index && a.pack == b.pack;
}
struct vc4_compile *