summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-08-25 00:12:21 -0700
committerEric Anholt <[email protected]>2014-09-04 11:39:51 -0700
commit55d2a1626219ac041ce05477827b592efa1c7b81 (patch)
tree8cd77b31a7e725ff3fd34933dfd481246d5955ad /src/gallium
parent80b27ca2cd8cd2bb2937baa441c43a396887cc03 (diff)
vc4: Add a CSE optimization pass.
Debugging a regression in discard support was just too full of duplicate instructions, so I decided to remove them instead of re-analyzing each of them as I dumped their outputs in simulation.
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_cse.c183
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c49
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h5
4 files changed, 238 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index f8e04e40009..aa4ee6b6866 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
vc4_formats.c \
vc4_opt_algebraic.c \
vc4_opt_copy_propagation.c \
+ vc4_opt_cse.c \
vc4_opt_dead_code.c \
vc4_program.c \
vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
new file mode 100644
index 00000000000..511e3b94f07
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_cse.c
+ *
+ * Implements CSE for QIR without control flow.
+ *
+ * For each operation that writes a destination (and isn't just a MOV), put it
+ * in the hash table of all instructions that do so. When faced with another
+ * one, look it up in the hash table by its opcode and operands. If there's
+ * an entry in the table, then just reuse the entry's destination as the
+ * source of a MOV instead of reproducing the computation. That MOV will then
+ * get cleaned up by copy propagation.
+ */
+
+#include "vc4_qir.h"
+
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+
+/*
+ * Gates the stderr tracing in this file (table insertions, matches, removed
+ * SFs and rewritten instructions). Nothing shown here assigns it, so it
+ * stays at its zero-initialized value (false) unless changed by hand.
+ */
+static bool debug;
+
+/**
+ * Hash table key describing one computation: its opcode, its source
+ * operands, and a counter standing in for implicit inputs.
+ *
+ * Keys are hashed and compared as raw bytes (see inst_key_equals()), so a key
+ * must be zeroed in full before its fields are filled in.
+ */
+struct inst_key {
+ enum qop op;
+ /* Source operands; vc4_find_cse() leaves slots past the op's source count zeroed. */
+ struct qreg src[4];
+ /**
+ * If the instruction depends on the flags, how many QOP_SFs have been
+ * seen before this instruction, or if it depends on r4, how many r4
+ * writes have been seen.
+ */
+ uint32_t implicit_arg_update_count;
+};
+
+/**
+ * Equality callback for the CSE hash table.
+ *
+ * Both arguments are treated as struct inst_key and are considered equal
+ * only when every byte of the two keys is identical.
+ */
+static bool
+inst_key_equals(const void *a, const void *b)
+{
+        const struct inst_key *lhs = a;
+        const struct inst_key *rhs = b;
+        int diff = memcmp(lhs, rhs, sizeof(struct inst_key));
+
+        return !diff;
+}
+
+/**
+ * Looks for an earlier instruction computing the same value as @inst.
+ *
+ * The lookup key is the opcode plus its sources; for instructions that
+ * depend on the flags or read r4, the matching counter (@sf_count or
+ * @r4_count) is folded into the key as well.
+ *
+ * Returns the previously recorded instruction on a hit.  Returns NULL when
+ * @inst is not a candidate (destination is not a temp, it is a MOV, or it
+ * has more than 4 sources), when the key allocation fails, or when there was
+ * no match -- in that last case @inst is recorded in @ht for later lookups.
+ */
+static struct qinst *
+vc4_find_cse(struct hash_table *ht, struct qinst *inst, uint32_t sf_count,
+             uint32_t r4_count)
+{
+        if (inst->dst.file != QFILE_TEMP)
+                return NULL;
+        if (inst->op == QOP_MOV)
+                return NULL;
+
+        int nsrc = qir_get_op_nsrc(inst->op);
+        if (nsrc > 4)
+                return NULL;
+
+        /* Zero the whole key first: it is hashed and compared bytewise. */
+        struct inst_key lookup;
+        memset(&lookup, 0, sizeof(lookup));
+        lookup.op = inst->op;
+        memcpy(lookup.src, inst->src, nsrc * sizeof(lookup.src[0]));
+        if (qir_depends_on_flags(inst))
+                lookup.implicit_arg_update_count = sf_count;
+        if (qir_reads_r4(inst))
+                lookup.implicit_arg_update_count = r4_count;
+
+        uint32_t hash = _mesa_hash_data(&lookup, sizeof(lookup));
+        struct hash_entry *hit = _mesa_hash_table_search(ht, hash, &lookup);
+
+        if (!hit) {
+                /* First time this computation is seen: remember it.  The
+                 * stored key is allocated with ht as its ralloc parent.
+                 */
+                struct inst_key *stored = ralloc(ht, struct inst_key);
+                if (!stored)
+                        return NULL;
+                memcpy(stored, &lookup, sizeof(*stored));
+                _mesa_hash_table_insert(ht, hash, stored, inst);
+
+                if (debug) {
+                        fprintf(stderr, "Added to CSE HT: ");
+                        qir_dump_inst(inst);
+                        fprintf(stderr, "\n");
+                }
+
+                return NULL;
+        }
+
+        if (debug) {
+                fprintf(stderr, "CSE found match:\n");
+
+                fprintf(stderr, " Original inst: ");
+                qir_dump_inst(hit->data);
+                fprintf(stderr, "\n");
+
+                fprintf(stderr, " Our inst: ");
+                qir_dump_inst(inst);
+                fprintf(stderr, "\n");
+        }
+
+        return hit->data;
+}
+
+/**
+ * Common subexpression elimination over the instruction list of @c.
+ *
+ * Walks c->instructions once:
+ *  - instructions with side effects are never candidates and are skipped;
+ *  - a QOP_SF whose source equals that of the previous QOP_SF is unlinked
+ *    from the list;
+ *  - any other instruction that vc4_find_cse() matches against an earlier
+ *    one is rewritten into a QOP_MOV from the earlier instruction's
+ *    destination, with its remaining sources set to c->undef.
+ *
+ * Returns true if any instruction was removed or rewritten, false otherwise
+ * (including when the hash table could not be created).
+ */
+bool
+qir_opt_cse(struct qcompile *c)
+{
+        bool progress = false;
+        struct simple_node *node, *t;
+        struct qinst *last_sf = NULL;
+        uint32_t sf_count = 0, r4_count = 0;
+
+        struct hash_table *ht = _mesa_hash_table_create(NULL, inst_key_equals);
+        if (!ht)
+                return false;
+
+        foreach_s(node, t, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                /* Count every r4 write up front, before any of the
+                 * "continue" paths below, so that writers which are skipped
+                 * for having side effects still separate the r4 readers on
+                 * either side of them.  Counting a write that later gets
+                 * turned into a MOV only makes the keys more conservative.
+                 */
+                if (qir_writes_r4(inst))
+                        r4_count++;
+
+                if (qir_has_side_effects(inst)) {
+                        /* NOTE(review): presumably discard setup changes the
+                         * flags, so the next SF must not be treated as
+                         * redundant -- confirm.
+                         */
+                        if (inst->op == QOP_TLB_DISCARD_SETUP)
+                                last_sf = NULL;
+                        continue;
+                }
+
+                if (inst->op == QOP_SF) {
+                        if (last_sf &&
+                            qir_reg_equals(last_sf->src[0], inst->src[0])) {
+                                if (debug) {
+                                        fprintf(stderr,
+                                                "Removing redundant SF: ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                                remove_from_list(&inst->link);
+                                progress = true;
+                                continue;
+                        } else {
+                                last_sf = inst;
+                                sf_count++;
+                        }
+                } else {
+                        struct qinst *cse = vc4_find_cse(ht, inst,
+                                                         sf_count, r4_count);
+                        if (cse) {
+                                /* Reuse the earlier result; clear the other
+                                 * sources before the opcode changes, since
+                                 * the source count is looked up by opcode.
+                                 */
+                                inst->src[0] = cse->dst;
+                                for (int i = 1; i < qir_get_op_nsrc(inst->op);
+                                     i++)
+                                        inst->src[i] = c->undef;
+                                inst->op = QOP_MOV;
+                                progress = true;
+
+                                if (debug) {
+                                        fprintf(stderr, " Turned into: ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                        }
+                }
+        }
+
+        /* Frees the table along with the keys allocated as its children. */
+        ralloc_free(ht);
+
+        return progress;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 93f97c219f7..a017a72c14b 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -127,6 +127,54 @@ qir_has_side_effects(struct qinst *inst)
return qir_op_info[inst->op].has_side_effects;
}
+/**
+ * Returns true when the instruction's opcode is one of the QOP_SEL_X_0_* or
+ * QOP_SEL_X_Y_* selects, false for every other opcode.
+ */
+bool
+qir_depends_on_flags(struct qinst *inst)
+{
+        bool uses_flags = false;
+
+        switch (inst->op) {
+        case QOP_SEL_X_0_NS:
+        case QOP_SEL_X_0_NC:
+        case QOP_SEL_X_0_ZS:
+        case QOP_SEL_X_0_ZC:
+        case QOP_SEL_X_Y_NS:
+        case QOP_SEL_X_Y_NC:
+        case QOP_SEL_X_Y_ZS:
+        case QOP_SEL_X_Y_ZC:
+                uses_flags = true;
+                break;
+        default:
+                break;
+        }
+
+        return uses_flags;
+}
+
+/**
+ * Returns true when the instruction's opcode is one of QOP_TEX_RESULT,
+ * QOP_TLB_COLOR_READ, QOP_RCP, QOP_RSQ, QOP_EXP2 or QOP_LOG2, false for
+ * every other opcode.
+ */
+bool
+qir_writes_r4(struct qinst *inst)
+{
+        bool writes = false;
+
+        switch (inst->op) {
+        case QOP_TEX_RESULT:
+        case QOP_TLB_COLOR_READ:
+        case QOP_RCP:
+        case QOP_RSQ:
+        case QOP_EXP2:
+        case QOP_LOG2:
+                writes = true;
+                break;
+        default:
+                break;
+        }
+
+        return writes;
+}
+
+/**
+ * Returns true when the instruction's opcode is one of the four
+ * QOP_R4_UNPACK_{A,B,C,D} opcodes, false for every other opcode.
+ */
+bool
+qir_reads_r4(struct qinst *inst)
+{
+        bool reads = false;
+
+        switch (inst->op) {
+        case QOP_R4_UNPACK_A:
+        case QOP_R4_UNPACK_B:
+        case QOP_R4_UNPACK_C:
+        case QOP_R4_UNPACK_D:
+                reads = true;
+                break;
+        default:
+                break;
+        }
+
+        return reads;
+}
+
static void
qir_print_reg(struct qreg reg)
{
@@ -274,6 +322,7 @@ qir_optimize(struct qcompile *c)
bool progress = false;
OPTPASS(qir_opt_algebraic);
+ OPTPASS(qir_opt_cse);
OPTPASS(qir_opt_copy_propagation);
OPTPASS(qir_opt_dead_code);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 2e210c3bd60..7336a3733de 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -28,6 +28,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
+#include <string.h>
#include "util/u_simple_list.h"
@@ -227,6 +228,9 @@ struct qreg qir_get_temp(struct qcompile *c);
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct qinst *inst);
+bool qir_depends_on_flags(struct qinst *inst);
+bool qir_writes_r4(struct qinst *inst);
+bool qir_reads_r4(struct qinst *inst);
void qir_dump(struct qcompile *c);
void qir_dump_inst(struct qinst *inst);
@@ -235,6 +239,7 @@ const char *qir_get_stage_name(enum qstage stage);
void qir_optimize(struct qcompile *c);
bool qir_opt_algebraic(struct qcompile *c);
bool qir_opt_copy_propagation(struct qcompile *c);
+bool qir_opt_cse(struct qcompile *c);
bool qir_opt_dead_code(struct qcompile *c);
#define QIR_ALU0(name) \