diff options
author | Eric Anholt <[email protected]> | 2014-08-25 00:12:21 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-09-04 11:39:51 -0700 |
commit | 55d2a1626219ac041ce05477827b592efa1c7b81 (patch) | |
tree | 8cd77b31a7e725ff3fd34933dfd481246d5955ad /src/gallium | |
parent | 80b27ca2cd8cd2bb2937baa441c43a396887cc03 (diff) |
vc4: Add a CSE optimization pass.
Debugging a regression in discard support was just too full of duplicate
instructions, so I decided to remove them instead of re-analyzing each of
them as I dumped their outputs in simulation.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/vc4/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_cse.c | 183 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 49 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 5 |
4 files changed, 238 insertions, 0 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index f8e04e40009..aa4ee6b6866 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
 	vc4_formats.c \
 	vc4_opt_algebraic.c \
 	vc4_opt_copy_propagation.c \
+	vc4_opt_cse.c \
 	vc4_opt_dead_code.c \
 	vc4_program.c \
 	vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
new file mode 100644
index 00000000000..511e3b94f07
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_cse.c
+ *
+ * Implements CSE for QIR without control flow.
+ *
+ * For each operation that writes a destination (and isn't just a MOV), put it
+ * in the hash table of all instructions that do so.  When faced with another
+ * one, look it up in the hash table by its opcode and operands.  If there's
+ * an entry in the table, then just reuse the entry's destination as the
+ * source of a MOV instead of reproducing the computation.  That MOV will then
+ * get cleaned up by copy propagation.
+ */
+
+#include "vc4_qir.h"
+
+#include "util/hash_table.h"
+#include "util/ralloc.h"
+
+/** Set to true to trace the pass's decisions on stderr. */
+static bool debug;
+
+/**
+ * Hash table key identifying a computation: the opcode, its source operands,
+ * and a generation count for any implicit argument it consumes.
+ *
+ * Keys are hashed and compared as raw bytes, so they must be fully zeroed
+ * (including padding and unused src slots) before being filled in.
+ */
+struct inst_key {
+        enum qop op;
+        struct qreg src[4];
+        /**
+         * If the instruction depends on the flags, how many QOP_SFs have been
+         * seen before this instruction, or if it depends on r4, how many r4
+         * writes have been seen.
+         */
+        uint32_t implicit_arg_update_count;
+};
+
+/** Hash table comparison callback: keys match when bytewise identical. */
+static bool
+inst_key_equals(const void *a, const void *b)
+{
+        const struct inst_key *key_a = a;
+        const struct inst_key *key_b = b;
+
+        return memcmp(key_a, key_b, sizeof(*key_a)) == 0;
+}
+
+/**
+ * Looks up inst in the table of previously seen computations.
+ *
+ * Returns the earlier instruction computing the same value, or NULL if there
+ * is none.  In the NULL case inst is recorded in the table (when it is a
+ * CSE candidate and the key allocation succeeds) so that later duplicates
+ * can find it.
+ *
+ * sf_count and r4_count are the number of flag updates and r4 writes seen
+ * so far; they keep instructions that read those implicit values from
+ * matching an instruction that saw a different value.
+ */
+static struct qinst *
+vc4_find_cse(struct hash_table *ht, struct qinst *inst, uint32_t sf_count,
+             uint32_t r4_count)
+{
+        /* Only instructions writing temps are candidates; MOVs are left to
+         * copy propagation, and the key has room for just 4 sources.
+         */
+        if (inst->dst.file != QFILE_TEMP ||
+            inst->op == QOP_MOV ||
+            qir_get_op_nsrc(inst->op) > 4) {
+                return NULL;
+        }
+
+        struct inst_key key;
+        memset(&key, 0, sizeof(key));
+        key.op = inst->op;
+        memcpy(key.src, inst->src,
+               qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
+        if (qir_depends_on_flags(inst))
+                key.implicit_arg_update_count = sf_count;
+        if (qir_reads_r4(inst))
+                key.implicit_arg_update_count = r4_count;
+
+        uint32_t hash = _mesa_hash_data(&key, sizeof(key));
+        struct hash_entry *entry =
+                _mesa_hash_table_search(ht, hash, &key);
+
+        if (entry) {
+                if (debug) {
+                        fprintf(stderr, "CSE found match:\n");
+
+                        fprintf(stderr, "  Original inst: ");
+                        qir_dump_inst(entry->data);
+                        fprintf(stderr, "\n");
+
+                        fprintf(stderr, "  Our inst:      ");
+                        qir_dump_inst(inst);
+                        fprintf(stderr, "\n");
+                }
+
+                return entry->data;
+        }
+
+        /* The key is allocated as a child of the table so that freeing the
+         * table releases it as well.
+         */
+        struct inst_key *alloc_key = ralloc(ht, struct inst_key);
+        if (!alloc_key)
+                return NULL;
+        memcpy(alloc_key, &key, sizeof(*alloc_key));
+        _mesa_hash_table_insert(ht, hash, alloc_key, inst);
+
+        if (debug) {
+                fprintf(stderr, "Added to CSE HT: ");
+                qir_dump_inst(inst);
+                fprintf(stderr, "\n");
+        }
+
+        return NULL;
+}
+
+/**
+ * Runs the CSE pass over the program's instruction list.
+ *
+ * Duplicate computations are rewritten as MOVs from the first instance's
+ * destination, and a QOP_SF that sets the flags from the same source as the
+ * previous remembered QOP_SF is removed.
+ *
+ * Returns true if any instruction was changed or removed.
+ */
+bool
+qir_opt_cse(struct qcompile *c)
+{
+        bool progress = false;
+        struct simple_node *node, *t;
+        struct qinst *last_sf = NULL;
+        uint32_t sf_count = 0, r4_count = 0;
+
+        struct hash_table *ht = _mesa_hash_table_create(NULL, inst_key_equals);
+        if (!ht)
+                return false;
+
+        foreach_s(node, t, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                /* Count r4 writes up front, so that a writer which is skipped
+                 * by the side-effects check below still keeps r4 reads on
+                 * either side of it from being merged.
+                 */
+                if (qir_writes_r4(inst))
+                        r4_count++;
+
+                if (qir_has_side_effects(inst)) {
+                        /* NOTE(review): presumably discard setup changes
+                         * the flags, so forget the last SF -- confirm.
+                         */
+                        if (inst->op == QOP_TLB_DISCARD_SETUP)
+                                last_sf = NULL;
+                        continue;
+                }
+
+                if (inst->op == QOP_SF) {
+                        if (last_sf &&
+                            qir_reg_equals(last_sf->src[0], inst->src[0])) {
+                                if (debug) {
+                                        fprintf(stderr,
+                                                "Removing redundant SF: ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                                remove_from_list(&inst->link);
+                                progress = true;
+                                continue;
+                        } else {
+                                last_sf = inst;
+                                sf_count++;
+                        }
+                } else {
+                        struct qinst *cse = vc4_find_cse(ht, inst,
+                                                         sf_count, r4_count);
+                        if (cse) {
+                                inst->src[0] = cse->dst;
+                                for (int i = 1; i < qir_get_op_nsrc(inst->op);
+                                     i++)
+                                        inst->src[i] = c->undef;
+                                inst->op = QOP_MOV;
+                                progress = true;
+
+                                if (debug) {
+                                        fprintf(stderr, "  Turned into:   ");
+                                        qir_dump_inst(inst);
+                                        fprintf(stderr, "\n");
+                                }
+                        }
+                }
+        }
+
+        ralloc_free(ht);
+
+        return progress;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 93f97c219f7..a017a72c14b 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -127,6 +127,60 @@ qir_has_side_effects(struct qinst *inst)
         return qir_op_info[inst->op].has_side_effects;
 }
 
+/**
+ * Returns true if the instruction implicitly depends on the flags (the
+ * QOP_SEL_* ops), in addition to its explicit sources.
+ */
+bool
+qir_depends_on_flags(struct qinst *inst)
+{
+        switch (inst->op) {
+        case QOP_SEL_X_0_NS:
+        case QOP_SEL_X_0_NC:
+        case QOP_SEL_X_0_ZS:
+        case QOP_SEL_X_0_ZC:
+        case QOP_SEL_X_Y_NS:
+        case QOP_SEL_X_Y_NC:
+        case QOP_SEL_X_Y_ZS:
+        case QOP_SEL_X_Y_ZC:
+                return true;
+        default:
+                return false;
+        }
+}
+
+/** Returns true if the instruction is one of the ops that writes r4. */
+bool
+qir_writes_r4(struct qinst *inst)
+{
+        switch (inst->op) {
+        case QOP_TEX_RESULT:
+        case QOP_TLB_COLOR_READ:
+        case QOP_RCP:
+        case QOP_RSQ:
+        case QOP_EXP2:
+        case QOP_LOG2:
+                return true;
+        default:
+                return false;
+        }
+}
+
+/** Returns true if the instruction implicitly reads r4 (QOP_R4_UNPACK_*). */
+bool
+qir_reads_r4(struct qinst *inst)
+{
+        switch (inst->op) {
+        case QOP_R4_UNPACK_A:
+        case QOP_R4_UNPACK_B:
+        case QOP_R4_UNPACK_C:
+        case QOP_R4_UNPACK_D:
+                return true;
+        default:
+                return false;
+        }
+}
+
 static void
 qir_print_reg(struct qreg reg)
 {
@@ -274,6 +328,7 @@ qir_optimize(struct qcompile *c)
                 bool progress = false;
 
                 OPTPASS(qir_opt_algebraic);
+                OPTPASS(qir_opt_cse);
                 OPTPASS(qir_opt_copy_propagation);
                 OPTPASS(qir_opt_dead_code);
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 2e210c3bd60..7336a3733de 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <string.h>
 
 #include "util/u_simple_list.h"
 
@@ -227,6 +228,9 @@ struct qreg qir_get_temp(struct qcompile *c);
 int qir_get_op_nsrc(enum qop qop);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct qinst *inst);
+bool qir_depends_on_flags(struct qinst *inst);
+bool qir_writes_r4(struct qinst *inst);
+bool qir_reads_r4(struct qinst *inst);
 
 void qir_dump(struct qcompile *c);
 void qir_dump_inst(struct qinst *inst);
@@ -235,6 +239,7 @@ const char *qir_get_stage_name(enum qstage stage);
 void qir_optimize(struct qcompile *c);
 bool qir_opt_algebraic(struct qcompile *c);
 bool qir_opt_copy_propagation(struct qcompile *c);
+bool qir_opt_cse(struct qcompile *c);
 bool qir_opt_dead_code(struct qcompile *c);
 
 #define QIR_ALU0(name)                                                   \