summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2014-12-17 20:35:17 -0800
committer Eric Anholt <[email protected]> 2014-12-18 15:00:56 -0800
commit e06b0778f59980429fececb1aa0de0f0a3f23427 (patch)
tree a5d184e4582fde84cde8909ac20f48befda6283c /src/gallium/drivers/vc4
parent a871eff16cc18232ee03b372d75cb6f633213e14 (diff)
vc4: Coalesce MOVs into VPM with the instructions generating the values.
total instructions in shared programs: 41168 -> 40976 (-0.47%)
instructions in affected programs: 18156 -> 17964 (-1.06%)
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/Makefile.sources | 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_vpm_writes.c | 117
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c | 38
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h | 2
4 files changed, 143 insertions, 15 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1f8e8c41bf4..95f1a340ab3 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -16,6 +16,7 @@ C_SOURCES := \
vc4_opt_cse.c \
vc4_opt_dead_code.c \
vc4_opt_small_immediates.c \
+ vc4_opt_vpm_writes.c \
vc4_packet.h \
vc4_program.c \
vc4_qir.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
new file mode 100644
index 00000000000..477d32605a4
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_vpm_writes.c
+ *
+ * This modifies instructions that generate the value consumed by a VPM write
+ * to write directly into the VPM.
+ */
+
+#include "vc4_qir.h"
+
+/**
+ * Coalesces "MOV into the VPM from a temp" with the instruction that
+ * defined that temp, so the defining instruction writes the VPM directly.
+ *
+ * Programs whose stage is QSTAGE_FRAG are left untouched.  For other
+ * stages, one pass over c->instructions records, per temp index, the last
+ * instruction writing it and the number of times it is read, and collects
+ * the instructions whose destination file is QFILE_VPM.  Each collected
+ * QOP_MOV whose source temp is read exactly once is then folded into the
+ * temp's defining instruction, unless that instruction is
+ * multi-instruction, depends on flags, has side effects, or reads a temp
+ * defined by QOP_TEX_RESULT.
+ *
+ * At most 64 VPM writes are tracked; any beyond that are left as they are.
+ *
+ * \param c  compile whose instruction list is rewritten in place.
+ * \return true if at least one MOV was coalesced, false otherwise.
+ */
+bool
+qir_opt_vpm_writes(struct vc4_compile *c)
+{
+        if (c->stage == QSTAGE_FRAG)
+                return false;
+
+        /* Without temps no VPM write can have a temp source, so there is
+         * nothing to coalesce.  Returning here also avoids declaring the
+         * zero-length variable-length arrays below, which is undefined.
+         */
+        if (c->num_temps == 0)
+                return false;
+
+        bool progress = false;
+        struct simple_node *node;
+        struct qinst *defs[c->num_temps];
+        struct qinst *vpm_writes[64] = { 0 };
+        const uint32_t max_vpm_writes =
+                sizeof(vpm_writes) / sizeof(vpm_writes[0]);
+        uint32_t use_count[c->num_temps];
+        uint32_t vpm_write_count = 0;
+        memset(&defs, 0, sizeof(defs));
+        memset(&use_count, 0, sizeof(use_count));
+
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                switch (inst->dst.file) {
+                case QFILE_TEMP:
+                        defs[inst->dst.index] = inst;
+                        break;
+                case QFILE_VPM:
+                        /* Never write past the end of vpm_writes[]; VPM
+                         * writes that do not fit are simply not optimized.
+                         */
+                        if (vpm_write_count < max_vpm_writes)
+                                vpm_writes[vpm_write_count++] = inst;
+                        break;
+                default:
+                        break;
+                }
+
+                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                        if (inst->src[i].file == QFILE_TEMP)
+                                use_count[inst->src[i].index]++;
+                }
+        }
+
+        for (uint32_t i = 0; i < vpm_write_count; i++) {
+                if (vpm_writes[i]->op != QOP_MOV ||
+                    vpm_writes[i]->src[0].file != QFILE_TEMP) {
+                        continue;
+                }
+
+                /* The temp must have no other reader, since its value will
+                 * no longer be written to the temp at all.
+                 */
+                uint32_t temp = vpm_writes[i]->src[0].index;
+                if (use_count[temp] != 1)
+                        continue;
+
+                /* defs[] is zero-filled, so a temp that is read but never
+                 * written has no defining instruction to retarget.
+                 */
+                struct qinst *inst = defs[temp];
+                if (!inst)
+                        continue;
+
+                if (qir_is_multi_instruction(inst))
+                        continue;
+
+                if (qir_depends_on_flags(inst))
+                        continue;
+
+                if (qir_has_side_effects(c, inst))
+                        continue;
+
+                /* A QOP_TEX_RESULT destination is r4, so we can't move
+                 * accesses to it past another QOP_TEX_RESULT which would
+                 * update it.
+                 */
+                int src;
+                for (src = 0; src < qir_get_op_nsrc(inst->op); src++) {
+                        if (inst->src[src].file != QFILE_TEMP)
+                                continue;
+
+                        struct qinst *src_def = defs[inst->src[src].index];
+                        if (src_def && src_def->op == QOP_TEX_RESULT)
+                                break;
+                }
+                if (src != qir_get_op_nsrc(inst->op))
+                        continue;
+
+                /* Relocate the generating instruction, using the MOV's list
+                 * link as the anchor, to maintain the order of the VPM
+                 * writes; then drop the now-redundant MOV.
+                 */
+                move_to_tail(&vpm_writes[i]->link, &inst->link);
+                qir_remove_instruction(vpm_writes[i]);
+
+                inst->dst.file = QFILE_VPM;
+                inst->dst.index = 0;
+
+                progress = true;
+        }
+
+        return progress;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 91bdefe81e5..3fd39413222 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -32,6 +32,7 @@ struct qir_op_info {
const char *name;
uint8_t ndst, nsrc;
bool has_side_effects;
+ bool multi_instruction;
};
static const struct qir_op_info qir_op_info[] = {
@@ -59,21 +60,21 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_NOT] = { "not", 1, 1 },
[QOP_SF] = { "sf", 0, 1 },
- [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
- [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
- [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
- [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
- [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
- [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
- [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
- [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
-
- [QOP_RCP] = { "rcp", 1, 1 },
- [QOP_RSQ] = { "rsq", 1, 1 },
- [QOP_EXP2] = { "exp2", 1, 2 },
- [QOP_LOG2] = { "log2", 1, 2 },
- [QOP_PACK_COLORS] = { "pack_colors", 1, 4 },
- [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 },
+ [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
+ [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
+ [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
+ [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+ [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
+ [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
+ [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
+ [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+
+ [QOP_RCP] = { "rcp", 1, 1, false, true },
+ [QOP_RSQ] = { "rsq", 1, 1, false, true },
+ [QOP_EXP2] = { "exp2", 1, 2, false, true },
+ [QOP_LOG2] = { "log2", 1, 2, false, true },
+ [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+ [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
[QOP_VPM_READ] = { "vpm_read", 0, 1, true },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
@@ -156,6 +157,12 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
}
+/**
+ * Returns the multi_instruction flag stored in the qir_op_info[] entry for
+ * the opcode of \p inst.
+ */
bool
+qir_is_multi_instruction(struct qinst *inst)
+{
+        const struct qir_op_info *info = &qir_op_info[inst->op];
+
+        return info->multi_instruction;
+}
+
+bool
qir_depends_on_flags(struct qinst *inst)
{
switch (inst->op) {
@@ -397,6 +404,7 @@ qir_optimize(struct vc4_compile *c)
OPTPASS(qir_opt_copy_propagation);
OPTPASS(qir_opt_dead_code);
OPTPASS(qir_opt_small_immediates);
+ OPTPASS(qir_opt_vpm_writes);
if (!progress)
break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index dd9866e126f..f7d59a80dac 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -374,6 +374,7 @@ struct qreg qir_get_temp(struct vc4_compile *c);
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
+bool qir_is_multi_instruction(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
bool qir_reads_r4(struct qinst *inst);
@@ -389,6 +390,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
+bool qir_opt_vpm_writes(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);