summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/vc4/vc4_opt_vpm.c 74
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 2
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 2
3 files changed, 71 insertions, 7 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index 0fcf1e5c6dd..d15b0c1a39f 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -24,14 +24,16 @@
/**
* @file vc4_opt_vpm.c
*
- * This modifies instructions that generate the value consumed by a VPM write
- * to write directly into the VPM.
+ * This modifies instructions that:
+ * 1. are the only consumer of a value read from the VPM, to read the VPM
+ * directly if their other operands allow it.
+ * 2. generate the value consumed by a VPM write to write directly into the VPM.
*/
#include "vc4_qir.h"
bool
-qir_opt_vpm_writes(struct vc4_compile *c)
+qir_opt_vpm(struct vc4_compile *c)
{
if (c->stage == QSTAGE_FRAG)
return false;
@@ -52,8 +54,70 @@ qir_opt_vpm_writes(struct vc4_compile *c)
}
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- if (inst->src[i].file == QFILE_TEMP)
- use_count[inst->src[i].index]++;
+ if (inst->src[i].file == QFILE_TEMP) {
+ uint32_t temp = inst->src[i].index;
+ use_count[temp]++;
+ }
+ }
+ }
+
+ /* For instructions reading from a temporary that contains a VPM read
+ * result, try to move the instruction up in place of the VPM read.
+ */
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ if (!inst || qir_is_multi_instruction(inst))
+ continue;
+
+ if (qir_depends_on_flags(inst) || inst->sf)
+ continue;
+
+ if (qir_has_side_effects(c, inst) ||
+ qir_has_side_effect_reads(c, inst) ||
+ qir_is_tex(inst))
+ continue;
+
+ for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) {
+ if (inst->src[j].file != QFILE_TEMP ||
+ inst->src[j].pack)
+ continue;
+
+ uint32_t temp = inst->src[j].index;
+
+ /* Since VPM reads pull from a FIFO, we only get to
+ * read each VPM entry once (unless we reset the read
+ * pointer). That means we can't copy-propagate a VPM
+ * read to multiple locations.
+ */
+ if (use_count[temp] != 1)
+ continue;
+
+ struct qinst *mov = c->defs[temp];
+ if (!mov ||
+ (mov->op != QOP_MOV &&
+ mov->op != QOP_FMOV &&
+ mov->op != QOP_MMOV) ||
+ mov->src[0].file != QFILE_VPM) {
+ continue;
+ }
+
+ uint32_t temps = 0;
+ for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) {
+ if (inst->src[k].file == QFILE_TEMP)
+ temps++;
+ }
+
+ /* The instruction is safe to reorder if the VPM read result is its
+ * only temporary source, i.e. it is independent of previous instructions.
+ */
+ if (temps == 1) {
+ list_del(&inst->link);
+ inst->src[j] = mov->src[0];
+ list_replace(&mov->link, &inst->link);
+ c->defs[temp] = NULL;
+ free(mov);
+ progress = true;
+ break;
+ }
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index f9eb0e151c5..65f0067c61e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -526,7 +526,7 @@ qir_optimize(struct vc4_compile *c)
OPTPASS(qir_opt_copy_propagation);
OPTPASS(qir_opt_dead_code);
OPTPASS(qir_opt_small_immediates);
- OPTPASS(qir_opt_vpm_writes);
+ OPTPASS(qir_opt_vpm);
if (!progress)
break;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index bae31768bd8..4f39d72f552 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -484,7 +484,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
-bool qir_opt_vpm_writes(struct vc4_compile *c);
+bool qir_opt_vpm(struct vc4_compile *c);
void vc4_nir_lower_blend(struct vc4_compile *c);
void vc4_nir_lower_io(struct vc4_compile *c);
nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,