diff options
author | Varad Gautam <[email protected]> | 2016-03-08 01:01:59 +0530 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2016-03-15 13:09:24 -0700 |
commit | e103b52aec773537d2821d8acc42ac9caa2a4b17 (patch) | |
tree | f4ac5bce2da4a9faf98e6fb801a8506ffdfcbbe3 /src | |
parent | 00bdbb22a92a40472ef47d22b26926ac8f542826 (diff) |
vc4: Coalesce instructions using VPM reads into the VPM read.
This is done instead of copy-propagating the VPM reads into the
instructions that use them, because VPM reads pull from a FIFO and
therefore have to stay in order.
shader-db results:
total instructions in shared programs: 78509 -> 78114 (-0.50%)
instructions in affected programs: 5203 -> 4808 (-7.59%)
total estimated cycles in shared programs: 234670 -> 234318 (-0.15%)
estimated cycles in affected programs: 5345 -> 4993 (-6.59%)
Signed-off-by: Varad Gautam <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Tested-by: Rhys Kidd <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_vpm.c | 74 |
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 2 |
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 2 |
3 files changed, 71 insertions, 7 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c index 0fcf1e5c6dd..d15b0c1a39f 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c @@ -24,14 +24,16 @@ /** * @file vc4_opt_vpm.c * - * This modifies instructions that generate the value consumed by a VPM write - * to write directly into the VPM. + * This modifies instructions that: + * 1. exclusively consume a value read from the VPM to directly read the VPM if + * other operands allow it. + * 2. generate the value consumed by a VPM write to write directly into the VPM. */ #include "vc4_qir.h" bool -qir_opt_vpm_writes(struct vc4_compile *c) +qir_opt_vpm(struct vc4_compile *c) { if (c->stage == QSTAGE_FRAG) return false; @@ -52,8 +54,70 @@ qir_opt_vpm_writes(struct vc4_compile *c) } for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { - if (inst->src[i].file == QFILE_TEMP) - use_count[inst->src[i].index]++; + if (inst->src[i].file == QFILE_TEMP) { + uint32_t temp = inst->src[i].index; + use_count[temp]++; + } + } + } + + /* For instructions reading from a temporary that contains a VPM read + * result, try to move the instruction up in place of the VPM read. + */ + list_for_each_entry(struct qinst, inst, &c->instructions, link) { + if (!inst || qir_is_multi_instruction(inst)) + continue; + + if (qir_depends_on_flags(inst) || inst->sf) + continue; + + if (qir_has_side_effects(c, inst) || + qir_has_side_effect_reads(c, inst) || + qir_is_tex(inst)) + continue; + + for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) { + if (inst->src[j].file != QFILE_TEMP || + inst->src[j].pack) + continue; + + uint32_t temp = inst->src[j].index; + + /* Since VPM reads pull from a FIFO, we only get to + * read each VPM entry once (unless we reset the read + * pointer). That means we can't copy-propagate a VPM + * read to multiple locations. 
+ */ + if (use_count[temp] != 1) + continue; + + struct qinst *mov = c->defs[temp]; + if (!mov || + (mov->op != QOP_MOV && + mov->op != QOP_FMOV && + mov->op != QOP_MMOV) || + mov->src[0].file != QFILE_VPM) { + continue; + } + + uint32_t temps = 0; + for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) { + if (inst->src[k].file == QFILE_TEMP) + temps++; + } + + /* The instruction is safe to reorder if its other + * sources are independent of previous instructions + */ + if (temps == 1) { + list_del(&inst->link); + inst->src[j] = mov->src[0]; + list_replace(&mov->link, &inst->link); + c->defs[temp] = NULL; + free(mov); + progress = true; + break; + } } } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index f9eb0e151c5..65f0067c61e 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -526,7 +526,7 @@ qir_optimize(struct vc4_compile *c) OPTPASS(qir_opt_copy_propagation); OPTPASS(qir_opt_dead_code); OPTPASS(qir_opt_small_immediates); - OPTPASS(qir_opt_vpm_writes); + OPTPASS(qir_opt_vpm); if (!progress) break; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index bae31768bd8..4f39d72f552 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -484,7 +484,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c); bool qir_opt_cse(struct vc4_compile *c); bool qir_opt_dead_code(struct vc4_compile *c); bool qir_opt_small_immediates(struct vc4_compile *c); -bool qir_opt_vpm_writes(struct vc4_compile *c); +bool qir_opt_vpm(struct vc4_compile *c); void vc4_nir_lower_blend(struct vc4_compile *c); void vc4_nir_lower_io(struct vc4_compile *c); nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b, |