diff options
author | Eric Anholt <[email protected]> | 2016-11-15 14:48:43 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2016-11-29 08:52:50 -0800 |
commit | 4690a93b123a64f8730a870a336ae9756d11fd18 (patch) | |
tree | 13ed6e2582fbbfe75ee85c2ef36f0ffbf3f15592 /src/gallium/drivers | |
parent | f4baf809937c98dabee90ea57b9d7e6135bdb0df (diff) |
vc4: Add support for coalescing ALU ops into tex_[srtb] MOVs.
This isn't as complete as I would like (it can't merge interpolation because
of the implicit r5 dependency, and it doesn't work with control flow), but it
was cheap and easy.
Improves 3DMMES Taiji performance by 1.15353% +/- 0.299896% (n=29, 16)
total instructions in shared programs: 99810 -> 99059 (-0.75%)
instructions in affected programs: 10705 -> 9954 (-7.02%)
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c | 36 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c | 18 |
4 files changed, 37 insertions, 29 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c index c08c02619f7..b247c690d82 100644 --- a/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c +++ b/src/gallium/drivers/vc4/vc4_opt_coalesce_ff_writes.c @@ -24,8 +24,8 @@ /** * @file vc4_opt_coalesce_ff_writes.c * - * This modifies instructions that generate the value consumed by a VPM write - * to write directly into the VPM. + * This modifies instructions that generate the value consumed by a VPM or TMU + * coordinate write to write directly into the VPM or TMU. */ #include "vc4_qir.h" @@ -33,9 +33,6 @@ bool qir_opt_coalesce_ff_writes(struct vc4_compile *c) { - if (c->stage == QSTAGE_FRAG) - return false; - /* For now, only do this pass when we don't have control flow. */ struct qblock *block = qir_entry_block(c); if (block != qir_exit_block(c)) @@ -60,7 +57,7 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c) if (mov_inst->src[0].file != QFILE_TEMP) continue; - if (mov_inst->dst.file != QFILE_VPM) + if (!(mov_inst->dst.file == QFILE_VPM || qir_is_tex(mov_inst))) continue; uint32_t temp = mov_inst->src[0].index; @@ -71,24 +68,37 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c) if (!inst) continue; + /* Don't bother trying to fold in an ALU op using a uniform to + * a texture op, as we'll just have to lower the uniform back + * out. + */ + if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst)) + continue; + if (qir_depends_on_flags(inst) || inst->sf) continue; if (qir_has_side_effects(c, inst) || - qir_has_side_effect_reads(c, inst)) { + qir_has_side_effect_reads(c, inst) || + inst->op == QOP_VARY_ADD_C) { continue; } - /* Move the generating instruction to the end of the program - * to maintain the order of the VPM writes. + /* Move the generating instruction into the position of the FF + * write. 
*/ + c->defs[inst->dst.index] = NULL; + inst->dst.file = mov_inst->dst.file; + inst->dst.index = mov_inst->dst.index; + if (qir_has_implicit_tex_uniform(mov_inst)) { + inst->src[qir_get_tex_uniform_src(inst)] = + mov_inst->src[qir_get_tex_uniform_src(mov_inst)]; + } + list_del(&inst->link); list_addtail(&inst->link, &mov_inst->link); - qir_remove_instruction(c, mov_inst); - c->defs[inst->dst.index] = NULL; - inst->dst.file = QFILE_VPM; - inst->dst.index = 0; + qir_remove_instruction(c, mov_inst); progress = true; } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index a082c41dfe0..d4f35d8f01a 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -180,6 +180,17 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst) } bool +qir_has_uniform_read(struct qinst *inst) +{ + for (int i = 0; i < qir_get_nsrc(inst); i++) { + if (inst->src[i].file == QFILE_UNIF) + return true; + } + + return false; +} + +bool qir_is_mul(struct qinst *inst) { switch (inst->op) { diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 28d33449391..e189bc32d94 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -577,6 +577,7 @@ int qir_get_tex_uniform_src(struct qinst *inst); bool qir_reg_equals(struct qreg a, struct qreg b); bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); +bool qir_has_uniform_read(struct qinst *inst); bool qir_is_mul(struct qinst *inst); bool qir_is_raw_mov(struct qinst *inst); bool qir_is_tex(struct qinst *inst); diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c index 23ae8ebfa6f..443682a4670 100644 --- a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c +++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c @@ 
-36,24 +36,10 @@ #include "util/u_math.h" static bool -inst_reads_a_uniform(struct qinst *inst) -{ - if (qir_is_tex(inst)) - return true; - - for (int i = 0; i < qir_get_nsrc(inst); i++) { - if (inst->src[i].file == QFILE_UNIF) - return true; - } - - return false; -} - -static bool block_reads_any_uniform(struct qblock *block) { qir_for_each_inst(inst, block) { - if (inst_reads_a_uniform(inst)) + if (qir_has_uniform_read(inst)) return true; } @@ -94,7 +80,7 @@ qir_emit_uniform_stream_resets(struct vc4_compile *c) } qir_for_each_inst(inst, block) { - if (inst_reads_a_uniform(inst)) + if (qir_has_uniform_read(inst)) uniform_count++; } } |