diff options
author | Eric Anholt <[email protected]> | 2015-08-18 21:26:05 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-08-20 23:43:04 -0700 |
commit | 8b36d107fdd6f6b91556fcdc3498df16803d4181 (patch) | |
tree | 197465bb5fde7eb6081064552ce27d61785f6221 /src/gallium/drivers/vc4/vc4_program.c | |
parent | 572a48366d9dfac6a7f9ee8f4d29832c496125e2 (diff) |
vc4: Pack the unorm-packing bits into a src MUL instruction when possible.
Now that we do non-SSA QIR instructions, we can take a NIR SSA src that's
only used by the unorm packing and just stuff the pack bits into it.
total instructions in shared programs: 98136 -> 97974 (-0.17%)
instructions in affected programs: 4149 -> 3987 (-3.90%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_program.c')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 77 |
1 files changed, 67 insertions, 10 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index ff41779e6c1..6bf4c9eab9b 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -818,6 +818,72 @@ declare_uniform_range(struct vc4_compile *c, uint32_t start, uint32_t size) c->ubo_ranges[array_id].used = false; } +static bool +ntq_src_is_only_ssa_def_user(nir_src *src) +{ + if (!src->is_ssa) + return false; + + if (!list_empty(&src->ssa->if_uses)) + return false; + + return (src->ssa->uses.next == &src->use_link && + src->ssa->uses.next->next == &src->ssa->uses); +} + +/** + * In general, emits a nir_pack_unorm_4x8 as a series of MOVs with the pack + * bit set. + * + * However, as an optimization, it tries to find the instructions generating + * the sources to be packed and just emit the pack flag there, if possible. + */ +static void +ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) +{ + struct qreg result = qir_get_temp(c); + struct nir_alu_instr *vec4 = NULL; + + /* If packing from a vec4 op (as expected), identify it so that we can + * peek back at what generated its sources. + */ + if (instr->src[0].src.is_ssa && + instr->src[0].src.ssa->parent_instr->type == nir_instr_type_alu && + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr)->op == + nir_op_vec4) { + vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); + } + + for (int i = 0; i < 4; i++) { + int swiz = instr->src[0].swizzle[i]; + struct qreg src; + if (vec4) { + src = ntq_get_src(c, vec4->src[swiz].src, + vec4->src[swiz].swizzle[0]); + } else { + src = ntq_get_src(c, instr->src[0].src, swiz); + } + + if (vec4 && + ntq_src_is_only_ssa_def_user(&vec4->src[swiz].src) && + src.file == QFILE_TEMP && + c->defs[src.index] && + qir_is_mul(c->defs[src.index]) && + !c->defs[src.index]->dst.pack) { + struct qinst *rewrite = c->defs[src.index]; + c->defs[src.index] = NULL; + rewrite->dst = result; + rewrite->dst.pack = QPU_PACK_MUL_8A + i; + continue; + } + + qir_PACK_8_F(c, result, src, i); + } + + struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); + *dest = result; +} + static void ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) { @@ -839,16 +905,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) } if (instr->op == nir_op_pack_unorm_4x8) { - struct qreg result = qir_get_temp(c); - - for (int i = 0; i < 4; i++) { - qir_PACK_8_F(c, result, - ntq_get_src(c, instr->src[0].src, - instr->src[0].swizzle[i]), - i); - } - struct qreg *dest = ntq_get_dest(c, &instr->dest.dest); - *dest = result; + ntq_emit_pack_unorm_4x8(c, instr); return; } |