diff options
author | Samuel Pitoiset <[email protected]> | 2016-10-07 01:16:24 +0200 |
---|---|---|
committer | Samuel Pitoiset <[email protected]> | 2016-10-12 17:46:03 +0200 |
commit | 87b06cab14c449e442be27650024f044e93c9a7c (patch) | |
tree | faca7df0af71290d3a0f0c94f32031fd2d9baa07 /src/gallium/drivers | |
parent | 85ba409967bb0327b85460639080214b3997fc17 (diff) |
nv50/ir: optimize ADD(SHL(a, b), c) to SHLADD(a, b, c)
total instructions in shared programs :2286901 -> 2284473 (-0.11%)
total gprs used in shared programs :335256 -> 335273 (0.01%)
total local used in shared programs :31968 -> 31968 (0.00%)
local gpr inst bytes
helped 0 41 852 852
hurt 0 44 23 23
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 6efb29eb4a9..d88bb341746 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2132,6 +2132,92 @@ AlgebraicOpt::visit(BasicBlock *bb) // ============================================================================= +// ADD(SHL(a, b), c) -> SHLADD(a, b, c) +class LateAlgebraicOpt : public Pass +{ +private: + virtual bool visit(Instruction *); + + void handleADD(Instruction *); + bool tryADDToSHLADD(Instruction *); +}; + +void +LateAlgebraicOpt::handleADD(Instruction *add) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + + if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) + return; + + if (prog->getTarget()->isOpSupported(OP_SHLADD, add->dType)) + tryADDToSHLADD(add); +} + +// ADD(SHL(a, b), c) -> SHLADD(a, b, c) +bool +LateAlgebraicOpt::tryADDToSHLADD(Instruction *add) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + ImmediateValue imm; + Instruction *shl; + Modifier mod[2]; + Value *src; + int s; + + if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8 + || isFloatType(add->dType)) + return false; + + if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL) + s = 0; + else + if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL) + s = 1; + else + return false; + + src = add->getSrc(s); + shl = src->getUniqueInsn(); + + if (shl->bb != add->bb || shl->usesFlags() || shl->subOp) + return false; + + if (!shl->src(1).getImmediate(imm)) + return false; + + mod[0] = add->src(0).mod; + mod[1] = add->src(1).mod; + + add->op = OP_SHLADD; + add->setSrc(2, add->src(!s)); + add->src(2).mod = mod[s]; + + add->setSrc(0, shl->getSrc(0)); + add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32)); + add->src(1).mod = Modifier(0); + + return true; +} + +bool +LateAlgebraicOpt::visit(Instruction *i) +{ + switch (i->op) { + case OP_ADD: + handleADD(i); + break; + default: + break; + } + + return true; +} + +// ============================================================================= + static inline void updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) { @@ -3436,6 +3522,7 @@ Program::optimizeSSA(int level) RUN_PASS(2, AlgebraicOpt, run); RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks RUN_PASS(1, ConstantFolding, foldAll); + RUN_PASS(2, LateAlgebraicOpt, run); RUN_PASS(1, LoadPropagation, run); RUN_PASS(1, IndirectPropagation, run); RUN_PASS(2, MemoryOpt, run); |