diff options
author | Ilia Mirkin <[email protected]> | 2015-12-04 15:21:11 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-12-05 17:50:23 -0500 |
commit | abd326e81b06f58797be94bd655ee06b17a34f0c (patch) | |
tree | 1a96ec979854bb7576f67718beddcab561a9b19a /src | |
parent | 31fde8fabadcd9240c1e96c8a953b465def9b516 (diff) |
nv50/ir: propagate indirect loads into instructions
This way $r1 = $r0 + 4; c1[$r1] becomes c1[$r0+4].
On SM35:
total instructions in shared programs : 6206257 -> 6185058 (-0.34%)
total gprs used in shared programs : 911045 -> 910722 (-0.04%)
total local used in shared programs : 39072 -> 39072 (0.00%)
local gpr inst bytes
helped 0 417 4195 4195
hurt 0 280 0 0
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 52 |
1 file changed, 52 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9e49c19b8e1..0d74f725bd1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -265,6 +265,57 @@ LoadPropagation::visit(BasicBlock *bb)
 
 // =============================================================================
 
+class IndirectPropagation : public Pass // folds constant address math into indirect sources
+{
+private:
+   virtual bool visit(BasicBlock *); // per-basic-block entry point, defined below
+};
+// Rewrites e.g. "$r1 = $r0 + 4; c1[$r1]" into "c1[$r0+4]". Always returns true.
+bool
+IndirectPropagation::visit(BasicBlock *bb)
+{
+   const Target *targ = prog->getTarget();
+   Instruction *next;
+   // Walk every instruction of the block.
+   for (Instruction *i = bb->getEntry(); i; i = next) {
+      next = i->next; // successor is captured before any source of i is rewritten
+      // Examine each source operand of i.
+      for (int s = 0; i->srcExists(s); ++s) {
+         Instruction *insn; // instruction defining the indirect address of source s
+         ImmediateValue imm; // constant operand pulled out of insn
+         if (!i->src(s).isIndirect(0))
+            continue; // source s has no indirect address in dimension 0
+         insn = i->getIndirect(s, 0)->getInsn();
+         if (!insn)
+            continue; // no defining instruction to look through
+         if (insn->op == OP_ADD && !isFloatType(insn->dType)) { // address = base + imm
+            if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) ||
+                !insn->src(1).getImmediate(imm))
+               continue; // base is not in the native address file, or src1 is not an immediate
+            i->setIndirect(s, 0, insn->getSrc(0)); // index by the ADD's base directly
+            i->setSrc(s, cloneShallow(func, i->getSrc(s))); // edit a copy; presumably the value may be shared -- TODO confirm
+            i->src(s).get()->reg.data.offset += imm.reg.data.u32; // NOTE(review): no visible check that the target can encode the new offset
+         } else if (insn->op == OP_SUB && !isFloatType(insn->dType)) { // address = base - imm
+            if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) ||
+                !insn->src(1).getImmediate(imm))
+               continue; // same preconditions as the ADD case
+            i->setIndirect(s, 0, insn->getSrc(0)); // index by the SUB's base directly
+            i->setSrc(s, cloneShallow(func, i->getSrc(s))); // edit a copy, as in the ADD case
+            i->src(s).get()->reg.data.offset -= imm.reg.data.u32; // subtract instead of add; same unchecked-range caveat
+         } else if (insn->op == OP_MOV) { // address = imm
+            if (!insn->src(0).getImmediate(imm))
+               continue; // only a MOV of an immediate can be folded
+            i->setIndirect(s, 0, NULL); // address is a pure constant, so the indirect is dropped
+            i->setSrc(s, cloneShallow(func, i->getSrc(s))); // edit a copy, as in the ADD case
+            i->src(s).get()->reg.data.offset += imm.reg.data.u32; // constant goes straight into the offset; same unchecked-range caveat
+         }
+      }
+   }
+   return true;
+}
+
+// =============================================================================
+
 // Evaluate constant expressions.
class ConstantFolding : public Pass { @@ -3135,6 +3186,7 @@ Program::optimizeSSA(int level) RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks RUN_PASS(1, ConstantFolding, foldAll); RUN_PASS(1, LoadPropagation, run); + RUN_PASS(1, IndirectPropagation, run); RUN_PASS(2, MemoryOpt, run); RUN_PASS(2, LocalCSE, run); RUN_PASS(0, DeadCodeElim, buryAll); |