diff options
author | Roy Spliet <[email protected]> | 2015-02-06 22:46:56 +0100 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-02-10 08:02:13 -0500 |
commit | 09ee907266f315300a7856b55e50e74dce8e946f (patch) | |
tree | 287917eedab74d16ee18b168242e3552746437d6 /src/gallium/drivers/nouveau | |
parent | 3dc39d0bca0f06454cb5cddf7571facb7671767a (diff) |
nv50/ir: Fold IMM into MAD
Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
done post-RA because it requires that SDST == SSRC2.
V2: improve readability and add comments to clarify decisions
V3: Remove redundant code... compiler already attempts to put the IMM in
SSRC1
Signed-off-by: Roy Spliet <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 21d20caffdf..62d2ef75f46 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb) // ============================================================================= +// Fold Immediate into MAD; must be done after register allocation due to +// constraint SDST == SSRC2 +// TODO: +// Does NVC0+ have other situations where this pass makes sense? +class NV50PostRaConstantFolding : public Pass +{ +private: + virtual bool visit(BasicBlock *); +}; + +bool +NV50PostRaConstantFolding::visit(BasicBlock *bb) +{ + Value *vtmp; + Instruction *def; + + for (Instruction *i = bb->getFirst(); i; i = i->next) { + switch (i->op) { + case OP_MAD: + if (i->def(0).getFile() != FILE_GPR || + i->src(0).getFile() != FILE_GPR || + i->src(1).getFile() != FILE_GPR || + i->src(2).getFile() != FILE_GPR || + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) + break; + + def = i->getSrc(1)->getInsn(); + if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) { + vtmp = i->getSrc(1); + i->setSrc(1, def->getSrc(0)); + + /* There's no post-RA dead code elimination, so do it here + * XXX: if we add more code-removing post-RA passes, we might + * want to create a post-RA dead-code elim pass */ + if (vtmp->refCount() == 0) + delete_Instruction(bb->getProgram(), def); + + break; + } + break; + default: + break; + } + } + + return true; +} + +// ============================================================================= + // Common subexpression elimination. Stupid O^2 implementation. class LocalCSE : public Pass { @@ -2629,6 +2679,9 @@ bool Program::optimizePostRA(int level) { RUN_PASS(2, FlatteningPass, run); + if (getTarget()->getChipset() < 0xc0) + RUN_PASS(2, NV50PostRaConstantFolding, run); + return true; } |