diff options
author | Karol Herbst <[email protected]> | 2017-03-26 21:45:58 +0200 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2017-03-31 23:57:13 -0400 |
commit | d346b8588c36949695f2b01ca76619e84754dd50 (patch) | |
tree | 250fa91aa9921ac68e2d52b36afd8b689cc37b02 /src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | |
parent | d6ce32514760296b19f7609ec12f25e46c8ea34a (diff) |
nv50/ir: implement mad post ra folding for nvc0+
changes for GpuTest /test=pixmark_piano /benchmark /no_scorebox /msaa=0
/benchmark_duration_ms=60000 /width=1024 /height=640:
score: 1026 -> 1045
changes for shader-db:
total instructions in shared programs : 3943335 -> 3934925 (-0.21%)
total gprs used in shared programs : 481563 -> 481563 (0.00%)
total local used in shared programs : 27469 -> 27469 (0.00%)
total bytes used in shared programs : 36139384 -> 36061888 (-0.21%)
local gpr inst bytes
helped 0 0 3587 3587
hurt 0 0 0 0
v2: removed TODO
reordered to show changes without RA modification
removed stale debugging print() call
v3: remove predicate checks
enable only for gf100 ISA
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 51 |
1 files changed, 47 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 6dc4e21e11c..f7293b273ee 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -3195,7 +3195,8 @@ class PostRaLoadPropagation : public Pass private: virtual bool visit(Instruction *); - void handleMAD(Instruction *); + void handleMADforNV50(Instruction *); + void handleMADforNVC0(Instruction *); }; static bool @@ -3210,7 +3211,7 @@ post_ra_dead(Instruction *i) // Fold Immediate into MAD; must be done after register allocation due to // constraint SDST == SSRC2 void -PostRaLoadPropagation::handleMAD(Instruction *i) +PostRaLoadPropagation::handleMADforNV50(Instruction *i) { if (i->def(0).getFile() != FILE_GPR || i->src(0).getFile() != FILE_GPR || @@ -3263,12 +3264,54 @@ PostRaLoadPropagation::handleMAD(Instruction *i) } } +void +PostRaLoadPropagation::handleMADforNVC0(Instruction *i) +{ + if (i->def(0).getFile() != FILE_GPR || + i->src(0).getFile() != FILE_GPR || + i->src(1).getFile() != FILE_GPR || + i->src(2).getFile() != FILE_GPR || + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) + return; + + // TODO: gm107 can also do this for S32, maybe other chipsets as well + if (i->dType != TYPE_F32) + return; + + if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) + return; + + ImmediateValue val; + int s; + + if (i->src(0).getImmediate(val)) + s = 1; + else if (i->src(1).getImmediate(val)) + s = 0; + else + return; + + if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) + return; + + if (s == 1) + i->swapSources(0, 1); + + Instruction *imm = i->getSrc(1)->getInsn(); + i->setSrc(1, imm->getSrc(0)); + if (post_ra_dead(imm)) + delete_Instruction(prog, imm); +} + bool PostRaLoadPropagation::visit(Instruction *i) { switch (i->op) { case OP_MAD: - handleMAD(i); + if (prog->getTarget()->getChipset() < 
0xc0) + handleMADforNV50(i); + else + handleMADforNVC0(i); break; default: break; @@ -3698,7 +3741,7 @@ bool Program::optimizePostRA(int level) { RUN_PASS(2, FlatteningPass, run); - if (getTarget()->getChipset() < 0xc0) + if (getTarget()->getChipset() < NVISA_GK20A_CHIPSET) RUN_PASS(2, PostRaLoadPropagation, run); return true; |