summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/codegen
diff options
context:
space:
mode:
author Karol Herbst <[email protected]> 2017-03-26 21:45:58 +0200
committer Ilia Mirkin <[email protected]> 2017-03-31 23:57:13 -0400
commit d346b8588c36949695f2b01ca76619e84754dd50 (patch)
tree 250fa91aa9921ac68e2d52b36afd8b689cc37b02 /src/gallium/drivers/nouveau/codegen
parent d6ce32514760296b19f7609ec12f25e46c8ea34a (diff)
nv50/ir: implement mad post ra folding for nvc0+
changes for GpuTest /test=pixmark_piano /benchmark /no_scorebox /msaa=0 /benchmark_duration_ms=60000 /width=1024 /height=640: score: 1026 -> 1045 changes for shader-db: total instructions in shared programs : 3943335 -> 3934925 (-0.21%) total gprs used in shared programs : 481563 -> 481563 (0.00%) total local used in shared programs : 27469 -> 27469 (0.00%) total bytes used in shared programs : 36139384 -> 36061888 (-0.21%) local gpr inst bytes helped 0 0 3587 3587 hurt 0 0 0 0 v2: removed TODO; reordered to show changes without RA modification; removed stale debugging print() call v3: remove predicate checks; enable only for gf100 ISA Signed-off-by: Karol Herbst <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 51
1 files changed, 47 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 6dc4e21e11c..f7293b273ee 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3195,7 +3195,8 @@ class PostRaLoadPropagation : public Pass
private:
virtual bool visit(Instruction *);
- void handleMAD(Instruction *);
+ void handleMADforNV50(Instruction *);
+ void handleMADforNVC0(Instruction *);
};
static bool
@@ -3210,7 +3211,7 @@ post_ra_dead(Instruction *i)
// Fold Immediate into MAD; must be done after register allocation due to
// constraint SDST == SSRC2
void
-PostRaLoadPropagation::handleMAD(Instruction *i)
+PostRaLoadPropagation::handleMADforNV50(Instruction *i)
{
if (i->def(0).getFile() != FILE_GPR ||
i->src(0).getFile() != FILE_GPR ||
@@ -3263,12 +3264,54 @@ PostRaLoadPropagation::handleMAD(Instruction *i)
}
}
+void
+PostRaLoadPropagation::handleMADforNVC0(Instruction *i)
+{
+ if (i->def(0).getFile() != FILE_GPR ||
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+ return;
+
+ // TODO: gm107 can also do this for S32, maybe other chipsets as well
+ if (i->dType != TYPE_F32)
+ return;
+
+ if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG))
+ return;
+
+ ImmediateValue val;
+ int s;
+
+ if (i->src(0).getImmediate(val))
+ s = 1;
+ else if (i->src(1).getImmediate(val))
+ s = 0;
+ else
+ return;
+
+ if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG))
+ return;
+
+ if (s == 1)
+ i->swapSources(0, 1);
+
+ Instruction *imm = i->getSrc(1)->getInsn();
+ i->setSrc(1, imm->getSrc(0));
+ if (post_ra_dead(imm))
+ delete_Instruction(prog, imm);
+}
+
bool
PostRaLoadPropagation::visit(Instruction *i)
{
switch (i->op) {
case OP_MAD:
- handleMAD(i);
+ if (prog->getTarget()->getChipset() < 0xc0)
+ handleMADforNV50(i);
+ else
+ handleMADforNVC0(i);
break;
default:
break;
@@ -3698,7 +3741,7 @@ bool
Program::optimizePostRA(int level)
{
RUN_PASS(2, FlatteningPass, run);
- if (getTarget()->getChipset() < 0xc0)
+ if (getTarget()->getChipset() < NVISA_GK20A_CHIPSET)
RUN_PASS(2, PostRaLoadPropagation, run);
return true;