summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu> 2015-12-04 16:05:56 -0500
committer: Ilia Mirkin <imirkin@alum.mit.edu> 2015-12-05 17:50:23 -0500
commit: 31fde8fabadcd9240c1e96c8a953b465def9b516 (patch)
tree: 1006af1054a7b08d8a4753c67376841f3d4aeb27
parent: a4eff86f4afb6618aff488e9da5600e33d97a9c3 (diff)
nv50/ir: flip shl(add, imm) into add(shl, imm)
This works when the add also has an immediate. This often happens in
address calculations. These addresses can then be inlined as well.

On code targeted to SM35:

total instructions in shared programs : 6223346 -> 6206257 (-0.27%)
total gprs used in shared programs    : 911075 -> 911045 (-0.00%)
total local used in shared programs   : 39072 -> 39072 (0.00%)

            local     gpr    inst   bytes
  helped        0     119    3664    3664
  hurt          0      74      15      15

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 38
1 files changed, 34 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index b79e465b4fa..9e49c19b8e1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1132,13 +1132,43 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
// try to concatenate shifts
Instruction *si = i->getSrc(0)->getInsn();
- if (!si || si->op != OP_SHL)
+ if (!si)
break;
ImmediateValue imm1;
- if (si->src(1).getImmediate(imm1)) {
+ switch (si->op) {
+ case OP_SHL:
+ if (si->src(1).getImmediate(imm1)) {
+ bld.setPosition(i, false);
+ i->setSrc(0, si->getSrc(0));
+ i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
+ }
+ break;
+ case OP_SUB:
+ case OP_ADD:
+ int adds;
+ if (isFloatType(si->dType))
+ return;
+ if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
+ adds = 0;
+ else if (si->src(1).getImmediate(imm1))
+ adds = 1;
+ else
+ return;
+ // SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
+
+ // This is more operations, but if one of x, y is an immediate, then
+ // we can get a situation where (a) we can use ISCADD, or (b)
+ // propagate the add bit into an indirect load.
bld.setPosition(i, false);
- i->setSrc(0, si->getSrc(0));
- i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
+ i->op = si->op;
+ i->setSrc(adds, bld.loadImm(NULL, imm1.reg.data.u32 << imm0.reg.data.u32));
+ i->setSrc(!adds, bld.mkOp2v(OP_SHL, i->dType,
+ bld.getSSA(i->def(0).getSize(), i->def(0).getFile()),
+ si->getSrc(!adds),
+ bld.mkImm(imm0.reg.data.u32)));
+ break;
+ default:
+ return;
}
}
break;