about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Rhys Perry <[email protected]> 2018-08-18 15:06:01 +0100
committer: Rhys Perry <[email protected]> 2018-08-27 13:57:01 +0100
commit: 400a4eb964bc43c339ce213ee40b41e11c8d44dc (patch)
tree: 48a745387ecc144ef90fb1f4d88fde3ce82c127c /src
parent: 2f52925f5c60c72c9389bfdc122c3d5f8e15b25f (diff)
nv50/ir: optimize near power-of-twos into shladd
total instructions in shared programs : 5819319 -> 5796385 (-0.39%)
total gprs used in shared programs    : 670571 -> 670103 (-0.07%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 21164 -> 21164 (0.00%)

           local   shared      gpr     inst    bytes
helped         0        0      318     1758     1758
hurt           0        0       63        0        0

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Karol Herbst <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 27
1 files changed, 27 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 30de9f5c82d..1ab743705a7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -958,6 +958,9 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
bool
ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c)
{
+ const Target *target = prog->getTarget();
+ int64_t absB = llabs(b);
+
//a * (2^shl) -> a << shl
if (b >= 0 && util_is_power_of_two_or_zero64(b)) {
int shl = util_logbase2_64(b);
@@ -970,6 +973,30 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return true;
}
+ //a * (2^shl + 1) -> a << shl + a
+ //a * -(2^shl + 1) -> -a << shl + a
+ //a * (2^shl - 1) -> a << shl - a
+ //a * -(2^shl - 1) -> -a << shl - a
+ if (typeSizeof(ty) == 4 &&
+ (util_is_power_of_two_or_zero64(absB - 1) ||
+ util_is_power_of_two_or_zero64(absB + 1)) &&
+ target->isOpSupported(OP_SHLADD, TYPE_U32)) {
+ bool subA = util_is_power_of_two_or_zero64(absB + 1);
+ int shl = subA ? util_logbase2_64(absB + 1) : util_logbase2_64(absB - 1);
+
+ Value *res = c ? bld.getSSA() : def;
+ Instruction *insn = bld.mkOp3(OP_SHLADD, TYPE_U32, res, a, bld.mkImm(shl), a);
+ if (b < 0)
+ insn->src(0).mod = Modifier(NV50_IR_MOD_NEG);
+ if (subA)
+ insn->src(2).mod = Modifier(NV50_IR_MOD_NEG);
+
+ if (c)
+ bld.mkOp2(OP_ADD, TYPE_U32, def, res, c);
+
+ return true;
+ }
+
return false;
}