diff options
author | Rhys Perry <[email protected]> | 2018-06-13 16:30:01 +0100 |
---|---|---|
committer | Rhys Perry <[email protected]> | 2018-08-27 13:56:47 +0100 |
commit | 2f52925f5c60c72c9389bfdc122c3d5f8e15b25f (patch) | |
tree | adb69f9ebd07b998164fd28778f724e9952906c8 /src/gallium/drivers | |
parent | b60bc7a4ab0b252ce8f8a82076a45f3a16202d58 (diff) |
nv50/ir: move a * b -> a << log2(b) code into createMul()
With this commit, OP_MAD is handled on nv50 too. This commit is also
useful for later commits.
Also, instead of creating a shladd directly, createMul() emits a shl
followed by an add and relies on LateAlgebraicOpt to fuse them into a
shladd. This simplifies the code and helps shader-db slightly overall.
total instructions in shared programs : 5820882 -> 5819319 (-0.03%)
total gprs used in shared programs : 670595 -> 670571 (-0.00%)
total shared used in shared programs : 548832 -> 548832 (0.00%)
total local used in shared programs : 21164 -> 21164 (0.00%)
        local  shared  gpr  inst  bytes
helped      0       0   18   230    230
hurt        0       0    8   263    263
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Karol Herbst <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 45 |
1 file changed, 30 insertions, 15 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index dc7bf24ba23..30de9f5c82d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -379,6 +379,8 @@ private: CmpInstruction *findOriginForTestWithZero(Value *); + bool createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c); + unsigned int foldCount; BuildUtil bld; @@ -953,10 +955,27 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) } } +bool +ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c) +{ + //a * (2^shl) -> a << shl + if (b >= 0 && util_is_power_of_two_or_zero64(b)) { + int shl = util_logbase2_64(b); + + Value *res = c ? bld.getSSA() : def; + bld.mkOp2(OP_SHL, ty, res, a, bld.mkImm(shl)); + if (c) + bld.mkOp2(OP_ADD, ty, def, res, c); + + return true; + } + + return false; +} + void ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { - const Target *target = prog->getTarget(); const int t = !s; const operation op = i->op; Instruction *newi = i; @@ -1040,13 +1059,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->setSrc(s, i->getSrc(t)); i->src(s).mod = i->src(t).mod; } else - if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { - i->op = OP_SHL; - imm0.applyLog2(); - i->setSrc(0, i->getSrc(t)); - i->src(0).mod = i->src(t).mod; - i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); - i->src(1).mod = 0; + if (!isFloatType(i->dType) && !i->src(t).mod) { + bld.setPosition(i, false); + int64_t b = typeSizeof(i->dType) == 8 ? 
imm0.reg.data.s64 : imm0.reg.data.s32; + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) + delete_Instruction(prog, i); } else if (i->postFactor && i->sType == TYPE_F32) { /* Can't emit a postfactor with an immediate, have to fold it in */ @@ -1079,13 +1096,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->setSrc(2, NULL); i->op = OP_ADD; } else - if (s == 1 && !imm0.isNegative() && imm0.isPow2() && - !isFloatType(i->dType) && - target->isOpSupported(OP_SHLADD, i->dType) && - !i->subOp) { - i->op = OP_SHLADD; - imm0.applyLog2(); - i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); + if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { + bld.setPosition(i, false); + int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32; + if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) + delete_Instruction(prog, i); } break; case OP_SUB: |