diff options
author | Rhys Perry <[email protected]> | 2018-08-18 15:06:50 +0100 |
---|---|---|
committer | Rhys Perry <[email protected]> | 2018-08-27 13:57:11 +0100 |
commit | d27c7918916cdc8092959124955f887592e37d72 (patch) | |
tree | 56bba3b7d22cc57c741025917dceb89a63e5e6d1 | |
parent | 400a4eb964bc43c339ce213ee40b41e11c8d44dc (diff) |
nv50/ir: optimize multiplication by 16-bit immediates into two xmads
This emits two XMAD instructions rather than the usual three that would otherwise be created.
total instructions in shared programs : 5796385 -> 5786560 (-0.17%)
total gprs used in shared programs : 670103 -> 669968 (-0.02%)
total shared used in shared programs : 548832 -> 548832 (0.00%)
total local used in shared programs : 21164 -> 21068 (-0.45%)
| | local | shared | gpr | inst | bytes |
|---|---|---|---|---|---|
| helped | 1 | 0 | 64 | 1040 | 1040 |
| hurt | 0 | 0 | 27 | 0 | 0 |
Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Karol Herbst <[email protected]>
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 1ab743705a7..ecb4bae2a83 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -997,6 +997,16 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value * return true; } + if (typeSizeof(ty) == 4 && b >= 0 && b <= 0xffff && + target->isOpSupported(OP_XMAD, TYPE_U32)) { + Value *tmp = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(), + a, bld.mkImm((uint32_t)b), c ? c : bld.mkImm(0)); + bld.mkOp3(OP_XMAD, TYPE_U32, def, a, bld.mkImm((uint32_t)b), tmp)->subOp = + NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_H1(0); + + return true; + } + return false; } |