summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2017-11-10 22:10:46 -0500
committer: Ilia Mirkin <[email protected]> 2017-11-26 01:10:03 -0500
commit: 50e913b9c5d311334281da89b4e9969d48fd62b6 (patch)
tree: b3cc1d2f1105f0e184059845ff3fa19bde791290 /src
parent: 3079993727ddfe6b09e1f0ec8f834691fd654b46 (diff)
nv50/ir: add optimization for modulo by a non-power-of-2 value
We can still use the optimized division methods which make use of
multiplication with overflow.

Signed-off-by: Ilia Mirkin <[email protected]>
Reviewed-by: Tobias Klausmann <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 2448c737e73..cfd623ea877 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1192,6 +1192,21 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
delete_Instruction(prog, i);
}
+ } else if (s == 1) {
+ // In this case, we still want the optimized lowering that we get
+ // from having division by an immediate.
+ //
+ // a % b == a - (a/b) * b
+ bld.setPosition(i, false);
+ Value *div = bld.mkOp2v(OP_DIV, i->sType, bld.getSSA(),
+ i->getSrc(0), i->getSrc(1));
+ newi = bld.mkOp2(OP_ADD, i->sType, i->getDef(0), i->getSrc(0),
+ bld.mkOp2v(OP_MUL, i->sType, bld.getSSA(), div, i->getSrc(1)));
+ // TODO: Check that target supports this. In this case, we know that
+ // all backends do.
+ newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
+
+ delete_Instruction(prog, i);
}
break;