summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]>, 2017-01-13 00:41:11 -0500
committer: Ilia Mirkin <[email protected]>, 2017-01-16 21:13:09 -0500
commit: 19963231a3245358c0e8fdd74c4654761e62b6c8 (patch)
tree: 34a60bbd334aff116e3062d11d54e83404fff109 /src
parent: 5ba380c226b127cbfad00dd647471e1518ba2cb2 (diff)
nv50/ir: optimize shl + and
Address loading can often end up as shl + shr + shl combinations. The
latter two are equal shifts, which get converted into an and mask.
However, if the previous shl already shifts by at least as many bits as
the mask is trying to remove (in terms of low bits), we can just remove
the and entirely.

This reduces some large shaders by as many as 3% of instructions (out of
2K).

total instructions in shared programs : 6495509 -> 6491076 (-0.07%)
total gprs used in shared programs    : 954621 -> 954623 (0.00%)

                local        gpr       inst      bytes
    helped           0           0       1014       1014
      hurt           0           2          0          0

Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 11
1 files changed, 11 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 28b59857e63..04b6af2471b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1260,6 +1260,17 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->op = OP_EXTBF;
i->setSrc(0, src->getSrc(0));
i->setSrc(1, new_ImmediateValue(prog, ext));
+ } else if (src->op == OP_SHL &&
+ src->src(1).getImmediate(imm1) &&
+ i->src(t).mod == Modifier(0) &&
+ util_is_power_of_two(~imm0.reg.data.u32 + 1) &&
+ util_last_bit(~imm0.reg.data.u32) <= imm1.reg.data.u32) {
+ i->op = OP_MOV;
+ i->setSrc(s, NULL);
+ if (t) {
+ i->setSrc(0, i->getSrc(t));
+ i->setSrc(t, NULL);
+ }
}
}
break;