aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2019-07-12 23:26:59 -0500
committerJason Ekstrand <[email protected]>2019-07-17 18:44:35 +0000
commit812b341578040496c76cc05e7c9cc05fe57ce834 (patch)
tree75c5e5a848ecfffb97f5396c8f0be9d9a6db7f4d /src/compiler
parente8505e982a3f911440cad8a963313d2d62d27fc6 (diff)
nir/algebraic: Optimize comparisons and up-casts
These seem like obvious enough optimizations in the world of multiple integer bit sizes. The only known thing which hits these at the moment is some Vulkan CTS tests for 16-bit SSBO values which like to up-cast and check for equality. However, it's something that's bound to come up as we start seeing more integers in shaders. The optimizations of comparisons of casted values with constants are something which we would ideally do with range analysis. However, lacking that, we can do it in opt_algebraic as long as one side is a constant. In dEQP-VK.ssbo.phys.layout.random.16bit.scalar.13, this commit, along with the previous commit, reduce the number of instructions emitted on Skylake from 55328 to 44546, a reduction of 20%. Acked-by: Matt Turner <[email protected]> Reviewed-by: Ian Romanick <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 42462d5befa..abefbb54756 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1037,6 +1037,73 @@ for N, M in itertools.product(type_sizes('uint'), type_sizes('uint')):
# The N == M case is handled by other optimizations
pass
+# Optimize comparisons with up-casts
+for t in ['int', 'uint', 'float']:
+ for N, M in itertools.product(type_sizes(t), repeat=2):
+ if N == 1 or N >= M:
+ continue
+
+ x2xM = '{0}2{0}{1}'.format(t[0], M)
+ x2xN = '{0}2{0}{1}'.format(t[0], N)
+ aN = 'a@' + str(N)
+ bN = 'b@' + str(N)
+ xeq = 'feq' if t == 'float' else 'ieq'
+ xne = 'fne' if t == 'float' else 'ine'
+ xge = '{0}ge'.format(t[0])
+ xlt = '{0}lt'.format(t[0])
+
+ # Up-casts are lossless so for correctly signed comparisons of
+ # up-casted values we can do the comparison at the largest of the two
+ # original sizes and drop one or both of the casts. (We have
+ # optimizations to drop the no-op casts which this may generate.)
+ for P in type_sizes(t):
+ if P == 1 or P > N:
+ continue
+
+ bP = 'b@' + str(P)
+ optimizations += [
+ ((xeq, (x2xM, aN), (x2xM, bP)), (xeq, a, (x2xN, b))),
+ ((xne, (x2xM, aN), (x2xM, bP)), (xne, a, (x2xN, b))),
+ ((xge, (x2xM, aN), (x2xM, bP)), (xge, a, (x2xN, b))),
+ ((xlt, (x2xM, aN), (x2xM, bP)), (xlt, a, (x2xN, b))),
+ ((xge, (x2xM, bP), (x2xM, aN)), (xge, (x2xN, b), a)),
+ ((xlt, (x2xM, bP), (x2xM, aN)), (xlt, (x2xN, b), a)),
+ ]
+
+ # The next bit doesn't work on floats because the range checks would
+ # get way too complicated.
+ if t in ['int', 'uint']:
+ if t == 'int':
+ xN_min = -(1 << (N - 1))
+ xN_max = (1 << (N - 1)) - 1
+ elif t == 'uint':
+ xN_min = 0
+ xN_max = (1 << N) - 1
+ else:
+ assert False
+
+ # If we're up-casting and comparing to a constant, we can unfold
+ # the comparison into a comparison with the shrunk down constant
+ # and a check that the constant fits in the smaller bit size.
+ optimizations += [
+ ((xeq, (x2xM, aN), '#b'),
+ ('iand', (xeq, a, (x2xN, b)), (xeq, (x2xM, (x2xN, b)), b))),
+ ((xne, (x2xM, aN), '#b'),
+ ('ior', (xne, a, (x2xN, b)), (xne, (x2xM, (x2xN, b)), b))),
+ ((xlt, (x2xM, aN), '#b'),
+ ('iand', (xlt, xN_min, b),
+ ('ior', (xlt, xN_max, b), (xlt, a, (x2xN, b))))),
+ ((xlt, '#a', (x2xM, bN)),
+ ('iand', (xlt, a, xN_max),
+ ('ior', (xlt, a, xN_min), (xlt, (x2xN, a), b)))),
+ ((xge, (x2xM, aN), '#b'),
+ ('iand', (xge, xN_max, b),
+ ('ior', (xge, xN_min, b), (xge, a, (x2xN, b))))),
+ ((xge, '#a', (x2xM, bN)),
+ ('iand', (xge, a, xN_min),
+ ('ior', (xge, a, xN_max), (xge, (x2xN, a), b)))),
+ ]
+
def fexp2i(exp, bits):
# We assume that exp is already in the right range.
if bits == 16: