diff options
author | Jason Ekstrand <[email protected]> | 2016-01-27 15:21:28 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-01-27 15:21:28 -0800 |
commit | 8f0ef9bbeb2932209f4bdc408f79c2313bd6ba51 (patch) | |
tree | cdd22e9fb2858c4a73aada7ae5a6cec210518ac6 /src/glsl | |
parent | f7d6b8ccfe3f04382d34bcd49cdbdfec88709719 (diff) |
nir/opt_algebraic: Use a more elementary mechanism for lowering ldexp
Diffstat (limited to 'src/glsl')
-rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 64 |
1 file changed, 2 insertions, 62 deletions
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 190e4b7b43b..f4bfd3a921a 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -230,6 +230,8 @@ optimizations = [
    (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
    (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
    (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
 
+   (('ldexp', 'x', 'exp'),
+    ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))),
+
    (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
     ('bcsel', ('ilt', 31, 'bits'), 'insert',
@@ -332,68 +334,6 @@ for op in ['flt', 'fge', 'feq', 'fne',
     ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
 ]
 
-def ldexp_to_arith(x, exp):
-   """
-   Translates
-       ldexp x exp
-   into
-
-       extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-       resulting_biased_exp = extracted_biased_exp + exp;
-
-       if (resulting_biased_exp < 1) {
-          return copysign(0.0, x);
-       }
-
-       return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
-                          lshift(i2u(resulting_biased_exp), exp_shift));
-
-   which we can't actually implement as such, since NIR doesn't have
-   vectorized if-statements. We actually implement it without branches
-   using conditional-select:
-
-       extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-       resulting_biased_exp = extracted_biased_exp + exp;
-
-       is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
-       x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
-       resulting_biased_exp = csel(is_not_zero_or_underflow,
-                                   resulting_biased_exp, 0);
-
-       return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
-                          lshift(i2u(resulting_biased_exp), exp_shift));
-   """
-
-   sign_mask = 0x80000000
-   exp_shift = 23
-   exp_width = 8
-
-   # Extract the biased exponent from <x>.
-   extracted_biased_exp = ('ushr', ('fabs', x), exp_shift)
-   resulting_biased_exp = ('iadd', extracted_biased_exp, exp)
-
-   # Test if result is ±0.0, subnormal, or underflow by checking if the
-   # resulting biased exponent would be less than 0x1. If so, the result is
-   # 0.0 with the sign of x. (Actually, invert the conditions so that
-   # immediate values are the second arguments, which is better for i965)
-   zero_sign_x = ('iand', x, sign_mask)
-
-   is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1)
-
-   # We could test for overflows by checking if the resulting biased exponent
-   # would be greater than 0xFE. Turns out we don't need to because the GLSL
-   # spec says:
-   #
-   #    "If this product is too large to be represented in the
-   #     floating-point type, the result is undefined."
-
-   return ('bitfield_insert',
-           ('bcsel', is_not_zero_or_underflow, x, zero_sign_x),
-           ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0),
-           exp_shift, exp_width)
-
-optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))]
-
 # This section contains "late" optimizations that should be run after the
 # regular optimizations have finished. Optimizations should go here if
 # they help code generation but do not necessarily produce code that is