diff options
author | Jason Ekstrand <[email protected]> | 2016-01-27 15:21:28 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-01-27 15:21:28 -0800 |
commit | 8f0ef9bbeb2932209f4bdc408f79c2313bd6ba51 (patch) | |
tree | cdd22e9fb2858c4a73aada7ae5a6cec210518ac6 /src/glsl | |
parent | f7d6b8ccfe3f04382d34bcd49cdbdfec88709719 (diff) |
nir/opt_algebraic: Use a more elementary mechanism for lowering ldexp
Diffstat (limited to 'src/glsl')
-rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 64 |
1 file changed, 2 insertions, 62 deletions
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 190e4b7b43b..f4bfd3a921a 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -230,6 +230,8 @@ optimizations = [
    (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
    (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
    (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
 
+   (('ldexp', 'x', 'exp'),
+    ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))),
+
    (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
     ('bcsel', ('ilt', 31, 'bits'), 'insert',
@@ -332,68 +334,6 @@ for op in ['flt', 'fge', 'feq', 'fne',
     ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
 ]
 
-def ldexp_to_arith(x, exp):
-   """
-   Translates
-       ldexp x exp
-   into
-
-       extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-       resulting_biased_exp = extracted_biased_exp + exp;
-
-       if (resulting_biased_exp < 1) {
-          return copysign(0.0, x);
-       }
-
-       return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
-                          lshift(i2u(resulting_biased_exp), exp_shift));
-
-   which we can't actually implement as such, since NIR doesn't have
-   vectorized if-statements. We actually implement it without branches
-   using conditional-select:
-
-       extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-       resulting_biased_exp = extracted_biased_exp + exp;
-
-       is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
-       x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
-       resulting_biased_exp = csel(is_not_zero_or_underflow,
-                                   resulting_biased_exp, 0);
-
-       return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
-                          lshift(i2u(resulting_biased_exp), exp_shift));
-   """
-
-   sign_mask = 0x80000000
-   exp_shift = 23
-   exp_width = 8
-
-   # Extract the biased exponent from <x>.
-   extracted_biased_exp = ('ushr', ('fabs', x), exp_shift)
-   resulting_biased_exp = ('iadd', extracted_biased_exp, exp)
-
-   # Test if result is ±0.0, subnormal, or underflow by checking if the
-   # resulting biased exponent would be less than 0x1. If so, the result is
-   # 0.0 with the sign of x. (Actually, invert the conditions so that
-   # immediate values are the second arguments, which is better for i965)
-   zero_sign_x = ('iand', x, sign_mask)
-
-   is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1)
-
-   # We could test for overflows by checking if the resulting biased exponent
-   # would be greater than 0xFE. Turns out we don't need to because the GLSL
-   # spec says:
-   #
-   #    "If this product is too large to be represented in the
-   #     floating-point type, the result is undefined."
-
-   return ('bitfield_insert',
-           ('bcsel', is_not_zero_or_underflow, x, zero_sign_x),
-           ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0),
-           exp_shift, exp_width)
-
-optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))]
-
 # This section contains "late" optimizations that should be run after the
 # regular optimizations have finished. Optimizations should go here if
 # they help code generation but do not necessarily produce code that is