diff options
author | Ian Romanick <[email protected]> | 2020-03-02 19:20:42 -0800 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-18 20:36:29 +0000 |
commit | abf28d6a70c3219e41c904806f77ea92d31bdb0f (patch) | |
tree | 99df102ed30a4421daa32fde19d4f2ecd96160f7 /src | |
parent | 8178fa88763a321cb5df853ee219884c2a7eedcc (diff) |
soft-fp64: Relax the way NaN is propagated
Also reassociate a couple of expressions to encourage some CSE.
Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:
Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 813599 -> 797951 (-1.92%)
instructions in affected programs: 796110 -> 780462 (-1.97%)
helped: 92
HURT: 0
helped stats (abs) min: 3 max: 5198 x̄: 170.09 x̃: 83
helped stats (rel) min: 0.36% max: 5.50% x̄: 1.57% x̃: 1.40%
95% mean confidence interval for instructions value: -282.42 -57.75
95% mean confidence interval for instructions %-change: -1.71% -1.42%
Instructions are helped.
total cycles in shared programs: 6687128 -> 6601437 (-1.28%)
cycles in affected programs: 6582246 -> 6496555 (-1.30%)
helped: 92
HURT: 0
helped stats (abs) min: 36 max: 14442 x̄: 931.42 x̃: 592
helped stats (rel) min: 0.45% max: 3.16% x̄: 1.44% x̃: 1.23%
95% mean confidence interval for cycles value: -1257.58 -605.27
95% mean confidence interval for cycles %-change: -1.58% -1.30%
Cycles are helped.
total spills in shared programs: 759 -> 702 (-7.51%)
spills in affected programs: 759 -> 702 (-7.51%)
helped: 3
HURT: 0
total fills in shared programs: 2412 -> 1442 (-40.22%)
fills in affected programs: 2412 -> 1442 (-40.22%)
helped: 3
HURT: 0
Reviewed-by: Matt Turner <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/glsl/float64.glsl | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index c83e1aa8c97..5b0a9dc0c28 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -59,6 +59,11 @@
 #define FLOAT_ROUND_UP 3
 #define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN
 
+/* Relax propagation of NaN. Binary operations with a NaN source will still
+ * produce a NaN result, but it won't follow strict IEEE rules.
+ */
+#define RELAXED_NAN_PROPAGATION
+
 /* Absolute value of a Float64 :
  * Clear the sign bit
  */
@@ -639,6 +644,12 @@ __normalizeRoundAndPackFloat64(uint zSign,
 uint64_t
 __propagateFloat64NaN(uint64_t __a, uint64_t __b)
 {
+#if defined RELAXED_NAN_PROPAGATION
+   uvec2 a = unpackUint2x32(__a);
+   uvec2 b = unpackUint2x32(__b);
+
+   return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
+#else
    bool aIsNaN = __is_nan(__a);
    bool bIsNaN = __is_nan(__b);
    uvec2 a = unpackUint2x32(__a);
@@ -647,6 +658,7 @@ __propagateFloat64NaN(uint64_t __a, uint64_t __b)
    b.y |= 0x00080000u;
 
    return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
+#endif
 }
 
 /* Returns the result of adding the double-precision floating-point values
@@ -674,7 +686,7 @@ __fadd64(uint64_t a, uint64_t b)
 
       if (orig_exp_diff_is_zero) {
          if (aExp == 0x7FF) {
-            bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
+            bool propagate = ((aFracHi | bFracHi) | (aFracLo| bFracLo)) != 0u;
             return mix(a, __propagateFloat64NaN(a, b), propagate);
          }
          __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
@@ -753,7 +765,7 @@ __fadd64(uint64_t a, uint64_t b)
          return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
       }
       if (aExp == 0x7FF) {
-         bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
+         bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
          return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
       }
       bExp = mix(bExp, 1, aExp == 0);
@@ -879,8 +891,13 @@ __fmul64(uint64_t a, uint64_t b)
       return __packFloat64(zSign, 0x7FF, 0u, 0u);
    }
    if (bExp == 0x7FF) {
+      /* a cannot be NaN, but is b NaN? */
       if ((bFracHi | bFracLo) != 0u)
+#if defined RELAXED_NAN_PROPAGATION
+         return b;
+#else
          return __propagateFloat64NaN(a, b);
+#endif
       if ((uint(aExp) | aFracHi | aFracLo) == 0u)
          return 0xFFFFFFFFFFFFFFFFUL;
       return __packFloat64(zSign, 0x7FF, 0u, 0u);