soft-fp64: Relax the way NaN is propagated

Also reassociate a couple expressions to encourage some CSE.

Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS:

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 813599 -> 797951 (-1.92%)
instructions in affected programs: 796110 -> 780462 (-1.97%)
helped: 92
HURT: 0
helped stats (abs) min: 3 max: 5198 x̄: 170.09 x̃: 83
helped stats (rel) min: 0.36% max: 5.50% x̄: 1.57% x̃: 1.40%
95% mean confidence interval for instructions value: -282.42 -57.75
95% mean confidence interval for instructions %-change: -1.71% -1.42%
Instructions are helped.

total cycles in shared programs: 6687128 -> 6601437 (-1.28%)
cycles in affected programs: 6582246 -> 6496555 (-1.30%)
helped: 92
HURT: 0
helped stats (abs) min: 36 max: 14442 x̄: 931.42 x̃: 592
helped stats (rel) min: 0.45% max: 3.16% x̄: 1.44% x̃: 1.23%
95% mean confidence interval for cycles value: -1257.58 -605.27
95% mean confidence interval for cycles %-change: -1.58% -1.30%
Cycles are helped.

total spills in shared programs: 759 -> 702 (-7.51%)
spills in affected programs: 759 -> 702 (-7.51%)
helped: 3
HURT: 0

total fills in shared programs: 2412 -> 1442 (-40.22%)
fills in affected programs: 2412 -> 1442 (-40.22%)
helped: 3
HURT: 0

Reviewed-by: Matt Turner <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
author: Ian Romanick <[email protected]> 2020-03-02 19:20:42 -0800
committer: Marge Bot <[email protected]> 2020-03-18 20:36:29 +0000
commit: abf28d6a70c3219e41c904806f77ea92d31bdb0f (patch)
tree: 99df102ed30a4421daa32fde19d4f2ecd96160f7 /src
parent: 8178fa88763a321cb5df853ee219884c2a7eedcc (diff)
1 files changed, 19 insertions, 2 deletions
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index c83e1aa8c97..5b0a9dc0c28 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -59,6 +59,11 @@
 #define FLOAT_ROUND_UP              3
 #define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN
 
+/* Relax propagation of NaN.  Binary operations with a NaN source will still
+ * produce a NaN result, but it won't follow strict IEEE rules.
+ */
+#define RELAXED_NAN_PROPAGATION
+
 /* Absolute value of a Float64 :
  * Clear the sign bit
  */
@@ -639,6 +644,12 @@ __normalizeRoundAndPackFloat64(uint zSign,
 uint64_t
 __propagateFloat64NaN(uint64_t __a, uint64_t __b)
 {
+#if defined RELAXED_NAN_PROPAGATION
+   uvec2 a = unpackUint2x32(__a);
+   uvec2 b = unpackUint2x32(__b);
+
+   return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
+#else
    bool aIsNaN = __is_nan(__a);
    bool bIsNaN = __is_nan(__b);
    uvec2 a = unpackUint2x32(__a);
@@ -647,6 +658,7 @@ __propagateFloat64NaN(uint64_t __a, uint64_t __b)
    b.y |= 0x00080000u;
 
    return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
+#endif
 }
 
 /* Returns the result of adding the double-precision floating-point values
@@ -674,7 +686,7 @@ __fadd64(uint64_t a, uint64_t b)
 
       if (orig_exp_diff_is_zero) {
          if (aExp == 0x7FF) {
-            bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
+            bool propagate = ((aFracHi | bFracHi) | (aFracLo| bFracLo)) != 0u;
             return mix(a, __propagateFloat64NaN(a, b), propagate);
          }
          __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
@@ -753,7 +765,7 @@ __fadd64(uint64_t a, uint64_t b)
          return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
       }
       if (aExp == 0x7FF) {
-          bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u;
+         bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
          return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
       }
       bExp = mix(bExp, 1, aExp == 0);
@@ -879,8 +891,13 @@ __fmul64(uint64_t a, uint64_t b)
       return __packFloat64(zSign, 0x7FF, 0u, 0u);
    }
    if (bExp == 0x7FF) {
+      /* a cannot be NaN, but is b NaN? */
       if ((bFracHi | bFracLo) != 0u)
+#if defined RELAXED_NAN_PROPAGATION
+         return b;
+#else
          return __propagateFloat64NaN(a, b);
+#endif
       if ((uint(aExp) | aFracHi | aFracLo) == 0u)
          return 0xFFFFFFFFFFFFFFFFUL;
       return __packFloat64(zSign, 0x7FF, 0u, 0u);
author	Ian Romanick <[email protected]>	2020-03-02 19:20:42 -0800
committer	Marge Bot <[email protected]>	2020-03-18 20:36:29 +0000
commit	abf28d6a70c3219e41c904806f77ea92d31bdb0f (patch)
tree	99df102ed30a4421daa32fde19d4f2ecd96160f7 /src
parent	8178fa88763a321cb5df853ee219884c2a7eedcc (diff)