author     Ian Romanick <[email protected]>   2020-03-04 23:00:24 -0800
committer  Marge Bot <[email protected]>      2020-03-18 20:36:29 +0000
commit     bf2eb3e0eee39e79f5426dfa18d9d3b7f9dfbcb2 (patch)
tree       91b95e6febd3897f478ea203194575a3d5d14bc3 /src/compiler
parent     a8882132f9243e61ca5a5b5f63cbfcca1120ff90 (diff)
soft-fp64: Split a block that was missing a cast on a comparison
This function has code like:

   if (0x7FD <= zExp) {
      if ((0x7FD < zExp) ||
          ((zExp == 0x7FD) &&
           (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&
           increment)) {
         ...
         return ...;
      }

      if (zExp < 0) {

I saw that, and I thought, "Uh... what?  Dead code?"  I thought it was a
bit fishy, so I grabbed the Berkeley SoftFloat Library 3e code, and there
is similar code in softfloat_roundPackToF64 (source/s_roundPackToF64.c),
but it has an extra (uint16_t) cast in the first comparison.  That cast is
basically a shortcut for

   if (zExp < 0 || zExp >= 0x7FD) {

so having the nesting kind of makes sense (a standalone sketch of the cast
trick follows this message).  On a CPU, nesting the flow control can be an
optimization.  On a GPU, it's just fail.

Split the block so that we don't need the uint16_t cast magic.

Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS.  Tiger Lake and Ice Lake had similar results (Tiger Lake shown).

total instructions in shared programs: 683638 -> 658127 (-3.73%)
instructions in affected programs: 666839 -> 641328 (-3.83%)
helped: 92
HURT: 0
helped stats (abs) min: 26 max: 2456 x̄: 277.29 x̃: 144
helped stats (rel) min: 3.21% max: 4.22% x̄: 3.79% x̃: 3.90%
95% mean confidence interval for instructions value: -345.84 -208.75
95% mean confidence interval for instructions %-change: -3.86% -3.73%
Instructions are helped.

total cycles in shared programs: 5458858 -> 5344600 (-2.09%)
cycles in affected programs: 5360114 -> 5245856 (-2.13%)
helped: 92
HURT: 0
helped stats (abs) min: 126 max: 10300 x̄: 1241.93 x̃: 655
helped stats (rel) min: 1.71% max: 2.37% x̄: 2.12% x̃: 2.17%
95% mean confidence interval for cycles value: -1539.93 -943.94
95% mean confidence interval for cycles %-change: -2.16% -2.08%
Cycles are helped.

Fixes: f111d72596c ("glsl: Add "built-in" functions to do add(fp64, fp64)")
Reviewed-by: Matt Turner <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
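For illustration only, a minimal standalone C sketch (not part of this commit
or the Mesa tree; the helpers out_of_range_cast and out_of_range_plain are
invented for this example) of why the (uint16_t) cast in Berkeley SoftFloat
folds the two range checks into one comparison: a negative exponent wraps to a
large unsigned value under the cast, so it also satisfies "0x7FD <= ...".

   #include <stdint.h>
   #include <stdio.h>

   /* Range check using the cast shortcut: a negative zExp wraps to a value
    * well above 0x7FD when converted to uint16_t, so one comparison covers
    * both "too small" and "too large" exponents (for the exponent magnitudes
    * that can actually occur in this rounding path).
    */
   static int out_of_range_cast(int32_t zExp)
   {
      return 0x7FD <= (uint16_t) zExp;
   }

   /* The same check written without the cast trick. */
   static int out_of_range_plain(int32_t zExp)
   {
      return zExp < 0 || zExp >= 0x7FD;
   }

   int main(void)
   {
      const int32_t samples[] = { -5, -1, 0, 1, 0x7FC, 0x7FD, 0x7FE };

      for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
         printf("zExp=%6d  cast=%d  plain=%d\n",
                (int) samples[i],
                out_of_range_cast(samples[i]),
                out_of_range_plain(samples[i]));
      }
      return 0;
   }

Both helpers agree for these sample exponents, which is why the nested form is
not dead code in the SoftFloat original; without the cast, as in the GLSL port
before this patch, the inner "if (zExp < 0)" could never be reached.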
Diffstat (limited to 'src/compiler')
-rw-r--r--   src/compiler/glsl/float64.glsl   28
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index d41f0740bed..dd1179012ca 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -500,23 +500,25 @@ __roundAndPackFloat64(uint zSign,
}
return __packFloat64(zSign, 0x7FF, 0u, 0u);
}
- if (zExp < 0) {
- __shift64ExtraRightJamming(
- zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
- zExp = 0;
- if (roundNearestEven) {
- increment = zFrac2 < 0u;
+ }
+
+ if (zExp < 0) {
+ __shift64ExtraRightJamming(
+ zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
+ zExp = 0;
+ if (roundNearestEven) {
+ increment = zFrac2 < 0u;
+ } else {
+ if (zSign != 0u) {
+ increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
+ (zFrac2 != 0u);
} else {
- if (zSign != 0u) {
- increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
- (zFrac2 != 0u);
- } else {
- increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
- (zFrac2 != 0u);
- }
+ increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+ (zFrac2 != 0u);
}
}
}
+
if (increment) {
__add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));