aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2020-03-04 13:00:44 -0800
committer Marge Bot <[email protected]> 2020-03-18 20:36:29 +0000
commit a8882132f9243e61ca5a5b5f63cbfcca1120ff90 (patch)
tree 0bec1601c537d4647dcac84a7984b01497d2b64a /src/compiler
parent 2d1216a039889cec8d8dbd994d4e50ed47d9692c (diff)
soft-fp64/fadd: Common code optimization for differing sign case
These are basically the same ideas from the previous 4 commits applied to the aSign != bSign part... and all smashed into one commit. The shader hurt for spills and/or fills is from KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat4. Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS: Tiger Lake total instructions in shared programs: 787258 -> 683638 (-13.16%) instructions in affected programs: 725435 -> 621815 (-14.28%) helped: 74 HURT: 0 helped stats (abs) min: 152 max: 10261 x̄: 1400.27 x̃: 975 helped stats (rel) min: 11.61% max: 20.92% x̄: 15.40% x̃: 14.86% 95% mean confidence interval for instructions value: -1740.11 -1060.43 95% mean confidence interval for instructions %-change: -16.01% -14.79% Instructions are helped. total cycles in shared programs: 6483227 -> 5458858 (-15.80%) cycles in affected programs: 6051245 -> 5026876 (-16.93%) helped: 74 HURT: 0 helped stats (abs) min: 1566 max: 95474 x̄: 13842.82 x̃: 9757 helped stats (rel) min: 13.94% max: 23.26% x̄: 17.98% x̃: 17.57% 95% mean confidence interval for cycles value: -17104.25 -10581.40 95% mean confidence interval for cycles %-change: -18.61% -17.35% Cycles are helped. total spills in shared programs: 553 -> 445 (-19.53%) spills in affected programs: 553 -> 445 (-19.53%) helped: 1 HURT: 0 total fills in shared programs: 1307 -> 1323 (1.22%) fills in affected programs: 1307 -> 1323 (1.22%) helped: 0 HURT: 1 Ice Lake total instructions in shared programs: 781216 -> 678470 (-13.15%) instructions in affected programs: 720088 -> 617342 (-14.27%) helped: 74 HURT: 0 helped stats (abs) min: 153 max: 8863 x̄: 1388.46 x̃: 975 helped stats (rel) min: 11.24% max: 21.03% x̄: 15.47% x̃: 15.01% 95% mean confidence interval for instructions value: -1703.57 -1073.35 95% mean confidence interval for instructions %-change: -16.09% -14.85% Instructions are helped. 
total cycles in shared programs: 6464085 -> 5453997 (-15.63%) cycles in affected programs: 6031771 -> 5021683 (-16.75%) helped: 74 HURT: 0 helped stats (abs) min: 1552 max: 90317 x̄: 13649.84 x̃: 9650 helped stats (rel) min: 13.84% max: 23.11% x̄: 17.83% x̃: 17.41% 95% mean confidence interval for cycles value: -16802.89 -10496.79 95% mean confidence interval for cycles %-change: -18.46% -17.21% Cycles are helped. total spills in shared programs: 279 -> 368 (31.90%) spills in affected programs: 279 -> 368 (31.90%) helped: 0 HURT: 1 total fills in shared programs: 973 -> 1155 (18.71%) fills in affected programs: 973 -> 1155 (18.71%) helped: 0 HURT: 1 Reviewed-by: Matt Turner <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
Diffstat (limited to 'src/compiler')
-rw-r--r-- src/compiler/glsl/float64.glsl 32
1 files changed, 11 insertions, 21 deletions
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index d079207a774..d41f0740bed 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -740,15 +740,23 @@ __fadd64(uint64_t a, uint64_t b)
__shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
__shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
- if (0 < expDiff) {
+ if (expDiff != 0) {
uint zFrac0;
uint zFrac1;
+ if (expDiff < 0) {
+ EXCHANGE(aFracHi, bFracHi);
+ EXCHANGE(aFracLo, bFracLo);
+ EXCHANGE(aExp, bExp);
+ aSign ^= 0x80000000u;
+ }
+
if (aExp == 0x7FF) {
bool propagate = (aFracHi | aFracLo) != 0u;
- return mix(a, __propagateFloat64NaN(a, b), propagate);
+ return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
}
- expDiff = mix(expDiff, expDiff - 1, bExp == 0);
+
+ expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
__shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
aFracHi |= 0x40000000u;
@@ -757,24 +765,6 @@ __fadd64(uint64_t a, uint64_t b)
--zExp;
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
}
- if (expDiff < 0) {
- uint zFrac0;
- uint zFrac1;
-
- if (bExp == 0x7FF) {
- bool propagate = (bFracHi | bFracLo) != 0u;
- return mix(__packFloat64(aSign ^ 0x80000000u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
- }
- expDiff = mix(expDiff, expDiff + 1, aExp == 0);
- aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);
- __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);
- bFracHi |= 0x40000000u;
- __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
- zExp = bExp;
- aSign ^= 0x80000000u;
- --zExp;
- return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
- }
if (aExp == 0x7FF) {
bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);