diff options
author | Ian Romanick <[email protected]> | 2020-03-04 13:00:44 -0800 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-18 20:36:29 +0000 |
commit | a8882132f9243e61ca5a5b5f63cbfcca1120ff90 (patch) | |
tree | 0bec1601c537d4647dcac84a7984b01497d2b64a /src/compiler | |
parent | 2d1216a039889cec8d8dbd994d4e50ed47d9692c (diff) |
soft-fp64/fadd: Common code optimization for differing sign case
This applies basically the same ideas as the previous 4 commits
to the aSign != bSign part... all smashed into one commit.
The shader hurt for spills and / or fills is
KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat4.
Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:
Tiger Lake
total instructions in shared programs: 787258 -> 683638 (-13.16%)
instructions in affected programs: 725435 -> 621815 (-14.28%)
helped: 74
HURT: 0
helped stats (abs) min: 152 max: 10261 x̄: 1400.27 x̃: 975
helped stats (rel) min: 11.61% max: 20.92% x̄: 15.40% x̃: 14.86%
95% mean confidence interval for instructions value: -1740.11 -1060.43
95% mean confidence interval for instructions %-change: -16.01% -14.79%
Instructions are helped.
total cycles in shared programs: 6483227 -> 5458858 (-15.80%)
cycles in affected programs: 6051245 -> 5026876 (-16.93%)
helped: 74
HURT: 0
helped stats (abs) min: 1566 max: 95474 x̄: 13842.82 x̃: 9757
helped stats (rel) min: 13.94% max: 23.26% x̄: 17.98% x̃: 17.57%
95% mean confidence interval for cycles value: -17104.25 -10581.40
95% mean confidence interval for cycles %-change: -18.61% -17.35%
Cycles are helped.
total spills in shared programs: 553 -> 445 (-19.53%)
spills in affected programs: 553 -> 445 (-19.53%)
helped: 1
HURT: 0
total fills in shared programs: 1307 -> 1323 (1.22%)
fills in affected programs: 1307 -> 1323 (1.22%)
helped: 0
HURT: 1
Ice Lake
total instructions in shared programs: 781216 -> 678470 (-13.15%)
instructions in affected programs: 720088 -> 617342 (-14.27%)
helped: 74
HURT: 0
helped stats (abs) min: 153 max: 8863 x̄: 1388.46 x̃: 975
helped stats (rel) min: 11.24% max: 21.03% x̄: 15.47% x̃: 15.01%
95% mean confidence interval for instructions value: -1703.57 -1073.35
95% mean confidence interval for instructions %-change: -16.09% -14.85%
Instructions are helped.
total cycles in shared programs: 6464085 -> 5453997 (-15.63%)
cycles in affected programs: 6031771 -> 5021683 (-16.75%)
helped: 74
HURT: 0
helped stats (abs) min: 1552 max: 90317 x̄: 13649.84 x̃: 9650
helped stats (rel) min: 13.84% max: 23.11% x̄: 17.83% x̃: 17.41%
95% mean confidence interval for cycles value: -16802.89 -10496.79
95% mean confidence interval for cycles %-change: -18.46% -17.21%
Cycles are helped.
total spills in shared programs: 279 -> 368 (31.90%)
spills in affected programs: 279 -> 368 (31.90%)
helped: 0
HURT: 1
total fills in shared programs: 973 -> 1155 (18.71%)
fills in affected programs: 973 -> 1155 (18.71%)
helped: 0
HURT: 1
Reviewed-by: Matt Turner <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/glsl/float64.glsl | 32 |
1 files changed, 11 insertions, 21 deletions
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index d079207a774..d41f0740bed 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -740,15 +740,23 @@ __fadd64(uint64_t a, uint64_t b) __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo); __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo); - if (0 < expDiff) { + if (expDiff != 0) { uint zFrac0; uint zFrac1; + if (expDiff < 0) { + EXCHANGE(aFracHi, bFracHi); + EXCHANGE(aFracLo, bFracLo); + EXCHANGE(aExp, bExp); + aSign ^= 0x80000000u; + } + if (aExp == 0x7FF) { bool propagate = (aFracHi | aFracLo) != 0u; - return mix(a, __propagateFloat64NaN(a, b), propagate); + return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); } - expDiff = mix(expDiff, expDiff - 1, bExp == 0); + + expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0); bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0); __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo); aFracHi |= 0x40000000u; @@ -757,24 +765,6 @@ __fadd64(uint64_t a, uint64_t b) --zExp; return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1); } - if (expDiff < 0) { - uint zFrac0; - uint zFrac1; - - if (bExp == 0x7FF) { - bool propagate = (bFracHi | bFracLo) != 0u; - return mix(__packFloat64(aSign ^ 0x80000000u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate); - } - expDiff = mix(expDiff, expDiff + 1, aExp == 0); - aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0); - __shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo); - bFracHi |= 0x40000000u; - __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1); - zExp = bExp; - aSign ^= 0x80000000u; - --zExp; - return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1); - } if (aExp == 0x7FF) { bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u; return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); |