diff options
author | Ian Romanick <[email protected]> | 2020-03-03 12:26:37 -0800 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-03-18 20:36:29 +0000 |
commit | 812230fd94e2661b1e69234f35f3ec0e3bcc9571 (patch) | |
tree | bd5e60573b099952a737e0081e0ab47a713a083f /src | |
parent | d1e0227ef14291242886be48424f723bf60bc439 (diff) |
soft-fp64: Don't open-code umulExtended
Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:
Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 928859 -> 859509 (-7.47%)
instructions in affected programs: 866293 -> 796943 (-8.01%)
helped: 76
HURT: 0
helped stats (abs) min: 75 max: 8042 x̄: 912.50 x̃: 688
helped stats (rel) min: 5.35% max: 21.02% x̄: 10.35% x̃: 7.58%
95% mean confidence interval for instructions value: -1138.37 -686.63
95% mean confidence interval for instructions %-change: -11.69% -9.00%
Instructions are helped.
total cycles in shared programs: 7272912 -> 7072275 (-2.76%)
cycles in affected programs: 6763486 -> 6562849 (-2.97%)
helped: 76
HURT: 0
helped stats (abs) min: 214 max: 30136 x̄: 2639.96 x̃: 1923
helped stats (rel) min: 1.75% max: 9.20% x̄: 4.04% x̃: 2.41%
95% mean confidence interval for cycles value: -3455.29 -1824.63
95% mean confidence interval for cycles %-change: -4.69% -3.39%
Cycles are helped.
total spills in shared programs: 817 -> 814 (-0.37%)
spills in affected programs: 791 -> 788 (-0.38%)
helped: 2
HURT: 0
total fills in shared programs: 2438 -> 2488 (2.05%)
fills in affected programs: 2392 -> 2442 (2.09%)
helped: 0
HURT: 2
Reviewed-by: Jason Ekstrand <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/glsl/float64.glsl | 40 |
1 file changed, 8 insertions, 32 deletions
```diff
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index 7d58da3d18c..9834241872c 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -730,30 +730,6 @@ __fadd64(uint64_t a, uint64_t b)
    }
 }
 
-/* Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
- * into two 32-bit pieces which are stored at the locations pointed to by
- * `z0Ptr' and `z1Ptr'.
- */
-void
-__mul32To64(uint a, uint b, out uint z0Ptr, out uint z1Ptr)
-{
-   uint aLow = a & 0x0000FFFFu;
-   uint aHigh = a>>16;
-   uint bLow = b & 0x0000FFFFu;
-   uint bHigh = b>>16;
-   uint z1 = aLow * bLow;
-   uint zMiddleA = aLow * bHigh;
-   uint zMiddleB = aHigh * bLow;
-   uint z0 = aHigh * bHigh;
-   zMiddleA += zMiddleB;
-   z0 += ((uint(zMiddleA < zMiddleB)) << 16) + (zMiddleA >> 16);
-   zMiddleA <<= 16;
-   z1 += zMiddleA;
-   z0 += uint(z1 < zMiddleA);
-   z1Ptr = z1;
-   z0Ptr = z0;
-}
-
 /* Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
  * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
  * product. The product is broken into four 32-bit pieces which are stored at
@@ -773,12 +749,12 @@ __mul64To128(uint a0, uint a1, uint b0, uint b1,
    uint more1 = 0u;
    uint more2 = 0u;
 
-   __mul32To64(a1, b1, z2, z3);
-   __mul32To64(a1, b0, z1, more2);
+   umulExtended(a1, b1, z2, z3);
+   umulExtended(a1, b0, z1, more2);
    __add64(z1, more2, 0u, z2, z1, z2);
-   __mul32To64(a0, b0, z0, more1);
+   umulExtended(a0, b0, z0, more1);
    __add64(z0, more1, 0u, z1, z0, z1);
-   __mul32To64(a0, b1, more1, more2);
+   umulExtended(a0, b1, more1, more2);
    __add64(more1, more2, 0u, z2, more1, z2);
    __add64(z0, z1, 0u, more1, z0, z1);
    z3Ptr = z3;
@@ -1442,7 +1418,7 @@ __estimateDiv64To32(uint a0, uint a1, uint b)
       return 0xFFFFFFFFu;
    b0 = b>>16;
    z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
-   __mul32To64(b, z, term0, term1);
+   umulExtended(b, z, term0, term1);
    __sub64(a0, a1, term0, term1, rem0, rem1);
    while (int(rem0) < 0) {
       z -= 0x10000u;
@@ -1612,7 +1588,7 @@ __fsqrt64(uint64_t a)
    zFrac0 = 0x7FFFFFFFu;
    doubleZFrac0 = zFrac0 + zFrac0;
    __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
-   __mul32To64(zFrac0, zFrac0, term0, term1);
+   umulExtended(zFrac0, zFrac0, term0, term1);
    __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
    while (int(rem0) < 0) {
       --zFrac0;
@@ -1623,9 +1599,9 @@ __fsqrt64(uint64_t a)
    if ((zFrac1 & 0x1FFu) <= 5u) {
       if (zFrac1 == 0u)
          zFrac1 = 1u;
-      __mul32To64(doubleZFrac0, zFrac1, term1, term2);
+      umulExtended(doubleZFrac0, zFrac1, term1, term2);
       __sub64(rem1, 0u, term1, term2, rem1, rem2);
-      __mul32To64(zFrac1, zFrac1, term2, term3);
+      umulExtended(zFrac1, zFrac1, term2, term3);
       __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
       while (int(rem1) < 0) {
          --zFrac1;
```