glsl: Add "built-in" functions to do 64/64 => 64 division

These functions are directly available in shaders. A #define is added to detect their presence. This allows these functions to be tested using piglit regardless of whether the driver uses them for lowering.

The GLSL spec says that functions and macros beginning with __ are reserved for use by the implementation... hey, that's us!

v2: Use function inlining.

Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
author: Ian Romanick <[email protected]> 2016-10-17 17:54:40 -0700
committer: Ian Romanick <[email protected]> 2017-01-20 15:41:23 -0800
commit: 012f2995c307a83212a88ff18e13bf7be006cd77 (patch)
tree: c593bbbb7d840cf6c87d410ec9886c54902f6ecf /src/compiler/glsl/int64.glsl
parent: 50d52df278c547ffd53505a3ebf98f13cba5bd56 (diff)
1 files changed, 75 insertions, 2 deletions
diff --git a/src/compiler/glsl/int64.glsl b/src/compiler/glsl/int64.glsl
index a2bec011e3e..84e80ee349b 100644
--- a/src/compiler/glsl/int64.glsl
+++ b/src/compiler/glsl/int64.glsl
@@ -4,8 +4,9 @@
  *
  * Using version 1.40+ prevents built-in variables from being included.
  */
-#version 140
-#extension GL_MESA_shader_integer_functions: require
+#version 400
+#extension GL_ARB_gpu_shader_int64: require
+#extension GL_ARB_shading_language_420pack: require
 
 uvec2
 umul64(uvec2 a, uvec2 b)
@@ -28,3 +29,75 @@ sign64(ivec2 a)
 
    return result;
 }
+
+uvec4
+udivmod64(uvec2 n, uvec2 d)   /* unsigned 64-bit n / d and n % d; .x = low word, .y = high word */
+{
+   uvec2 quot = uvec2(0U, 0U);   /* quotient, filled in one bit at a time */
+   int log2_denom = findMSB(d.y) + 32;   /* MSB index of d when d.y != 0; findMSB(0) is -1, giving 31 */
+
+   /* Upper 32 quotient bits (shifts [63, 32]).  If the upper 32 bits of
+    * denom are non-zero, denom * 2^s for s >= 32 exceeds any 64-bit numer.
+    * With d.y == 0, denom << 32 has d.x as its upper half, so n.y < d.x
+    * also rules out every such shift.  Otherwise divide n.y by d.x.
+    */
+   if (d.y == 0 && n.y >= d.x) {
+      log2_denom = findMSB(d.x);   /* MSB index of d.x; -1 when d is zero */
+
+      /* Since the upper 32 bits of denom are zero, log2_denom <= 31 here.
+       * The log2_denom <= 31 - i test skips any shift that would push set
+       * bits of d.x out of 32 bits and make (d.x << i) wrap around.
+       */
+      for (int i = 31; i >= 1; i--) {
+	 if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
+	    n.y -= d.x << i;
+	    quot.y |= 1U << i;
+	 }
+      }
+
+      /* Final iteration (i == 0), peeled: log2_denom <= 31 always holds,
+       * so only the magnitude comparison is needed.
+       */
+      if (d.x <= n.y) {
+	 n.y -= d.x;
+	 quot.y |= 1U;
+      }
+   }
+
+   uint64_t d64 = packUint2x32(d);
+   uint64_t n64 = packUint2x32(n);   /* uses n.y as reduced by the block above, if it ran */
+   for (int i = 31; i >= 1; i--) {   /* lower 32 quotient bits (shifts [31, 1]) */
+      if (log2_denom <= 63 - i && (d64 << i) <= n64) {   /* guard keeps d64 << i within 64 bits */
+	 n64 -= d64 << i;
+	 quot.x |= 1U << i;
+      }
+   }
+
+   /* Final iteration (i == 0), peeled: log2_denom <= 63 always holds,
+    * so only the magnitude comparison is needed.
+    */
+   if (d64 <= n64) {
+      n64 -= d64;
+      quot.x |= 1U;
+   }
+
+   return uvec4(quot, unpackUint2x32(n64));   /* .xy = quotient, .zw = what is left of n (remainder) */
+}
+
+uvec2
+udiv64(uvec2 n, uvec2 d)   /* unsigned 64-bit n / d on (low, high) word pairs */
+{
+   return udivmod64(n, d).xy;   /* .xy is the quotient; the remainder in .zw is dropped */
+}
+
+ivec2
+idiv64(ivec2 _n, ivec2 _d)   /* signed 64-bit _n / _d on (low, high) word pairs */
+{
+   const bool negate = (_n.y < 0) != (_d.y < 0);   /* high words carry the sign; true when the signs differ */
+   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));   /* abs() of _n, converted to unsigned words */
+   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));   /* abs() of _d, converted to unsigned words */
+
+   uvec2 quot = udivmod64(n, d).xy;   /* unsigned quotient of the two magnitudes */
+
+   return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);   /* negate the magnitude when signs differed */
+}
author	Ian Romanick <[email protected]>	2016-10-17 17:54:40 -0700
committer	Ian Romanick <[email protected]>	2017-01-20 15:41:23 -0800
commit	012f2995c307a83212a88ff18e13bf7be006cd77 (patch)
tree	c593bbbb7d840cf6c87d410ec9886c54902f6ecf /src/compiler/glsl/int64.glsl
parent	50d52df278c547ffd53505a3ebf98f13cba5bd56 (diff)