summary | refs | log | tree | commit | diff | stats
path: root/src/compiler/glsl/int64.glsl
diff options
context:
space:
mode:
author Ian Romanick <[email protected]> 2016-10-17 17:54:40 -0700
committer Ian Romanick <[email protected]> 2017-01-20 15:41:23 -0800
commit 012f2995c307a83212a88ff18e13bf7be006cd77 (patch)
tree c593bbbb7d840cf6c87d410ec9886c54902f6ecf /src/compiler/glsl/int64.glsl
parent 50d52df278c547ffd53505a3ebf98f13cba5bd56 (diff)
glsl: Add "built-in" functions to do 64/64 => 64 division
These functions are directly available in shaders. A #define is added to detect the presence. This allows these functions to be tested using piglit regardless of whether the driver uses them for lowering.

The GLSL spec says that functions and macros beginning with __ are reserved for use by the implementation... hey, that's us!

v2: Use function inlining.

Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler/glsl/int64.glsl')
-rw-r--r-- src/compiler/glsl/int64.glsl 77
1 files changed, 75 insertions, 2 deletions
diff --git a/src/compiler/glsl/int64.glsl b/src/compiler/glsl/int64.glsl
index a2bec011e3e..84e80ee349b 100644
--- a/src/compiler/glsl/int64.glsl
+++ b/src/compiler/glsl/int64.glsl
@@ -4,8 +4,9 @@
*
* Using version 1.40+ prevents built-in variables from being included.
*/
-#version 140
-#extension GL_MESA_shader_integer_functions: require
+#version 400
+#extension GL_ARB_gpu_shader_int64: require
+#extension GL_ARB_shading_language_420pack: require
uvec2
umul64(uvec2 a, uvec2 b)
@@ -28,3 +29,75 @@ sign64(ivec2 a)
return result;
}
+
+/* Unsigned 64-bit divide-with-remainder using shift-and-subtract long
+ * division.
+ *
+ * n (numerator) and d (denominator) are 64-bit values carried as uvec2,
+ * with .x holding the low 32 bits and .y the high 32 bits (the layout
+ * used by packUint2x32 / unpackUint2x32).
+ *
+ * Returns a uvec4 whose .xy is the quotient and whose .zw is the
+ * remainder, both in the same low-word / high-word layout.
+ *
+ * d == 0 is not special-cased here; the result for a zero denominator is
+ * simply whatever the loops below happen to produce.
+ */
+uvec4
+udivmod64(uvec2 n, uvec2 d)
+{
+ uvec2 quot = uvec2(0U, 0U);
+ /* Bit index of the highest set bit of d, taken from the high word.
+ * findMSB returns -1 for a zero input, so this evaluates to 31 when
+ * d.y == 0; the fast path below replaces it with the exact value.
+ */
+ int log2_denom = findMSB(d.y) + 32;
+
+ /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
+ * greater than 32 bits to occur. If the upper 32 bits of the numerator
+ * are zero, it is impossible for (denom << [63, 32]) <= numer unless
+ * denom == 0.
+ *
+ * So the upper 32 quotient bits (quot.y) can only be non-zero when d fits
+ * in 32 bits and n.y >= d.x. In that case they are computed with plain
+ * 32-bit arithmetic by dividing the high word n.y by d.x, which leaves
+ * the partial remainder in n.y.
+ */
+ if (d.y == 0 && n.y >= d.x) {
+ log2_denom = findMSB(d.x);
+
+ /* Since the upper 32 bits of denom are zero, log2_denom <= 31. The
+ * guard only has to keep (d.x << i) from shifting set bits out of the
+ * 32-bit word, hence the comparison against 31 - i rather than the
+ * 63 - i used in the general case (below).
+ */
+ for (int i = 31; i >= 1; i--) {
+ if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
+ n.y -= d.x << i;
+ quot.y |= 1U << i;
+ }
+ }
+
+ /* log2_denom is always <= 31, so manually peel the last loop
+ * iteration.
+ */
+ if (d.x <= n.y) {
+ n.y -= d.x;
+ quot.y |= 1U;
+ }
+ }
+
+ /* General case: compute the lower 32 quotient bits (quot.x) with 64-bit
+ * arithmetic. n is packed only after the block above, so n64 already
+ * reflects any reduction applied to n.y. The log2_denom guard prevents
+ * (d64 << i) from shifting set bits past bit 63.
+ */
+ uint64_t d64 = packUint2x32(d);
+ uint64_t n64 = packUint2x32(n);
+ for (int i = 31; i >= 1; i--) {
+ if (log2_denom <= 63 - i && (d64 << i) <= n64) {
+ n64 -= d64 << i;
+ quot.x |= 1U << i;
+ }
+ }
+
+ /* log2_denom is always <= 63, so manually peel the last loop
+ * iteration.
+ */
+ if (d64 <= n64) {
+ n64 -= d64;
+ quot.x |= 1U;
+ }
+
+ /* Whatever is left of the numerator is the remainder. */
+ return uvec4(quot, unpackUint2x32(n64));
+}
+
+/* Unsigned 64-bit quotient of n / d, with both operands and the result
+ * carried as uvec2. Runs the combined divide/modulo helper and keeps only
+ * the quotient half (.xy) of its result.
+ */
+uvec2
+udiv64(uvec2 n, uvec2 d)
+{
+   uvec4 quot_rem = udivmod64(n, d);
+
+   return quot_rem.xy;
+}
+
+/* Signed 64-bit division with operands and result carried as ivec2.
+ *
+ * The magnitudes of both operands are divided with udivmod64(), and the
+ * quotient is negated when exactly one operand has a negative high word
+ * (.y), i.e. when the operand signs differ.
+ */
+ivec2
+idiv64(ivec2 _n, ivec2 _d)
+{
+   const bool negate = (_n.y < 0) ^^ (_d.y < 0);
+
+   uint64_t n_mag = uint64_t(abs(packInt2x32(_n)));
+   uint64_t d_mag = uint64_t(abs(packInt2x32(_d)));
+   uvec2 quot = udivmod64(unpackUint2x32(n_mag), unpackUint2x32(d_mag)).xy;
+
+   if (negate) {
+      return unpackInt2x32(-int64_t(packUint2x32(quot)));
+   }
+
+   return ivec2(quot);
+}