diff options
author | Ian Romanick <[email protected]> | 2016-10-17 17:54:40 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2017-01-20 15:41:23 -0800 |
commit | 012f2995c307a83212a88ff18e13bf7be006cd77 (patch) | |
tree | c593bbbb7d840cf6c87d410ec9886c54902f6ecf /src/compiler/glsl/int64.glsl | |
parent | 50d52df278c547ffd53505a3ebf98f13cba5bd56 (diff) |
glsl: Add "built-in" functions to do 64/64 => 64 division
These functions are directly available in shaders. A #define is added
to detect the presence. This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering. The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!
v2: Use function inlining.
Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/compiler/glsl/int64.glsl')
-rw-r--r-- | src/compiler/glsl/int64.glsl | 77 |
1 files changed, 75 insertions, 2 deletions
```diff
diff --git a/src/compiler/glsl/int64.glsl b/src/compiler/glsl/int64.glsl
index a2bec011e3e..84e80ee349b 100644
--- a/src/compiler/glsl/int64.glsl
+++ b/src/compiler/glsl/int64.glsl
@@ -4,8 +4,9 @@
  *
  * Using version 1.40+ prevents built-in variables from being included.
  */
-#version 140
-#extension GL_MESA_shader_integer_functions: require
+#version 400
+#extension GL_ARB_gpu_shader_int64: require
+#extension GL_ARB_shading_language_420pack: require
 
 uvec2
 umul64(uvec2 a, uvec2 b)
@@ -28,3 +29,75 @@ sign64(ivec2 a)
 
    return result;
 }
+
+uvec4
+udivmod64(uvec2 n, uvec2 d)
+{
+   uvec2 quot = uvec2(0U, 0U);
+   int log2_denom = findMSB(d.y) + 32;
+
+   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
+    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
+    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
+    * denom == 0.
+    */
+   if (d.y == 0 && n.y >= d.x) {
+      log2_denom = findMSB(d.x);
+
+      /* Since the upper 32 bits of denom are zero, log2_denom <= 31 and we
+       * don't have to compare log2_denom inside the loop as is done in the
+       * general case (below).
+       */
+      for (int i = 31; i >= 1; i--) {
+         if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
+            n.y -= d.x << i;
+            quot.y |= 1U << i;
+         }
+      }
+
+      /* log2_denom is always <= 31, so manually peel the last loop
+       * iteration.
+       */
+      if (d.x <= n.y) {
+         n.y -= d.x;
+         quot.y |= 1U;
+      }
+   }
+
+   uint64_t d64 = packUint2x32(d);
+   uint64_t n64 = packUint2x32(n);
+   for (int i = 31; i >= 1; i--) {
+      if (log2_denom <= 63 - i && (d64 << i) <= n64) {
+         n64 -= d64 << i;
+         quot.x |= 1U << i;
+      }
+   }
+
+   /* log2_denom is always <= 63, so manually peel the last loop
+    * iteration.
+    */
+   if (d64 <= n64) {
+      n64 -= d64;
+      quot.x |= 1U;
+   }
+
+   return uvec4(quot, unpackUint2x32(n64));
+}
+
+uvec2
+udiv64(uvec2 n, uvec2 d)
+{
+   return udivmod64(n, d).xy;
+}
+
+ivec2
+idiv64(ivec2 _n, ivec2 _d)
+{
+   const bool negate = (_n.y < 0) != (_d.y < 0);
+   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
+   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
+
+   uvec2 quot = udivmod64(n, d).xy;
+
+   return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);
+}
```