diff options
Diffstat (limited to 'src/compiler/glsl/int64.glsl')
-rw-r--r-- | src/compiler/glsl/int64.glsl | 77 |
1 files changed, 75 insertions, 2 deletions
diff --git a/src/compiler/glsl/int64.glsl b/src/compiler/glsl/int64.glsl
index a2bec011e3e..84e80ee349b 100644
--- a/src/compiler/glsl/int64.glsl
+++ b/src/compiler/glsl/int64.glsl
@@ -4,8 +4,9 @@
  *
  * Using version 1.40+ prevents built-in variables from being included.
  */
-#version 140
-#extension GL_MESA_shader_integer_functions: require
+#version 400
+#extension GL_ARB_gpu_shader_int64: require
+#extension GL_ARB_shading_language_420pack: require
 
 uvec2
 umul64(uvec2 a, uvec2 b)
@@ -28,3 +29,75 @@ sign64(ivec2 a)
 
    return result;
 }
+
+uvec4
+udivmod64(uvec2 n, uvec2 d)
+{
+   uvec2 quot = uvec2(0U, 0U);
+   int log2_denom = findMSB(d.y) + 32;
+
+   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
+    * greater than 32 bits to occur. If the upper 32 bits of the numerator
+    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
+    * denom == 0.
+    */
+   if (d.y == 0 && n.y >= d.x) {
+      log2_denom = findMSB(d.x);
+
+      /* Since the upper 32 bits of denom are zero, log2_denom <= 31, so
+       * the shift guard only needs to compare against 31 - i here instead
+       * of the 63 - i used in the general case (below).
+       */
+      for (int i = 31; i >= 1; i--) {
+         if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
+            n.y -= d.x << i;
+            quot.y |= 1U << i;
+         }
+      }
+
+      /* log2_denom is always <= 31, so manually peel the last loop
+       * iteration.
+       */
+      if (d.x <= n.y) {
+         n.y -= d.x;
+         quot.y |= 1U;
+      }
+   }
+
+   uint64_t d64 = packUint2x32(d);
+   uint64_t n64 = packUint2x32(n);
+   for (int i = 31; i >= 1; i--) {
+      if (log2_denom <= 63 - i && (d64 << i) <= n64) {
+         n64 -= d64 << i;
+         quot.x |= 1U << i;
+      }
+   }
+
+   /* log2_denom is always <= 63, so manually peel the last loop
+    * iteration.
+    */
+   if (d64 <= n64) {
+      n64 -= d64;
+      quot.x |= 1U;
+   }
+
+   return uvec4(quot, unpackUint2x32(n64)); /* .xy = quot, .zw = n after the subtractions above */
+}
+
+uvec2
+udiv64(uvec2 n, uvec2 d)
+{
+   return udivmod64(n, d).xy;
+}
+
+ivec2
+idiv64(ivec2 _n, ivec2 _d)
+{
+   const bool negate = (_n.y < 0) != (_d.y < 0); /* true when _n.y and _d.y differ in sign */
+   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
+   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
+
+   uvec2 quot = udivmod64(n, d).xy;
+
+   return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);
+}
|