diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 87 |
1 files changed, 60 insertions, 27 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e922474ef61..f348833206b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -557,23 +557,27 @@ lp_build_add(struct lp_build_context *bld, if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : + HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.paddus.b" : NULL; + if (type.width == 16) + intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : + HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.paddus.w" : NULL; } else if (util_cpu_caps.has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; + if (type.width == 16) + intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; } } if (type.width * type.length == 256) { if (util_cpu_caps.has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : + HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.paddus.b" : NULL; + if (type.width == 16) + intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : + HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.paddus.w" : NULL; } } } @@ -592,8 +596,6 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a_clamp_max = lp_build_min_simple(bld, a, LLVMBuildSub(builder, max_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED); LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildSub(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED); a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_max, a_clamp_min); - } else { - a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED); } } @@ -612,6 +614,24 @@ lp_build_add(struct lp_build_context *bld, if(bld->type.norm && (bld->type.floating || bld->type.fixed)) res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED); + if (type.norm && !type.floating && !type.fixed) { + if (!type.sign) { + /* + * newer llvm versions no longer support the intrinsics, but recognize + * the pattern. Since auto-upgrade of intrinsics doesn't work for jit + * code, it is important we match the pattern llvm uses (and pray llvm + * doesn't change it - and hope they decide on the same pattern for + * all backends supporting it...). + * NOTE: cmp/select does sext/trunc of the mask. Does not seem to + * interfere with llvm's ability to recognize the pattern but seems + * a bit brittle. + */ + LLVMValueRef overflowed = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, res); + res = lp_build_select(bld, overflowed, + LLVMConstAllOnes(bld->int_vec_type), res); + } + } + /* XXX clamp to floor of -1 or 0??? */ return res; @@ -858,23 +878,27 @@ lp_build_sub(struct lp_build_context *bld, if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : + HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.psubus.b" : NULL; + if (type.width == 16) + intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : + HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.psubus.w" : NULL; } else if (util_cpu_caps.has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; + if (type.width == 16) + intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs"; } } if (type.width * type.length == 256) { if (util_cpu_caps.has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w"; + if (type.width == 8) + intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : + HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.psubus.b" : NULL; + if (type.width == 16) + intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : + HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.psubus.w" : NULL; } } } @@ -894,7 +918,16 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildAdd(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED); a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_min, a_clamp_max); } else { - a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED); + /* + * This must match llvm pattern for saturated unsigned sub. + * (lp_build_max_simple actually does the job with its current + * definition but do it explicitly here.) + * NOTE: cmp/select does sext/trunc of the mask. Does not seem to + * interfere with llvm's ability to recognize the pattern but seems + * a bit brittle. + */ + LLVMValueRef no_ov = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); + a = lp_build_select(bld, no_ov, a, b); } } |