diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 057c50ed278..02fb81afe51 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -555,6 +555,12 @@ lp_build_add(struct lp_build_context *bld, return bld->one; if (!type.floating && !type.fixed) { + if (HAVE_LLVM >= 0x0900) { + char intrin[32]; + intrinsic = type.sign ? "llvm.sadd.sat" : "llvm.uadd.sat"; + lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type); + return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); + } if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { if (type.width == 8) @@ -625,6 +631,7 @@ lp_build_add(struct lp_build_context *bld, * NOTE: cmp/select does sext/trunc of the mask. Does not seem to * interfere with llvm's ability to recognize the pattern but seems * a bit brittle. + * NOTE: llvm 9+ always uses (non arch specific) intrinsic. */ LLVMValueRef overflowed = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, res); res = lp_build_select(bld, overflowed, @@ -876,6 +883,12 @@ lp_build_sub(struct lp_build_context *bld, return bld->zero; if (!type.floating && !type.fixed) { + if (HAVE_LLVM >= 0x0900) { + char intrin[32]; + intrinsic = type.sign ? "llvm.ssub.sat" : "llvm.usub.sat"; + lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type); + return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); + } if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { if (type.width == 8) @@ -925,6 +938,7 @@ lp_build_sub(struct lp_build_context *bld, * NOTE: cmp/select does sext/trunc of the mask. Does not seem to * interfere with llvm's ability to recognize the pattern but seems * a bit brittle. + * NOTE: llvm 9+ always uses (non arch specific) intrinsic. */ LLVMValueRef no_ov = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); a = lp_build_select(bld, no_ov, a, b); |