summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorRoland Scheidegger <[email protected]>2019-04-17 02:34:01 +0200
committerRoland Scheidegger <[email protected]>2019-04-17 17:42:13 +0200
commitdded2edf8bedb9a5bafd7788e52dc8e714527186 (patch)
treee736c1212250abcc87ea663f60a4c68426865a7a /src/gallium/auxiliary
parentb74e605cf47fe54c0d2a6df072c1874160cfd9eb (diff)
gallivm: fix saturated signed add / sub with llvm 9
llvm 8 removed saturated unsigned add / sub x86 sse2 intrinsics, and now llvm 9 removed the signed versions as well - they were proposed for removal earlier, but the pattern to recognize those was very complex, so it wasn't done then. However, instead of these arch-specific intrinsics, there's now arch-independent intrinsics for saturated add / sub, both for signed and unsigned, so use these. They should have only advantages (work with arbitrary vector sizes, optimal code for all archs), although I don't know how well they work in practice for other archs (at least for x86 they do the right thing). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110454 Reviewed-by: Brian Paul <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c14
1 files changed, 14 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 057c50ed278..02fb81afe51 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -555,6 +555,12 @@ lp_build_add(struct lp_build_context *bld,
return bld->one;
if (!type.floating && !type.fixed) {
+ if (HAVE_LLVM >= 0x0900) {
+ char intrin[32];
+ intrinsic = type.sign ? "llvm.sadd.sat" : "llvm.uadd.sat";
+ lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+ return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
+ }
if (type.width * type.length == 128) {
if (util_cpu_caps.has_sse2) {
if (type.width == 8)
@@ -625,6 +631,7 @@ lp_build_add(struct lp_build_context *bld,
* NOTE: cmp/select does sext/trunc of the mask. Does not seem to
* interfere with llvm's ability to recognize the pattern but seems
* a bit brittle.
+ * NOTE: llvm 9+ always uses (non arch specific) intrinsic.
*/
LLVMValueRef overflowed = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, res);
res = lp_build_select(bld, overflowed,
@@ -876,6 +883,12 @@ lp_build_sub(struct lp_build_context *bld,
return bld->zero;
if (!type.floating && !type.fixed) {
+ if (HAVE_LLVM >= 0x0900) {
+ char intrin[32];
+ intrinsic = type.sign ? "llvm.ssub.sat" : "llvm.usub.sat";
+ lp_format_intrinsic(intrin, sizeof intrin, intrinsic, bld->vec_type);
+ return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
+ }
if (type.width * type.length == 128) {
if (util_cpu_caps.has_sse2) {
if (type.width == 8)
@@ -925,6 +938,7 @@ lp_build_sub(struct lp_build_context *bld,
* NOTE: cmp/select does sext/trunc of the mask. Does not seem to
* interfere with llvm's ability to recognize the pattern but seems
* a bit brittle.
+ * NOTE: llvm 9+ always uses (non arch specific) intrinsic.
*/
LLVMValueRef no_ov = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
a = lp_build_select(bld, no_ov, a, b);