From e4f01da15d8c6ce3e8c77ff3ff3d2ce2574a3f7b Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Fri, 1 Apr 2016 11:50:28 +0100 Subject: gallivm: Prefer backend agnostic intrinsic for rounding. We could unconditionally use these instrinsics, but performance with SSE2 would suck, as LLVM falls back to calling libm. lp_test_arit. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 46 ++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 01c6ba96e3f..12777436d7c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1675,13 +1675,13 @@ enum lp_build_round_mode * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. */ static inline LLVMValueRef -lp_build_round_sse41(struct lp_build_context *bld, - LLVMValueRef a, - enum lp_build_round_mode mode) +lp_build_nearest_sse41(struct lp_build_context *bld, + LLVMValueRef a) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); + LLVMValueRef mode = LLVMConstNull(i32t); const char *intrinsic; LLVMValueRef res; @@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld, args[0] = undef; args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); - args[2] = LLVMConstInt(i32t, mode, 0); + args[2] = mode; res = lp_build_intrinsic(builder, intrinsic, vec_type, args, Elements(args), 0); @@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld, res = lp_build_intrinsic_binary(builder, intrinsic, bld->vec_type, a, - LLVMConstInt(i32t, mode, 0)); + mode); } return res; @@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, mode); + if (util_cpu_caps.has_sse4_1) { + LLVMBuilderRef builder = bld->gallivm->builder; + const struct lp_type type = bld->type; + const char *intrinsic_root; + char intrinsic[32]; + + assert(type.floating); + assert(lp_check_value(type, a)); + (void)type; + + switch (mode) { + case LP_BUILD_ROUND_NEAREST: + if (HAVE_LLVM >= 0x0304) { + intrinsic_root = "llvm.round"; + } else { + return lp_build_nearest_sse41(bld, a); + } + break; + case LP_BUILD_ROUND_FLOOR: + intrinsic_root = "llvm.floor"; + break; + case LP_BUILD_ROUND_CEIL: + intrinsic_root = "llvm.ceil"; + break; + case LP_BUILD_ROUND_TRUNCATE: + intrinsic_root = "llvm.trunc"; + break; + } + + util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u", + intrinsic_root, type.length, type.width); + + return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); + } else /* (util_cpu_caps.has_altivec) */ return lp_build_round_altivec(bld, a, mode); } -- cgit v1.2.3