diff options
author | Jose Fonseca <[email protected]> | 2016-04-01 11:50:28 +0100 |
---|---|---|
committer | Jose Fonseca <[email protected]> | 2016-04-03 22:09:07 +0100 |
commit | e4f01da15d8c6ce3e8c77ff3ff3d2ce2574a3f7b (patch) | |
tree | 2decca255387a937c279f7586aec9961ea9d25c1 /src | |
parent | 324451e73fae17c2844f24c7e02000bddc260e78 (diff) |
gallivm: Prefer backend agnostic intrinsic for rounding.
We could unconditionally use these intrinsics, but performance with SSE2
would suck, as LLVM falls back to calling libm.
Tested with lp_test_arit.
Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 46 |
1 files changed, 39 insertions, 7 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 01c6ba96e3f..12777436d7c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1675,13 +1675,13 @@ enum lp_build_round_mode * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. */ static inline LLVMValueRef -lp_build_round_sse41(struct lp_build_context *bld, - LLVMValueRef a, - enum lp_build_round_mode mode) +lp_build_nearest_sse41(struct lp_build_context *bld, + LLVMValueRef a) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); + LLVMValueRef mode = LLVMConstNull(i32t); const char *intrinsic; LLVMValueRef res; @@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld, args[0] = undef; args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); - args[2] = LLVMConstInt(i32t, mode, 0); + args[2] = mode; res = lp_build_intrinsic(builder, intrinsic, vec_type, args, Elements(args), 0); @@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld, res = lp_build_intrinsic_binary(builder, intrinsic, bld->vec_type, a, - LLVMConstInt(i32t, mode, 0)); + mode); } return res; @@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, mode); + if (util_cpu_caps.has_sse4_1) { + LLVMBuilderRef builder = bld->gallivm->builder; + const struct lp_type type = bld->type; + const char *intrinsic_root; + char intrinsic[32]; + + assert(type.floating); + assert(lp_check_value(type, a)); + (void)type; + + switch (mode) { + case LP_BUILD_ROUND_NEAREST: + if (HAVE_LLVM >= 0x0304) { + intrinsic_root = "llvm.round"; + } else { + return lp_build_nearest_sse41(bld, a); + } + break; + case LP_BUILD_ROUND_FLOOR: + intrinsic_root = "llvm.floor"; + 
break; + case LP_BUILD_ROUND_CEIL: + intrinsic_root = "llvm.ceil"; + break; + case LP_BUILD_ROUND_TRUNCATE: + intrinsic_root = "llvm.trunc"; + break; + } + + util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u", + intrinsic_root, type.length, type.width); + + return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); + } else /* (util_cpu_caps.has_altivec) */ return lp_build_round_altivec(bld, a, mode); } |