aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJose Fonseca <[email protected]>2016-04-01 11:50:28 +0100
committerJose Fonseca <[email protected]>2016-04-03 22:09:07 +0100
commite4f01da15d8c6ce3e8c77ff3ff3d2ce2574a3f7b (patch)
tree2decca255387a937c279f7586aec9961ea9d25c1
parent324451e73fae17c2844f24c7e02000bddc260e78 (diff)
gallivm: Prefer backend agnostic intrinsic for rounding.
We could unconditionally use these instrinsics, but performance with SSE2 would suck, as LLVM falls back to calling libm. lp_test_arit. Reviewed-by: Roland Scheidegger <[email protected]>
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c46
1 files changed, 39 insertions, 7 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 01c6ba96e3f..12777436d7c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1675,13 +1675,13 @@ enum lp_build_round_mode
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
*/
static inline LLVMValueRef
-lp_build_round_sse41(struct lp_build_context *bld,
- LLVMValueRef a,
- enum lp_build_round_mode mode)
+lp_build_nearest_sse41(struct lp_build_context *bld,
+ LLVMValueRef a)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+ LLVMValueRef mode = LLVMConstNull(i32t);
const char *intrinsic;
LLVMValueRef res;
@@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
args[0] = undef;
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
- args[2] = LLVMConstInt(i32t, mode, 0);
+ args[2] = mode;
res = lp_build_intrinsic(builder, intrinsic,
vec_type, args, Elements(args), 0);
@@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
res = lp_build_intrinsic_binary(builder, intrinsic,
bld->vec_type, a,
- LLVMConstInt(i32t, mode, 0));
+ mode);
}
return res;
@@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
{
- if (util_cpu_caps.has_sse4_1)
- return lp_build_round_sse41(bld, a, mode);
+ if (util_cpu_caps.has_sse4_1) {
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ const struct lp_type type = bld->type;
+ const char *intrinsic_root;
+ char intrinsic[32];
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+ (void)type;
+
+ switch (mode) {
+ case LP_BUILD_ROUND_NEAREST:
+ if (HAVE_LLVM >= 0x0304) {
+ intrinsic_root = "llvm.round";
+ } else {
+ return lp_build_nearest_sse41(bld, a);
+ }
+ break;
+ case LP_BUILD_ROUND_FLOOR:
+ intrinsic_root = "llvm.floor";
+ break;
+ case LP_BUILD_ROUND_CEIL:
+ intrinsic_root = "llvm.ceil";
+ break;
+ case LP_BUILD_ROUND_TRUNCATE:
+ intrinsic_root = "llvm.trunc";
+ break;
+ }
+
+ util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
+ intrinsic_root, type.length, type.width);
+
+ return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+ }
else /* (util_cpu_caps.has_altivec) */
return lp_build_round_altivec(bld, a, mode);
}