diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_bld_arit.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_arit.c | 316 |
1 files changed, 258 insertions, 58 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 09a57ff33d5..e8c5fa3c2a1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -48,6 +48,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" #include "util/u_string.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -65,7 +66,7 @@ lp_build_min_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; @@ -113,36 +114,34 @@ lp_build_max_simple(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; const char *intrinsic = NULL; LLVMValueRef cond; /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.max.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.max.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxs.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -159,7 +158,7 @@ LLVMValueRef lp_build_comp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->one) return bld->zero; @@ -188,7 +187,7 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(a == bld->zero) @@ -204,15 +203,14 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->one || b == bld->one) return bld->one; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -241,7 +239,7 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res; if(b == bld->zero) @@ -257,15 +255,14 @@ lp_build_sub(struct lp_build_context *bld, if(b == bld->one) return bld->zero; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -405,7 +402,7 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -419,8 +416,7 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 8 && type.length == 16) { + if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) { LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); static LLVMValueRef ml = NULL; @@ -456,7 +452,6 @@ lp_build_mul(struct lp_build_context *bld, return ab; } -#endif /* FIXME */ assert(0); @@ -477,7 +472,7 @@ lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->zero; @@ -493,15 +488,38 @@ lp_build_div(struct lp_build_context *bld, if(LLVMIsConstant(a) && LLVMIsConstant(b)) return LLVMConstFDiv(a, b); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); -#endif return LLVMBuildFDiv(bld->builder, a, b, ""); } +LLVMValueRef +lp_build_lerp(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef v0, + LLVMValueRef v1) +{ + return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); +} + + +LLVMValueRef +lp_build_lerp_2d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef v00, + LLVMValueRef v01, + LLVMValueRef v10, + LLVMValueRef v11) +{ + LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01); + LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11); + return lp_build_lerp(bld, y, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. @@ -565,33 +583,216 @@ LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); if(!type.sign) return a; - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(!type.floating && type.width*type.length == 128) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - if(type.width == 8) + if(type.floating) { + /* Mask out the sign bit */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } + + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { + switch(type.width) { + case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); - if(type.width == 16) + case 16: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a); - if(type.width == 32) + case 32: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); + } } -#endif return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); } LLVMValueRef +lp_build_sgn(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef cond; + LLVMValueRef res; + + /* Handle non-zero case */ + if(!type.sign) { + /* if not zero then sign must be positive */ + res = bld->one; + } + else if(type.floating) { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef one; + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + one = LLVMConstBitCast(bld->one, int_vec_type); + res = LLVMBuildOr(bld->builder, sign, one, ""); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } + else + { + LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); + res = lp_build_select(bld, cond, bld->one, minus_one); + } + + /* Handle zero */ + cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); + res = lp_build_select(bld, cond, bld->zero, bld->one); + + return res; +} + + +enum lp_build_round_sse41_mode +{ + LP_BUILD_ROUND_SSE41_NEAREST = 0, + LP_BUILD_ROUND_SSE41_FLOOR = 1, + LP_BUILD_ROUND_SSE41_CEIL = 2, + LP_BUILD_ROUND_SSE41_TRUNCATE = 3 +}; + + +static INLINE LLVMValueRef +lp_build_round_sse41(struct lp_build_context *bld, + LLVMValueRef a, + enum lp_build_round_sse41_mode mode) +{ + const struct lp_type type = bld->type; + LLVMTypeRef vec_type = lp_build_vec_type(type); + const char *intrinsic; + + assert(type.floating); + assert(type.width*type.length == 128); + + switch(type.width) { + case 32: + intrinsic = "llvm.x86.sse41.round.ps"; + break; + case 64: + intrinsic = "llvm.x86.sse41.round.pd"; + break; + default: + assert(0); + return bld->undef; + } + + return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a, + LLVMConstInt(LLVMInt32Type(), mode, 0)); +} + + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_floor(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_ceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + + /* FIXME */ + assert(0); + return bld->undef; +} + + +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + + /* FIXME */ + assert(0); + return bld->undef; +} + + +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_int(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); +} + + +LLVMValueRef +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_floor(bld, a); + a = lp_build_int(bld, a); + return a; +} + + +LLVMValueRef lp_build_sqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -609,7 +810,7 @@ LLVMValueRef lp_build_rcp(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; if(a == bld->zero) return bld->undef; @@ -623,11 +824,9 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + /* FIXME: improve precision */ return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } @@ -640,15 +839,12 @@ LLVMValueRef lp_build_rsqrt(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; assert(type.floating); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); -#endif return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -661,7 +857,7 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -681,7 +877,7 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -704,7 +900,8 @@ lp_build_pow(struct lp_build_context *bld, { /* TODO: optimize the constant case */ if(LLVMIsConstant(x) && LLVMIsConstant(y)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); } @@ -752,13 +949,14 @@ lp_build_polynomial(struct lp_build_context *bld, const double *coeffs, unsigned num_coeffs) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMValueRef res = NULL; unsigned i; /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); for (i = num_coeffs; i--; ) { LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); @@ -800,7 +998,7 @@ lp_build_exp2_approx(struct lp_build_context *bld, LLVMValueRef *p_frac_part, LLVMValueRef *p_exp2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef ipart = NULL; @@ -812,7 +1010,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); @@ -893,7 +1092,7 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMValueRef *p_floor_log2, LLVMValueRef *p_log2) { - const union lp_type type = bld->type; + const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); @@ -911,7 +1110,8 @@ lp_build_log2_approx(struct lp_build_context *bld, if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); |