diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 20ae958714b..f372a48846f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - /* FIXME: improve precision */ + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + /* + * XXX: Added precision is not always necessary, so only enable this + * when we have a better system in place to track minimum precision. + */ + +#if 0 + /* + * Do one Newton-Raphson step to improve precision: + * + * x1 = (2 - a * rcp(a)) * rcp(a) + */ + + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef rcp_a; + LLVMValueRef res; + + rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + + res = LLVMBuildMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildSub(bld->builder, two, res, ""); + res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + + return rcp_a; +#else return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); +#endif + } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } |