diff options
author | Zack Rusin <[email protected]> | 2013-12-06 01:28:25 -0500 |
---|---|---|
committer | Zack Rusin <[email protected]> | 2013-12-10 16:39:48 -0500 |
commit | 155139059ba588da1161eaa692515cacdead9f4e (patch) | |
tree | f64daec33014ca5ea6ccf8d93a58ae064dda28eb /src/gallium/auxiliary/gallivm | |
parent | 1e71493afa263791b2ff10afd2fbc36a7effa73f (diff) |
llvmpipe: fix blending with half-float formats
The fact that we flush denorms to zero breaks our half-float
conversion and blending. This patch enables denorms for
blending. It's a little tricky due to an LLVM bug that makes
it incorrectly reorder the mxcsr intrinsics:
http://llvm.org/bugs/show_bug.cgi?id=6393
Signed-off-by: Zack Rusin <[email protected]>
Reviewed-by: José Fonseca <[email protected]>
Reviewed-by: Roland Scheidegger <[email protected]>
Signed-off-by: Zack Rusin <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 71 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 11 |
2 files changed, 82 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 70929e752b0..440dd0b6ac2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -64,6 +64,17 @@ #include "lp_bld_arit.h" #include "lp_bld_flow.h" +#if defined(PIPE_ARCH_SSE) +#include <xmmintrin.h> +#endif + +#ifndef _MM_DENORMALS_ZERO_MASK +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif + +#ifndef _MM_FLUSH_ZERO_MASK +#define _MM_FLUSH_ZERO_MASK 0x8000 +#endif #define EXP_POLY_DEGREE 5 @@ -3489,3 +3500,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, return ret; } + +LLVMValueRef +lp_build_fpstate_get(struct gallivm_state *gallivm) +{ + if (util_cpu_caps.has_sse) { + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mxcsr_ptr = lp_build_alloca( + gallivm, + LLVMInt32TypeInContext(gallivm->context), + "mxcsr_ptr"); + lp_build_intrinsic(builder, + "llvm.x86.sse.stmxcsr", + LLVMVoidTypeInContext(gallivm->context), + &mxcsr_ptr, 1); + return mxcsr_ptr; + } + return 0; +} + +void +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, + boolean zero) +{ + if (util_cpu_caps.has_sse) { + /* turn on DAZ (64) | FTZ (32768) = 32832 if available */ + int daz_ftz = _MM_FLUSH_ZERO_MASK; + + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm); + LLVMValueRef mxcsr = + LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr"); + + if (util_cpu_caps.has_daz) { + /* Enable denormals are zero mode */ + daz_ftz |= _MM_DENORMALS_ZERO_MASK; + } + if (zero) { + mxcsr = LLVMBuildOr(builder, mxcsr, + LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), ""); + } else { + mxcsr = LLVMBuildAnd(builder, mxcsr, + LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), ""); + } + + LLVMBuildStore(builder, mxcsr, mxcsr_ptr); + lp_build_fpstate_set(gallivm, mxcsr_ptr); + } +} + +void +lp_build_fpstate_set(struct gallivm_state *gallivm, + LLVMValueRef mxcsr_ptr) +{ + if 
(util_cpu_caps.has_sse) { + lp_build_intrinsic(gallivm->builder, + "llvm.x86.sse.ldmxcsr", + LLVMVoidTypeInContext(gallivm->context), + &mxcsr_ptr, 1); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 75bf89e951e..9d2909340b2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, const struct lp_type type, LLVMValueRef x); + +LLVMValueRef +lp_build_fpstate_get(struct gallivm_state *gallivm); + +void +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, + boolean zero); +void +lp_build_fpstate_set(struct gallivm_state *gallivm, + LLVMValueRef mxcsr); + #endif /* !LP_BLD_ARIT_H */ |