From 155139059ba588da1161eaa692515cacdead9f4e Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Fri, 6 Dec 2013 01:28:25 -0500
Subject: llvmpipe: fix blending with half-float formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fact that we flush denorms to zero breaks our half-float
conversion and blending. This patch enables denorms for blending.
It's a little tricky due to the llvm bug that makes it incorrectly
reorder the mxcsr intrinsics:
http://llvm.org/bugs/show_bug.cgi?id=6393

Signed-off-by: Zack Rusin
Reviewed-by: José Fonseca
Reviewed-by: Roland Scheidegger
Signed-off-by: Zack Rusin
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 98 +++++++++++++++++++++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_arit.h | 11 ++++
 2 files changed, 109 insertions(+)

(limited to 'src/gallium/auxiliary/gallivm')

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 70929e752b0..440dd0b6ac2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -64,6 +64,17 @@
 #include "lp_bld_arit.h"
 #include "lp_bld_flow.h"
 
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+#endif
+
+#ifndef _MM_DENORMALS_ZERO_MASK
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+
+#ifndef _MM_FLUSH_ZERO_MASK
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#endif
 
 #define EXP_POLY_DEGREE 5
 
@@ -3489,3 +3500,90 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
 
    return ret;
 }
+
+/**
+ * Emit code that stores the current MXCSR value into a fresh i32 slot
+ * created with lp_build_alloca().
+ *
+ * Returns a pointer to that slot, or NULL when util_cpu_caps.has_sse is
+ * not set, in which case nothing is emitted.
+ */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm)
+{
+   if (util_cpu_caps.has_sse) {
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_alloca(
+         gallivm,
+         LLVMInt32TypeInContext(gallivm->context),
+         "mxcsr_ptr");
+      /* llvm.x86.sse.stmxcsr takes an i8 pointer, not an i32 pointer */
+      LLVMValueRef mxcsr_ptr8 = LLVMBuildPointerCast(
+         builder, mxcsr_ptr,
+         LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+      lp_build_intrinsic(builder,
+                         "llvm.x86.sse.stmxcsr",
+                         LLVMVoidTypeInContext(gallivm->context),
+                         &mxcsr_ptr8, 1);
+      return mxcsr_ptr;
+   }
+   return NULL;
+}
+
+/**
+ * Emit code that sets (zero != 0) or clears (zero == 0) the flush-to-zero
+ * bit in MXCSR, plus the denormals-are-zero bit if util_cpu_caps.has_daz.
+ *
+ * The updated value is written back with lp_build_fpstate_set().
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero)
+{
+   if (util_cpu_caps.has_sse) {
+      /* FTZ (0x8000) always; DAZ (0x0040) is added below if available */
+      int daz_ftz = _MM_FLUSH_ZERO_MASK;
+
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
+      LLVMValueRef mxcsr =
+         LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
+
+      if (util_cpu_caps.has_daz) {
+         /* Enable denormals are zero mode */
+         daz_ftz |= _MM_DENORMALS_ZERO_MASK;
+      }
+      if (zero) {
+         mxcsr = LLVMBuildOr(builder, mxcsr,
+                             LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
+      } else {
+         mxcsr = LLVMBuildAnd(builder, mxcsr,
+                              LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
+      }
+
+      LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
+      lp_build_fpstate_set(gallivm, mxcsr_ptr);
+   }
+}
+
+/**
+ * Emit code that loads MXCSR from the i32 slot mxcsr_ptr points to,
+ * e.g. the one returned by lp_build_fpstate_get().
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr)
+{
+   if (util_cpu_caps.has_sse) {
+      /* llvm.x86.sse.ldmxcsr takes an i8 pointer, not an i32 pointer */
+      mxcsr_ptr = LLVMBuildPointerCast(gallivm->builder, mxcsr_ptr,
+         LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+      lp_build_intrinsic(gallivm->builder,
+                         "llvm.x86.sse.ldmxcsr",
+                         LLVMVoidTypeInContext(gallivm->context),
+                         &mxcsr_ptr, 1);
+   }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 75bf89e951e..9d2909340b2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
                        const struct lp_type type,
                        LLVMValueRef x);
 
+
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm);
+
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero);
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr);
+
 #endif /* !LP_BLD_ARIT_H */
-- 
cgit v1.2.3