diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 71 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 31 |
3 files changed, 108 insertions, 5 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 70929e752b0..440dd0b6ac2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -64,6 +64,17 @@ #include "lp_bld_arit.h" #include "lp_bld_flow.h" +#if defined(PIPE_ARCH_SSE) +#include <xmmintrin.h> +#endif + +#ifndef _MM_DENORMALS_ZERO_MASK +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif + +#ifndef _MM_FLUSH_ZERO_MASK +#define _MM_FLUSH_ZERO_MASK 0x8000 +#endif #define EXP_POLY_DEGREE 5 @@ -3489,3 +3500,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, return ret; } + +LLVMValueRef +lp_build_fpstate_get(struct gallivm_state *gallivm) +{ + if (util_cpu_caps.has_sse) { + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mxcsr_ptr = lp_build_alloca( + gallivm, + LLVMInt32TypeInContext(gallivm->context), + "mxcsr_ptr"); + lp_build_intrinsic(builder, + "llvm.x86.sse.stmxcsr", + LLVMVoidTypeInContext(gallivm->context), + &mxcsr_ptr, 1); + return mxcsr_ptr; + } + return 0; +} + +void +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, + boolean zero) +{ + if (util_cpu_caps.has_sse) { + /* turn on DAZ (64) | FTZ (32768) = 32832 if available */ + int daz_ftz = _MM_FLUSH_ZERO_MASK; + + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm); + LLVMValueRef mxcsr = + LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr"); + + if (util_cpu_caps.has_daz) { + /* Enable denormals are zero mode */ + daz_ftz |= _MM_DENORMALS_ZERO_MASK; + } + if (zero) { + mxcsr = LLVMBuildOr(builder, mxcsr, + LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), ""); + } else { + mxcsr = LLVMBuildAnd(builder, mxcsr, + LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), ""); + } + + LLVMBuildStore(builder, mxcsr, mxcsr_ptr); + lp_build_fpstate_set(gallivm, mxcsr_ptr); + } +} + +void +lp_build_fpstate_set(struct gallivm_state *gallivm, + LLVMValueRef mxcsr_ptr) +{ + if 
(util_cpu_caps.has_sse) { + lp_build_intrinsic(gallivm->builder, + "llvm.x86.sse.ldmxcsr", + LLVMVoidTypeInContext(gallivm->context), + &mxcsr_ptr, 1); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 75bf89e951e..9d2909340b2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, const struct lp_type type, LLVMValueRef x); + +LLVMValueRef +lp_build_fpstate_get(struct gallivm_state *gallivm); + +void +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, + boolean zero); +void +lp_build_fpstate_set(struct gallivm_state *gallivm, + LLVMValueRef mxcsr); + #endif /* !LP_BLD_ARIT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 74c7360bfbb..93c24f6ba38 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1554,6 +1554,28 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, const boolean is_1d = variant->key.resource_1d; unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs; + LLVMValueRef fpstate = 0; + + /* Get type from output format */ + lp_blend_type_from_format_desc(out_format_desc, &row_type); + lp_mem_type_from_format_desc(out_format_desc, &dst_type); + + /* + * Technically this code should go into lp_build_smallfloat_to_float + * and lp_build_float_to_smallfloat but due to the + * http://llvm.org/bugs/show_bug.cgi?id=6393 + * llvm reorders the mxcsr intrinsics in a way that breaks the code. + * So the ordering is important here and there shouldn't be any + * llvm ir instructions in this function before + * this, otherwise half-float format conversions won't work + * (again due to llvm bug #6393). 
+ */ + if (dst_type.floating && dst_type.width != 32) { + /* We need to make sure that denorms are ok for half float + conversions */ + fpstate = lp_build_fpstate_get(gallivm); + lp_build_fpstate_set_denorms_zero(gallivm, FALSE); + } mask_type = lp_int32_vec4_type(); mask_type.length = fs_type.length; @@ -1587,11 +1609,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, undef_src_val = lp_build_undef(gallivm, fs_type); #endif - - /* Get type from output format */ - lp_blend_type_from_format_desc(out_format_desc, &row_type); - lp_mem_type_from_format_desc(out_format_desc, &dst_type); - row_type.length = fs_type.length; vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width; @@ -2051,6 +2068,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, dst, dst_type, dst_count, dst_alignment); } + if (dst_type.floating && dst_type.width != 32) { + lp_build_fpstate_set(gallivm, fpstate); + } + if (do_branch) { lp_build_mask_end(&mask_ctx); } |