diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 44 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 12 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 |
5 files changed, 45 insertions, 28 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 00052ed021f..70929e752b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld, } } else if (type.floating && util_cpu_caps.has_altivec) { - debug_printf("%s: altivec doesn't support nan behavior modes\n", - __FUNCTION__); + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { + debug_printf("%s: altivec doesn't support nan return nan behavior\n", + __FUNCTION__); + } if (type.width == 32 && type.length == 4) { intrinsic = "llvm.ppc.altivec.vminfp"; intr_size = 128; @@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld, } } else if (util_cpu_caps.has_altivec) { intr_size = 128; - debug_printf("%s: altivec doesn't support nan behavior modes\n", - __FUNCTION__); if (type.width == 8) { if (!type.sign) { intrinsic = "llvm.ppc.altivec.vminub"; @@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld, */ if (util_cpu_caps.has_sse && type.floating && nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_SECOND) { + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { LLVMValueRef isnan, max; max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, type, @@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld, return lp_build_select(bld, cond, a, b); } break; - case GALLIVM_NAN_RETURN_SECOND: + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b); return lp_build_select(bld, cond, a, b); case GALLIVM_NAN_BEHAVIOR_UNDEFINED: @@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld, } } else if (type.floating && util_cpu_caps.has_altivec) { - debug_printf("%s: altivec doesn't support nan behavior modes\n", - __FUNCTION__); + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { + debug_printf("%s: altivec doesn't support nan return nan behavior\n", + __FUNCTION__); + } if (type.width == 32 || type.length == 4) { intrinsic = "llvm.ppc.altivec.vmaxfp"; intr_size = 128; @@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld, } } else if (util_cpu_caps.has_altivec) { intr_size = 128; - debug_printf("%s: altivec doesn't support nan behavior modes\n", - __FUNCTION__); if (type.width == 8) { if (!type.sign) { intrinsic = "llvm.ppc.altivec.vmaxub"; @@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld, if(intrinsic) { if (util_cpu_caps.has_sse && type.floating && nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_SECOND) { + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { LLVMValueRef isnan, min; min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, type, @@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld, return lp_build_select(bld, cond, a, b); } break; - case GALLIVM_NAN_RETURN_SECOND: + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b); return lp_build_select(bld, cond, a, b); case GALLIVM_NAN_BEHAVIOR_UNDEFINED: @@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld, /** * Generate clamp(a, min, max) + * NaN behavior (for any of a, min, max) is undefined. * Do checks for special cases. */ LLVMValueRef @@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld, /** + * Generate clamp(a, 0, 1) + * A NaN will get converted to zero. + */ +LLVMValueRef +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld, + LLVMValueRef a) +{ + a = lp_build_max_ext(bld, a, bld->zero, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + a = lp_build_min(bld, a, bld->one); + return a; +} + + +/** * Generate abs(a) */ LLVMValueRef @@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld, /* We want to preserve NaN and make sure than for exp2 if x > 128, * the result is INF and if it's smaller than -126.9 the result is 0 */ x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x, - GALLIVM_NAN_RETURN_SECOND); - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x, - GALLIVM_NAN_RETURN_SECOND); + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x); /* ipart = floor(x) */ /* fpart = x - ipart */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 49d4e2cdc4a..75bf89e951e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -142,9 +142,11 @@ enum gallivm_nan_behavior { GALLIVM_NAN_RETURN_NAN, /* If one of the inputs is NaN, the other operand is returned */ GALLIVM_NAN_RETURN_OTHER, - /* If one of the inputs is NaN, the second operand is returned. - * In min/max it will be as fast as undefined with sse opcodes */ - GALLIVM_NAN_RETURN_SECOND + /* If one of the inputs is NaN, the other operand is returned, + * but we guarantee the second operand is not a NaN. + * In min/max it will be as fast as undefined with sse opcodes, + * and archs having native return_other can benefit too. */ + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN }; LLVMValueRef @@ -176,6 +178,10 @@ lp_build_clamp(struct lp_build_context *bld, LLVMValueRef max); LLVMValueRef +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_abs(struct lp_build_context *bld, LLVMValueRef a); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c index 2b1fe643849..6645151f514 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c @@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm, * can't use lp_build_conv since we want to keep values as 32bit * here so we can interleave with rgb to go from SoA->AoS. */ - alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one); + alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]); alpha = lp_build_mul(&f32_bld, alpha, lp_build_const_vec(gallivm, src_type, 255.0f)); tmpsrgb[3] = lp_build_iround(&f32_bld, alpha); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5f81066a9c1..5fc47ed155b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1384,21 +1384,18 @@ emit_store_chan( assert(dtype == TGSI_TYPE_FLOAT || dtype == TGSI_TYPE_UNTYPED); value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); - value = lp_build_max_ext(float_bld, value, float_bld->zero, - GALLIVM_NAN_RETURN_SECOND); - value = lp_build_min_ext(float_bld, value, float_bld->one, - GALLIVM_NAN_BEHAVIOR_UNDEFINED); + value = lp_build_clamp_zero_one_nanzero(float_bld, value); break; case TGSI_SAT_MINUS_PLUS_ONE: assert(dtype == TGSI_TYPE_FLOAT || dtype == TGSI_TYPE_UNTYPED); value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); + /* This will give -1.0 for NaN which is probably not what we want. */ value = lp_build_max_ext(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0), - GALLIVM_NAN_RETURN_SECOND); - value = lp_build_min_ext(float_bld, value, float_bld->one, - GALLIVM_NAN_BEHAVIOR_UNDEFINED); + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + value = lp_build_min(float_bld, value, float_bld->one); break; default: diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 8223d2ad7eb..b5816e038f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, assert(row_type.floating); lp_build_context_init(&f32_bld, gallivm, row_type); for (i = 0; i < src_count; i++) { - src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero, f32_bld.one); + src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]); } if (dual_source_blend) { for (i = 0; i < src_count; i++) { - src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero, f32_bld.one); + src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]); } } /* probably can't be different than row_type but better safe than sorry... */ |