summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 44
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.h 12
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c 2
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 11
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c 4
5 files changed, 45 insertions, 28 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 00052ed021f..70929e752b0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
@@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminub";
@@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld,
*/
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
LLVMValueRef isnan, max;
max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
@@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
@@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld,
if(intrinsic) {
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
LLVMValueRef isnan, min;
min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
@@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld,
/**
* Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
* Do checks for special cases.
*/
LLVMValueRef
@@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld,
/**
+ * Generate clamp(a, 0, 1)
+ * A NaN will get converted to zero.
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ a = lp_build_max_ext(bld, a, bld->zero, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ a = lp_build_min(bld, a, bld->one);
+ return a;
+}
+
+
+/**
* Generate abs(a)
*/
LLVMValueRef
@@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld,
/* We want to preserve NaN and make sure that for exp2 if x > 128,
* the result is INF and if it's smaller than -126.9 the result is 0 */
x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
- GALLIVM_NAN_RETURN_SECOND);
- x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
- GALLIVM_NAN_RETURN_SECOND);
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
/* ipart = floor(x) */
/* fpart = x - ipart */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 49d4e2cdc4a..75bf89e951e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -142,9 +142,11 @@ enum gallivm_nan_behavior {
GALLIVM_NAN_RETURN_NAN,
/* If one of the inputs is NaN, the other operand is returned */
GALLIVM_NAN_RETURN_OTHER,
- /* If one of the inputs is NaN, the second operand is returned.
- * In min/max it will be as fast as undefined with sse opcodes */
- GALLIVM_NAN_RETURN_SECOND
+ /* If one of the inputs is NaN, the other operand is returned.
+ * The caller must guarantee that the second operand is not a NaN.
+ * In min/max it will be as fast as undefined with sse opcodes,
+ * and archs having native return_other can benefit too. */
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
};
LLVMValueRef
@@ -176,6 +178,10 @@ lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef max);
LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
index 2b1fe643849..6645151f514 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
* can't use lp_build_conv since we want to keep values as 32bit
* here so we can interleave with rgb to go from SoA->AoS.
*/
- alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
+ alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
alpha = lp_build_mul(&f32_bld, alpha,
lp_build_const_vec(gallivm, src_type, 255.0f));
tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 5f81066a9c1..5fc47ed155b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1384,21 +1384,18 @@ emit_store_chan(
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- value = lp_build_max_ext(float_bld, value, float_bld->zero,
- GALLIVM_NAN_RETURN_SECOND);
- value = lp_build_min_ext(float_bld, value, float_bld->one,
- GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ value = lp_build_clamp_zero_one_nanzero(float_bld, value);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+ /* This will give -1.0 for NaN which is probably not what we want. */
value = lp_build_max_ext(float_bld, value,
lp_build_const_vec(gallivm, float_bld->type, -1.0),
- GALLIVM_NAN_RETURN_SECOND);
- value = lp_build_min_ext(float_bld, value, float_bld->one,
- GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ value = lp_build_min(float_bld, value, float_bld->one);
break;
default:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 8223d2ad7eb..b5816e038f1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
assert(row_type.floating);
lp_build_context_init(&f32_bld, gallivm, row_type);
for (i = 0; i < src_count; i++) {
- src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero, f32_bld.one);
+ src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]);
}
if (dual_source_blend) {
for (i = 0; i < src_count; i++) {
- src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero, f32_bld.one);
+ src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]);
}
}
/* probably can't be different than row_type but better safe than sorry... */