summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm/lp_bld_arit.c
diff options
context:
space:
mode:
authorRoland Scheidegger <[email protected]>2013-11-11 14:29:25 +0000
committerRoland Scheidegger <[email protected]>2013-11-14 12:24:55 +0000
commit754319490f6946a9ad5ee619822d5fe4254e6759 (patch)
treeaa89800a7fcd530661b8018feaeadba480182f44 /src/gallium/auxiliary/gallivm/lp_bld_arit.c
parenta15a19f0d1b024f5f18f1dfe878ae8d399e38469 (diff)
gallivm,llvmpipe: fix float->srgb conversion to handle NaNs
d3d10 requires us to convert NaNs to zero for any float->int conversion. We don't really do that but mostly seems to work. In particular I suspect the very common float->unorm8 path only really passes because it relies on sse2 pack intrinsics which just happen to work by luck for NaNs (float->int conversion in hw gives integer indeterminate value, which just happens to be -0x80000000 hence gets converted to zero in the end after pack intrinsics). However, float->srgb didn't get so lucky, because we need to clamp before blending and clamping resulted in NaN behavior being undefined (and actually got converted to 1.0 by clamping with sse2). Fix this by using a zero/one clamp with defined nan behavior as we can handle the NaN for free this way. I suspect there's more bugs lurking in this area (e.g. converting floats to snorm) as we don't really use defined NaN behavior everywhere but this seems to be good enough. While here respecify nan behavior modes a bit, in particular the return_second mode didn't really do what we wanted. From the caller's perspective, we really wanted to say we need the non-nan result, but we already know the second arg isn't a NaN. So we use this now instead, which means that cpu architectures which actually implement min/max by always returning non-nan (that is adhering to ieee754-2008 rules) don't need to bend over backwards for nothing. Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_arit.c')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c44
1 files changed, 29 insertions, 15 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 00052ed021f..70929e752b0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
@@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vminub";
@@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld,
*/
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
LLVMValueRef isnan, max;
max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
@@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
+ if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+ debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+ __FUNCTION__);
+ }
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
@@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
- debug_printf("%s: altivec doesn't support nan behavior modes\n",
- __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld,
if(intrinsic) {
if (util_cpu_caps.has_sse && type.floating &&
nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
- nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
LLVMValueRef isnan, min;
min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
type,
@@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld,
return lp_build_select(bld, cond, a, b);
}
break;
- case GALLIVM_NAN_RETURN_SECOND:
+ case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
return lp_build_select(bld, cond, a, b);
case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld,
/**
* Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
* Do checks for special cases.
*/
LLVMValueRef
@@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld,
/**
+ * Generate clamp(a, 0, 1)
+ * A NaN will get converted to zero.
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ a = lp_build_max_ext(bld, a, bld->zero, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ a = lp_build_min(bld, a, bld->one);
+ return a;
+}
+
+
+/**
* Generate abs(a)
*/
LLVMValueRef
@@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld,
/* We want to preserve NaN and make sure than for exp2 if x > 128,
* the result is INF and if it's smaller than -126.9 the result is 0 */
x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), x,
- GALLIVM_NAN_RETURN_SECOND);
- x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
- GALLIVM_NAN_RETURN_SECOND);
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
/* ipart = floor(x) */
/* fpart = x - ipart */