5 files changed, 45 insertions, 28 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 00052ed021f..70929e752b0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
       if (type.width == 32 && type.length == 4) {
          intrinsic = "llvm.ppc.altivec.vminfp";
          intr_size = 128;
@@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld,
       }
    } else if (util_cpu_caps.has_altivec) {
       intr_size = 128;
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
       if (type.width == 8) {
          if (!type.sign) {
             intrinsic = "llvm.ppc.altivec.vminub";
@@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld,
        */
       if (util_cpu_caps.has_sse && type.floating &&
           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
          LLVMValueRef isnan, max;
          max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                    type,
@@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld,
          return lp_build_select(bld, cond, a, b);
       }
          break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
          cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
          return lp_build_select(bld, cond, a, b);
       case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld,
       }
    }
    else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
       if (type.width == 32 || type.length == 4) {
          intrinsic = "llvm.ppc.altivec.vmaxfp";
          intr_size = 128;
@@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld,
       }
    } else if (util_cpu_caps.has_altivec) {
      intr_size = 128;
-     debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                  __FUNCTION__);
      if (type.width == 8) {
        if (!type.sign) {
          intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld,
    if(intrinsic) {
       if (util_cpu_caps.has_sse && type.floating &&
           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
          LLVMValueRef isnan, min;
          min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                    type,
@@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld,
          return lp_build_select(bld, cond, a, b);
       }
          break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
          cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
          return lp_build_select(bld, cond, a, b);
       case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld,
 
 /**
  * Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
  * Do checks for special cases.
  */
 LLVMValueRef
@@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld,
 
 
 /**
+ * Generate clamp(a, 0, 1)
+ * A NaN will get converted to zero.
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a)
+{
+   a = lp_build_max_ext(bld, a, bld->zero, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   a = lp_build_min(bld, a, bld->one);
+   return a;
+}
+
+
+/**
  * Generate abs(a)
  */
 LLVMValueRef
@@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld,
    /* We want to preserve NaN and make sure than for exp2 if x > 128,
     * the result is INF  and if it's smaller than -126.9 the result is 0 */
    x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
-                        GALLIVM_NAN_RETURN_SECOND);
-   x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
-                        GALLIVM_NAN_RETURN_SECOND);
+                        GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
 
    /* ipart = floor(x) */
    /* fpart = x - ipart */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 49d4e2cdc4a..75bf89e951e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -142,9 +142,11 @@ enum gallivm_nan_behavior {
    GALLIVM_NAN_RETURN_NAN,
    /* If one of the inputs is NaN, the other operand is returned */
    GALLIVM_NAN_RETURN_OTHER,
-   /* If one of the inputs is NaN, the second operand is returned.
-    * In min/max it will be as fast as undefined with sse opcodes */
-   GALLIVM_NAN_RETURN_SECOND
+   /* If one of the inputs is NaN, the other operand is returned,
+    * but we guarantee the second operand is not a NaN.
+    * In min/max it will be as fast as undefined with sse opcodes,
+    * and archs having native return_other can benefit too. */
+   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
 };
 
 LLVMValueRef
@@ -176,6 +178,10 @@ lp_build_clamp(struct lp_build_context *bld,
                LLVMValueRef max);
 
 LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a);
+
+LLVMValueRef
 lp_build_abs(struct lp_build_context *bld,
              LLVMValueRef a);
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
index 2b1fe643849..6645151f514 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
     * can't use lp_build_conv since we want to keep values as 32bit
     * here so we can interleave with rgb to go from SoA->AoS.
     */
-   alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
+   alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
    alpha = lp_build_mul(&f32_bld, alpha,
                         lp_build_const_vec(gallivm, src_type, 255.0f));
    tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 5f81066a9c1..5fc47ed155b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1384,21 +1384,18 @@ emit_store_chan(
       assert(dtype == TGSI_TYPE_FLOAT ||
              dtype == TGSI_TYPE_UNTYPED);
       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      value = lp_build_max_ext(float_bld, value, float_bld->zero,
-                               GALLIVM_NAN_RETURN_SECOND);
-      value = lp_build_min_ext(float_bld, value, float_bld->one,
-                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
       assert(dtype == TGSI_TYPE_FLOAT ||
              dtype == TGSI_TYPE_UNTYPED);
       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      /* This will give -1.0 for NaN which is probably not what we want. */
       value = lp_build_max_ext(float_bld, value,
                                lp_build_const_vec(gallivm, float_bld->type, -1.0),
-                               GALLIVM_NAN_RETURN_SECOND);
-      value = lp_build_min_ext(float_bld, value, float_bld->one,
-                               GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+      value = lp_build_min(float_bld, value, float_bld->one);
       break;
 
    default:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 8223d2ad7eb..b5816e038f1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       assert(row_type.floating);
       lp_build_context_init(&f32_bld, gallivm, row_type);
       for (i = 0; i < src_count; i++) {
-         src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero, f32_bld.one);
+         src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]);
       }
       if (dual_source_blend) {
          for (i = 0; i < src_count; i++) {
-            src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero, f32_bld.one);
+            src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]);
          }
       }
       /* probably can't be different than row_type but better safe than sorry... */