gallivm: consolidate code for float-to-half and float-to-packed conversion.

This replaces the existing float-to-half implementation. There are a couple of behavioral differences: the old implementation had unspecified(?) rounding behavior and could, at least in theory, construct Inf values out of NaNs. NaNs and Infs should now always be properly propagated, and rounding is now towards zero (note this means values that are too large but not Infinity are converted to the max representable value, not Infinity). The implementation will therefore definitely not match the util code, which rounds to nearest (and hence converts too-large values to Infinity). Also fix a bogus round mask that probably led to rounding bugs... v2: fix a logic bug in handling infs/nans. Reviewed-by: Jose Fonseca <[email protected]>
author: Roland Scheidegger <[email protected]> 2013-04-02 17:41:44 +0200
committer: Roland Scheidegger <[email protected]> 2013-04-02 18:24:31 +0200
commit: 3febc4a1cdcfc3151195300e3f4ab10e8d46bedf (patch)
tree: 5d33368f840f22e9f2f5f2e725f376ec0cc7c01e
parent: 9be624b3ef32ae6311010cf05531e12051b647dc (diff)
3 files changed, 102 insertions, 108 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 43c59f36e57..38a577cdba5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -176,7 +176,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
    LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
 
-   /* Convert int16 vector to int32 vector by zero ext */
+   /* Convert int16 vector to int32 vector by zero ext (might generate bad code) */
    LLVMValueRef h             = LLVMBuildZExt(builder, src, int_vec_type, "");
    return lp_build_smallfloat_to_float(gallivm, f32_type, h, 10, 5, 0, true);
 }
@@ -184,16 +184,13 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
 
 /**
  * Converts float32 to int16 half-float
- * Note this can be performed in 1 instruction if vcvtps2ph exists (sse5 i think?)
+ * Note this can be performed in 1 instruction if vcvtps2ph exists (f16c/cvt16)
  * [llvm.x86.vcvtps2ph / _mm_cvtps_ph]
  *
  * @param src           value to convert
  *
- * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
- * ref https://gist.github.com/2156668
- *
- * XXX: This is an approximation. It is faster but certain NaNs are converted to
- * infinity, and rounding is not correct.
+ * Convert float32 to half floats, preserving Infs and NaNs,
+ * with rounding towards zero (trunc).
  */
 LLVMValueRef
 lp_build_float_to_half(struct gallivm_state *gallivm,
@@ -203,60 +200,13 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
    LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
    unsigned length = LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind
                    ? LLVMGetVectorSize(f32_vec_type) : 1;
-   struct lp_type f32_type = lp_type_float_vec(32, 32 * length);
-   struct lp_type u32_type = lp_type_uint_vec(32, 32 * length);
+   struct lp_type i32_type = lp_type_int_vec(32, 32 * length);
    struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
-   LLVMTypeRef u32_vec_type = lp_build_vec_type(gallivm, u32_type);
-   LLVMTypeRef i16_vec_type = lp_build_vec_type(gallivm, i16_type);
-   struct lp_build_context f32_bld;
-   struct lp_build_context u32_bld;
    LLVMValueRef result;
 
-   lp_build_context_init(&f32_bld, gallivm, f32_type);
-   lp_build_context_init(&u32_bld, gallivm, u32_type);
-
-   {
-      /* Constants */
-      LLVMValueRef u32_f32inf    = lp_build_const_int_vec(gallivm, u32_type, 0xff << 23);
-      LLVMValueRef u32_expinf    = lp_build_const_int_vec(gallivm, u32_type, 0xe0 << 23);
-      LLVMValueRef f32_f16max    = lp_build_const_vec(gallivm, f32_type, 65536.0); // 0x8f << 23
-      LLVMValueRef f32_magic     = lp_build_const_vec(gallivm, f32_type, 1.92592994e-34); // 0x0f << 23
-
-      /* Cast from float32 to int32 */
-      LLVMValueRef f             = LLVMBuildBitCast(builder, src, u32_vec_type, "");
-
-      /* Remove sign */
-      LLVMValueRef srcabs         = lp_build_abs(&f32_bld, src);
-      LLVMValueRef fabs           = LLVMBuildBitCast(builder, srcabs, u32_vec_type, "");
-
-      /* Magic conversion */
-      LLVMValueRef clamped       = lp_build_min(&f32_bld, f32_f16max, srcabs);
-      LLVMValueRef scaled        = LLVMBuildBitCast(builder,
-                                                    LLVMBuildFMul(builder,
-                                                                  clamped,
-                                                                  f32_magic,
-                                                                  ""),
-                                                    u32_vec_type,
-                                                    "");
-      /* Make sure Inf/NaN and unormalised survive */
-      LLVMValueRef infnancase    = LLVMBuildXor(builder, u32_expinf, fabs, "");
-      LLVMValueRef b_notnormal   = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL,
-                                                    srcabs,
-                                                    LLVMBuildBitCast(builder, u32_f32inf, f32_vec_type, ""));
-
-      /* Merge normal / unnormal case */
-      LLVMValueRef merged        = lp_build_select(&u32_bld, b_notnormal, infnancase, scaled);
-      LLVMValueRef shifted       = lp_build_shr_imm(&u32_bld, merged, 13);
-
-      /* Sign bit */
-      LLVMValueRef justsign      = LLVMBuildXor(builder, f, fabs, "");
-      LLVMValueRef signshifted   = lp_build_shr_imm(&u32_bld, justsign, 16);
-
-      /* Combine result */
-      result                     = LLVMBuildOr(builder, shifted, signshifted, "");
-   }
-
-   result = LLVMBuildTrunc(builder, result, i16_vec_type, "");
+   result = lp_build_float_to_smallfloat(gallivm, i32_type, src, 10, 5, 0, true);
+   /* Convert int32 vector to int16 vector by trunc (might generate bad code) */
+   result = LLVMBuildTrunc(builder, result, lp_build_vec_type(gallivm, i16_type), "");
 
    /*
     * Debugging code.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index aa8c729b819..12a03180180 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -127,6 +127,15 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
  */
 
 LLVMValueRef
+lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
+                             struct lp_type i32_type,
+                             LLVMValueRef src,
+                             unsigned mantissa_bits,
+                             unsigned exponent_bits,
+                             unsigned mantissa_start,
+                             boolean has_sign);
+
+LLVMValueRef
 lp_build_smallfloat_to_float(struct gallivm_state *gallivm,
                              struct lp_type f32_type,
                              LLVMValueRef src,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
index f899d6d64d5..a8cfe02f0d8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c
@@ -47,13 +47,14 @@
 /**
  * Convert float32 to a float-like value with less exponent and mantissa
  * bits. The mantissa is still biased, and the mantissa still has an implied 1,
- * but there's no sign bit.
+ * and there may be a sign bit.
  *
  * @param src             (vector) float value to convert
  * @param mantissa_bits   the number of mantissa bits
  * @param exponent_bits   the number of exponent bits
+ * @param mantissa_start  the start position of the small float in result value
+ * @param has_sign        if the small float has a sign bit
  *
- * Unlike float_to_half using accurate method here.
  * This implements round-towards-zero (trunc) hence too large numbers get
  * converted to largest representable number, not infinity.
  * Small numbers may get converted to denorms, depending on normal
@@ -61,27 +62,30 @@
  * Note that compared to the references, below, we skip any rounding bias
  * since we do rounding towards zero - OpenGL allows rounding towards zero
  * (though not preferred) and DX10 even seems to require it.
- * Note that this will not do any packing - the value will
- * look like a "rescaled float" (except for Inf/NaN) but be returned
- * as int32.
+ * Note that this will pack mantissa, exponent and sign bit (if any) together,
+ * and shift the result to mantissa_start.
  *
  * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
  * ref https://gist.github.com/rygorous/2156668
  */
-static LLVMValueRef
-lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm,
-                                    struct lp_type i32_type,
-                                    LLVMValueRef src,
-                                    unsigned mantissa_bits,
-                                    unsigned exponent_bits)
+LLVMValueRef
+lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
+                             struct lp_type i32_type,
+                             LLVMValueRef src,
+                             unsigned mantissa_bits,
+                             unsigned exponent_bits,
+                             unsigned mantissa_start,
+                             boolean has_sign)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal;
-   LLVMValueRef clamped, tmp, i32_roundmask, small_max, src_abs;
-   LLVMValueRef is_nan, is_posinf, is_nan_or_posinf, i32_qnanbit, nan_or_posinf;
+   LLVMValueRef rescale_src, tmp, i32_roundmask, small_max;
+   LLVMValueRef is_nan, i32_qnanbit, src_abs, shift, infcheck_src, res;
+   LLVMValueRef is_inf, is_nan_or_inf, nan_or_inf, mask;
    struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length);
    struct lp_build_context f32_bld, i32_bld;
    LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f);
+   unsigned exponent_start = mantissa_start + mantissa_bits;
 
    lp_build_context_init(&f32_bld, gallivm, f32_type);
    lp_build_context_init(&i32_bld, gallivm, i32_type);
@@ -90,16 +94,29 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm,
                                              ((1 << exponent_bits) - 1) << 23);
    i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
 
+   src_abs = lp_build_abs(&f32_bld, src);
+   src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
+
+   if (has_sign) {
+      rescale_src = src_abs;
+      infcheck_src = src_abs;
+      src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
+   }
+   else {
+      /* clamp to pos range (can still have sign bit if NaN or negative zero) */
+      rescale_src = lp_build_max(&f32_bld, src, zero);
+      rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
+      src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
+      infcheck_src = src;
+   }
+
    /* "ordinary" number */
-   /* clamp to pos range (can still have sign bit if NaN or negative zero) */
-   clamped = lp_build_max(&f32_bld, src, zero);
-   clamped = LLVMBuildBitCast(builder, clamped, i32_bld.vec_type, "");
    /* get rid of excess mantissa bits, and while here also potential sign bit */
    i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
-                                          ~((1 << (23 - mantissa_bits)) - 1) |
+                                          ~((1 << (23 - mantissa_bits)) - 1) &
                                           0x7fffffff);
 
-   tmp = lp_build_and(&i32_bld, clamped, i32_roundmask);
+   tmp = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
    tmp = LLVMBuildBitCast(builder, tmp, f32_bld.vec_type, "");
    /* bias exponent (and denormalize if necessary) */
    magic = lp_build_const_int_vec(gallivm, i32_type,
@@ -107,7 +124,7 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm,
    magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
    normal = lp_build_mul(&f32_bld, tmp, magic);
 
-   /* clamp to max value */
+   /* clamp to max value - largest non-infinity number */
    small_max = lp_build_const_int_vec(gallivm, i32_type,
                                       (((1 << exponent_bits) - 2) << 23) |
                                       (((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
@@ -118,25 +135,56 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm,
    /*
     * handle nan/inf cases
     * a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan
-    * Note that on a lucky day, we could simplify this a bit,
-    * by just using the max(src, zero) result - this will have -Inf
-    * clamped to 0, and MIGHT preserve the NaNs.
+    * (for no sign) else -Inf -> -Inf too.
+    * could use explicit "unordered" comparison checking for NaNs
+    * which might save us from calculating src_abs too.
+    * (Cannot actually save the comparison since we need to distinguish
+    * Inf and NaN cases anyway, but it would be better for AVX.)
     */
-   src_abs = lp_build_abs(&f32_bld, src);
-   src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
-   src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
    is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
                              src_abs, i32_floatexpmask);
-   is_posinf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
-                                src, i32_floatexpmask);
-   is_nan_or_posinf = lp_build_and(&i32_bld, is_nan, is_posinf);
+   is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
+                             infcheck_src, i32_floatexpmask);
+   is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf);
    /* could also set more mantissa bits but need at least the highest mantissa bit */
    i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22);
    /* combine maxexp with qnanbit */
-   nan_or_posinf = lp_build_or(&i32_bld, i32_smallexpmask,
-                               lp_build_and(&i32_bld, is_nan, i32_qnanbit));
+   nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask,
+                            lp_build_and(&i32_bld, is_nan, i32_qnanbit));
+
+   res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal);
 
-   return lp_build_select(&i32_bld, is_nan_or_posinf, nan_or_posinf, normal);
+   /* add back sign bit at right position */
+   if (has_sign) {
+      LLVMValueRef sign;
+      struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length);
+      struct lp_build_context u32_bld;
+      lp_build_context_init(&u32_bld, gallivm, u32_type);
+
+      mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
+      shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
+      sign = lp_build_and(&i32_bld, mask, src);
+      sign = lp_build_shr(&u32_bld, sign, shift);
+      res = lp_build_or(&i32_bld, sign, res);
+   }
+
+   /* shift to final position */
+   if (exponent_start < 23) {
+      shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
+      res = lp_build_shr(&i32_bld, res, shift);
+   }
+   else {
+      shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
+      res = lp_build_shl(&i32_bld, res, shift);
+   }
+   if (mantissa_start > 0) {
+      /* generally shouldn't get bits to mask off but can happen with denormals */
+      unsigned maskbits = (1 << (mantissa_bits + exponent_bits + has_sign)) - 1;
+      mask = lp_build_const_int_vec(gallivm, i32_type,
+                                    maskbits << mantissa_start);
+      res = lp_build_and(&i32_bld, res, mask);
+   }
+   return res;
 }
 
 
@@ -149,7 +197,7 @@ LLVMValueRef
 lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
                             LLVMValueRef *src)
 {
-   LLVMValueRef dst, rcomp, bcomp, gcomp, shift, mask;
+   LLVMValueRef dst, rcomp, bcomp, gcomp;
    struct lp_build_context i32_bld;
    LLVMTypeRef src_type = LLVMTypeOf(*src);
    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
@@ -158,25 +206,12 @@ lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
 
    lp_build_context_init(&i32_bld, gallivm, i32_type);
 
-   /* "rescale" - this does the actual conversion except the packing */
-   rcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[0], 6, 5);
-   gcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[1], 6, 5);
-   bcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[2], 5, 5);
-
-   /* pack rescaled SoA floats to r11g11b10 AoS values */
-   shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 6);
-   rcomp = lp_build_shr(&i32_bld, rcomp, shift);
-
-   shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 17);
-   mask = lp_build_const_int_vec(gallivm, i32_type, 0x7ff << 11);
-   gcomp = lp_build_shr(&i32_bld, gcomp, shift);
-   gcomp = lp_build_and(&i32_bld, gcomp, mask);
-
-   shift = lp_build_const_int_vec(gallivm, i32_type, 27 - 23);
-   mask = lp_build_const_int_vec(gallivm, i32_type, 0x3ff << 22);
-   bcomp = lp_build_shl(&i32_bld, bcomp, shift);
-   bcomp = lp_build_and(&i32_bld, bcomp, mask);
+   /* "rescale" and put in right position */
+   rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false);
+   gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false);
+   bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false);
 
+   /* combine the values */
    dst = lp_build_or(&i32_bld, rcomp, gcomp);
    return lp_build_or(&i32_bld, dst, bcomp);
 }
author	Roland Scheidegger <[email protected]>	2013-04-02 17:41:44 +0200
committer	Roland Scheidegger <[email protected]>	2013-04-02 18:24:31 +0200
commit	3febc4a1cdcfc3151195300e3f4ab10e8d46bedf (patch)
tree	5d33368f840f22e9f2f5f2e725f376ec0cc7c01e
parent	9be624b3ef32ae6311010cf05531e12051b647dc (diff)