diff options
author | Roland Scheidegger <[email protected]> | 2013-04-02 17:41:44 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2013-04-02 18:24:31 +0200 |
commit | 3febc4a1cdcfc3151195300e3f4ab10e8d46bedf (patch) | |
tree | 5d33368f840f22e9f2f5f2e725f376ec0cc7c01e /src/gallium/auxiliary/gallivm/lp_bld_format_float.c | |
parent | 9be624b3ef32ae6311010cf05531e12051b647dc (diff) |
gallivm: consolidate code for float-to-half and float-to-packed conversion.
This replaces the existing float-to-half implementation.
There are definitely a couple of differences - the old implementation
had unspecified(?) rounding behavior, and could at least in theory
construct Inf values out of NaNs. NaNs and Infs should now always be
properly propagated, and rounding behavior is now towards zero
(note this means that finite values too large to be represented are converted
to the largest representable value, not to Infinity).
Note the implementation will not match the util (CPU) conversion code, which
does round-to-nearest instead, and hence converts too-large finite values
to Infinity.
Also fix a bogus round mask probably leading to rounding bugs...
v2: fix a logic bug in handling infs/nans.
Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_format_float.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_float.c | 135 |
1 file changed, 85 insertions, 50 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c index f899d6d64d5..a8cfe02f0d8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c @@ -47,13 +47,14 @@ /** * Convert float32 to a float-like value with less exponent and mantissa * bits. The mantissa is still biased, and the mantissa still has an implied 1, - * but there's no sign bit. + * and there may be a sign bit. * * @param src (vector) float value to convert * @param mantissa_bits the number of mantissa bits * @param exponent_bits the number of exponent bits + * @param mantissa_start the start position of the small float in result value + * @param has_sign if the small float has a sign bit * - * Unlike float_to_half using accurate method here. * This implements round-towards-zero (trunc) hence too large numbers get * converted to largest representable number, not infinity. * Small numbers may get converted to denorms, depending on normal @@ -61,27 +62,30 @@ * Note that compared to the references, below, we skip any rounding bias * since we do rounding towards zero - OpenGL allows rounding towards zero * (though not preferred) and DX10 even seems to require it. - * Note that this will not do any packing - the value will - * look like a "rescaled float" (except for Inf/NaN) but be returned - * as int32. + * Note that this will pack mantissa, exponent and sign bit (if any) together, + * and shift the result to mantissa_start. 
* * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ * ref https://gist.github.com/rygorous/2156668 */ -static LLVMValueRef -lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm, - struct lp_type i32_type, - LLVMValueRef src, - unsigned mantissa_bits, - unsigned exponent_bits) +LLVMValueRef +lp_build_float_to_smallfloat(struct gallivm_state *gallivm, + struct lp_type i32_type, + LLVMValueRef src, + unsigned mantissa_bits, + unsigned exponent_bits, + unsigned mantissa_start, + boolean has_sign) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal; - LLVMValueRef clamped, tmp, i32_roundmask, small_max, src_abs; - LLVMValueRef is_nan, is_posinf, is_nan_or_posinf, i32_qnanbit, nan_or_posinf; + LLVMValueRef rescale_src, tmp, i32_roundmask, small_max; + LLVMValueRef is_nan, i32_qnanbit, src_abs, shift, infcheck_src, res; + LLVMValueRef is_inf, is_nan_or_inf, nan_or_inf, mask; struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length); struct lp_build_context f32_bld, i32_bld; LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f); + unsigned exponent_start = mantissa_start + mantissa_bits; lp_build_context_init(&f32_bld, gallivm, f32_type); lp_build_context_init(&i32_bld, gallivm, i32_type); @@ -90,16 +94,29 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm, ((1 << exponent_bits) - 1) << 23); i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23); + src_abs = lp_build_abs(&f32_bld, src); + src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, ""); + + if (has_sign) { + rescale_src = src_abs; + infcheck_src = src_abs; + src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, ""); + } + else { + /* clamp to pos range (can still have sign bit if NaN or negative zero) */ + rescale_src = lp_build_max(&f32_bld, src, zero); + rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); + src = 
LLVMBuildBitCast(builder, src, i32_bld.vec_type, ""); + infcheck_src = src; + } + /* "ordinary" number */ - /* clamp to pos range (can still have sign bit if NaN or negative zero) */ - clamped = lp_build_max(&f32_bld, src, zero); - clamped = LLVMBuildBitCast(builder, clamped, i32_bld.vec_type, ""); /* get rid of excess mantissa bits, and while here also potential sign bit */ i32_roundmask = lp_build_const_int_vec(gallivm, i32_type, - ~((1 << (23 - mantissa_bits)) - 1) | + ~((1 << (23 - mantissa_bits)) - 1) & 0x7fffffff); - tmp = lp_build_and(&i32_bld, clamped, i32_roundmask); + tmp = lp_build_and(&i32_bld, rescale_src, i32_roundmask); tmp = LLVMBuildBitCast(builder, tmp, f32_bld.vec_type, ""); /* bias exponent (and denormalize if necessary) */ magic = lp_build_const_int_vec(gallivm, i32_type, @@ -107,7 +124,7 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm, magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); normal = lp_build_mul(&f32_bld, tmp, magic); - /* clamp to max value */ + /* clamp to max value - largest non-infinity number */ small_max = lp_build_const_int_vec(gallivm, i32_type, (((1 << exponent_bits) - 2) << 23) | (((1 << mantissa_bits) - 1) << (23 - mantissa_bits))); @@ -118,25 +135,56 @@ lp_build_float_to_smallfloat_nosign(struct gallivm_state *gallivm, /* * handle nan/inf cases * a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan - * Note that on a lucky day, we could simplify this a bit, - * by just using the max(src, zero) result - this will have -Inf - * clamped to 0, and MIGHT preserve the NaNs. + * (for no sign) else ->Inf -> ->Inf too. + * could use explicit "unordered" comparison checking for NaNs + * which might save us from calculating src_abs too. + * (Cannot actually save the comparison since we need to distinguish + * Inf and NaN cases anyway, but it would be better for AVX.) 
*/ - src_abs = lp_build_abs(&f32_bld, src); - src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, ""); - src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, ""); is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, src_abs, i32_floatexpmask); - is_posinf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL, - src, i32_floatexpmask); - is_nan_or_posinf = lp_build_and(&i32_bld, is_nan, is_posinf); + is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL, + infcheck_src, i32_floatexpmask); + is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf); /* could also set more mantissa bits but need at least the highest mantissa bit */ i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22); /* combine maxexp with qnanbit */ - nan_or_posinf = lp_build_or(&i32_bld, i32_smallexpmask, - lp_build_and(&i32_bld, is_nan, i32_qnanbit)); + nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask, + lp_build_and(&i32_bld, is_nan, i32_qnanbit)); + + res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal); - return lp_build_select(&i32_bld, is_nan_or_posinf, nan_or_posinf, normal); + /* add back sign bit at right position */ + if (has_sign) { + LLVMValueRef sign; + struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length); + struct lp_build_context u32_bld; + lp_build_context_init(&u32_bld, gallivm, u32_type); + + mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000); + shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits); + sign = lp_build_and(&i32_bld, mask, src); + sign = lp_build_shr(&u32_bld, sign, shift); + res = lp_build_or(&i32_bld, sign, res); + } + + /* shift to final position */ + if (exponent_start < 23) { + shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start); + res = lp_build_shr(&i32_bld, res, shift); + } + else { + shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23); + res = lp_build_shl(&i32_bld, res, shift); + } + if (mantissa_start > 0) { + /* generally 
shouldn't get bits to mask off but can happen with denormals */ + unsigned maskbits = (1 << (mantissa_bits + exponent_bits + has_sign)) - 1; + mask = lp_build_const_int_vec(gallivm, i32_type, + maskbits << mantissa_start); + res = lp_build_and(&i32_bld, res, mask); + } + return res; } @@ -149,7 +197,7 @@ LLVMValueRef lp_build_float_to_r11g11b10(struct gallivm_state *gallivm, LLVMValueRef *src) { - LLVMValueRef dst, rcomp, bcomp, gcomp, shift, mask; + LLVMValueRef dst, rcomp, bcomp, gcomp; struct lp_build_context i32_bld; LLVMTypeRef src_type = LLVMTypeOf(*src); unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ? @@ -158,25 +206,12 @@ lp_build_float_to_r11g11b10(struct gallivm_state *gallivm, lp_build_context_init(&i32_bld, gallivm, i32_type); - /* "rescale" - this does the actual conversion except the packing */ - rcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[0], 6, 5); - gcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[1], 6, 5); - bcomp = lp_build_float_to_smallfloat_nosign(gallivm, i32_type, src[2], 5, 5); - - /* pack rescaled SoA floats to r11g11b10 AoS values */ - shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 6); - rcomp = lp_build_shr(&i32_bld, rcomp, shift); - - shift = lp_build_const_int_vec(gallivm, i32_type, 23 - 17); - mask = lp_build_const_int_vec(gallivm, i32_type, 0x7ff << 11); - gcomp = lp_build_shr(&i32_bld, gcomp, shift); - gcomp = lp_build_and(&i32_bld, gcomp, mask); - - shift = lp_build_const_int_vec(gallivm, i32_type, 27 - 23); - mask = lp_build_const_int_vec(gallivm, i32_type, 0x3ff << 22); - bcomp = lp_build_shl(&i32_bld, bcomp, shift); - bcomp = lp_build_and(&i32_bld, bcomp, mask); + /* "rescale" and put in right position */ + rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false); + gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false); + bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false); + /* 
combine the values */ dst = lp_build_or(&i32_bld, rcomp, gcomp); return lp_build_or(&i32_bld, dst, bcomp); } |