diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_float.c | 85 |
1 file changed, 65 insertions, 20 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c index f68a617b810..b87174e4a20 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_float.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_float.c @@ -309,33 +309,78 @@ lp_build_smallfloat_to_float(struct gallivm_state *gallivm, ((1 << (mantissa_bits + exponent_bits)) - 1) << (23 - mantissa_bits)); srcabs = lp_build_and(&i32_bld, src, maskabs); - srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, ""); /* now do the actual scaling */ smallexpmask = lp_build_const_int_vec(gallivm, i32_type, ((1 << exponent_bits) - 1) << 23); i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23); - /* - * magic number has exponent new exp bias + (new exp bias - old exp bias), - * mantissa is 0. - */ - magic = lp_build_const_int_vec(gallivm, i32_type, - (255 - (1 << (exponent_bits - 1))) << 23); - magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); - /* adjust exponent and fix denorms */ - res = lp_build_mul(&f32_bld, srcabs, magic); + if (0) { + /* + * Note that this code path, while simpler, will convert small + * float denorms to floats according to current cpu denorm mode, if + * denorms are disabled it will flush them to zero! + * If cpu denorms are enabled, it should be faster though as long as + * there's no denorms in the inputs, but if there are actually denorms + * it's likely to be an order of magnitude slower (on x86 cpus). 
+ */ - /* - * if exp was max (== NaN or Inf) set new exp to max (keep mantissa), - * so a simple "or" will do (because exp adjust will leave mantissa intact) - */ - /* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */ - smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, ""); - wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask); - res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, ""); - tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan); - res = lp_build_or(&i32_bld, tmp, res); + srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, ""); + + /* + * magic number has exponent new exp bias + (new exp bias - old exp bias), + * mantissa is 0. + */ + magic = lp_build_const_int_vec(gallivm, i32_type, + (255 - (1 << (exponent_bits - 1))) << 23); + magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); + + /* adjust exponent and fix denorms */ + res = lp_build_mul(&f32_bld, srcabs, magic); + + /* + * if exp was max (== NaN or Inf) set new exp to max (keep mantissa), + * so a simple "or" will do (because exp adjust will leave mantissa intact) + */ + /* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */ + smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, ""); + wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask); + res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, ""); + tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan); + res = lp_build_or(&i32_bld, tmp, res); + } + + else { + LLVMValueRef exp_one, isdenorm, denorm, normal, exp_adj; + + /* denorm (or zero) if exponent is zero */ + exp_one = lp_build_const_int_vec(gallivm, i32_type, 1 << 23); + isdenorm = lp_build_cmp(&i32_bld, PIPE_FUNC_LESS, srcabs, exp_one); + + /* inf or nan if exponent is max */ + wasinfnan = lp_build_cmp(&i32_bld, PIPE_FUNC_GEQUAL, srcabs, smallexpmask); + + /* for denormal (or zero), add (== 
or) magic exp to mantissa (== srcabs) (as int) + * then subtract it (as float). + * Another option would be to just do inttofp then do a rescale mul. + */ + magic = lp_build_const_int_vec(gallivm, i32_type, + (127 - ((1 << (exponent_bits - 1)) - 2)) << 23); + denorm = lp_build_or(&i32_bld, srcabs, magic); + denorm = LLVMBuildBitCast(builder, denorm, f32_bld.vec_type, ""); + denorm = lp_build_sub(&f32_bld, denorm, + LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "")); + denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, ""); + + /* for normals, Infs, Nans fix up exponent */ + exp_adj = lp_build_const_int_vec(gallivm, i32_type, + (127 - ((1 << (exponent_bits - 1)) - 1)) << 23); + normal = lp_build_add(&i32_bld, srcabs, exp_adj); + tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask); + normal = lp_build_or(&i32_bld, tmp, normal); + + res = lp_build_select(&i32_bld, isdenorm, denorm, normal); + } if (has_sign) { LLVMValueRef signmask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000); |