diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_conv.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_conv.c | 52 |
1 file changed, 41 insertions, 11 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index cbea966882a..56c15818e7c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -257,6 +257,7 @@ lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm, bias = (double)(1ULL << (mantissa - dst_width)); res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), ""); + /* instead of fadd/and could (with sse2) just use lp_build_iround */ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); res = LLVMBuildAnd(builder, res, @@ -742,7 +743,6 @@ lp_build_conv(struct gallivm_state *gallivm, } else { double dst_scale = lp_const_scale(dst_type); - LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale); @@ -750,19 +750,37 @@ lp_build_conv(struct gallivm_state *gallivm, tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } - /* Use an equally sized integer for intermediate computations */ - tmp_type.floating = FALSE; - tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); - for(i = 0; i < num_tmps; ++i) { + /* + * these functions will use fptosi in some form which won't work + * with 32bit uint dst. 
+ */ + assert(dst_type.sign || dst_type.width < 32); + + if (dst_type.sign && dst_type.norm && !dst_type.fixed) { + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, tmp_type); + for(i = 0; i < num_tmps; ++i) { + tmp[i] = lp_build_iround(&bld, tmp[i]); + } + tmp_type.floating = FALSE; + } + else { + LLVMTypeRef tmp_vec_type; + + tmp_type.floating = FALSE; + tmp_vec_type = lp_build_vec_type(gallivm, tmp_type); + for(i = 0; i < num_tmps; ++i) { #if 0 - if(dst_type.sign) - tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); - else - tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); + if(dst_type.sign) + tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); + else + tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, ""); #else - /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ - tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); + /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ + tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, ""); #endif + } } } } @@ -860,6 +878,18 @@ lp_build_conv(struct gallivm_state *gallivm, for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, ""); } + + /* the formula above will produce value below -1.0 for most negative + * value but everything seems happy with that hence disable for now */ + if (0 && !src_type.fixed && src_type.norm && src_type.sign) { + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, dst_type); + for(i = 0; i < num_tmps; ++i) { + tmp[i] = lp_build_max(&bld, tmp[i], + lp_build_const_vec(gallivm, dst_type, -1.0f)); + } + } } } else { |