diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_conv.c | 75 |
1 files changed, 26 insertions, 49 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 9c7846031ec..0973e1f16f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -336,61 +336,38 @@ lp_build_conv(struct gallivm_state *gallivm,
        util_cpu_caps.has_sse2) {
-      int i;
+      struct lp_build_context bld;
+      struct lp_type int16_type = dst_type;
+      struct lp_type int32_type = dst_type;
+      LLVMValueRef const_255f;
+      unsigned i, j;
+
+      lp_build_context_init(&bld, gallivm, src_type);
+
+      int16_type.width *= 2;
+      int16_type.length /= 2;
+      int16_type.sign = 1;
+
+      int32_type.width *= 4;
+      int32_type.length /= 4;
+      int32_type.sign = 1;
-      for (i = 0; i < num_dsts; i++, src += 4) {
-         struct lp_type int16_type = dst_type;
-         struct lp_type int32_type = dst_type;
+      const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);
+
+      for (i = 0; i < num_dsts; ++i, src += 4) {
          LLVMValueRef lo, hi;
-         LLVMValueRef src_int0;
-         LLVMValueRef src_int1;
-         LLVMValueRef src_int2;
-         LLVMValueRef src_int3;
-         LLVMTypeRef int32_vec_type;
-         LLVMTypeRef src_vec_type;
-         LLVMValueRef const_255f;
-         LLVMValueRef a, b, c, d;
-
-         int16_type.width *= 2;
-         int16_type.length /= 2;
-         int16_type.sign = 1;
-
-         int32_type.width *= 4;
-         int32_type.length /= 4;
-         int32_type.sign = 1;
-
-         src_vec_type = lp_build_vec_type(gallivm, src_type);
-         int32_vec_type = lp_build_vec_type(gallivm, int32_type);
-
-         const_255f = lp_build_const_vec(gallivm, src_type, 255.0f);
-
-         a = LLVMBuildFMul(builder, src[0], const_255f, "");
-         b = LLVMBuildFMul(builder, src[1], const_255f, "");
-         c = LLVMBuildFMul(builder, src[2], const_255f, "");
-         d = LLVMBuildFMul(builder, src[3], const_255f, "");
-
-         {
-            struct lp_build_context bld;
-
-            bld.gallivm = gallivm;
-            bld.type = src_type;
-            bld.vec_type = src_vec_type;
-            bld.int_elem_type = lp_build_elem_type(gallivm, int32_type);
-            bld.int_vec_type = int32_vec_type;
-            bld.undef = lp_build_undef(gallivm, src_type);
-            bld.zero = lp_build_zero(gallivm, src_type);
-            bld.one = lp_build_one(gallivm, src_type);
-
-            src_int0 = lp_build_iround(&bld, a);
-            src_int1 = lp_build_iround(&bld, b);
-            src_int2 = lp_build_iround(&bld, c);
-            src_int3 = lp_build_iround(&bld, d);
+
+         for (j = 0; j < 4; ++j) {
+            tmp[j] = LLVMBuildFMul(builder, src[j], const_255f, "");
+            tmp[j] = lp_build_iround(&bld, tmp[j]);
          }
+         /* relying on clamping behavior of sse2 intrinsics here */
-         lo = lp_build_pack2(gallivm, int32_type, int16_type, src_int0, src_int1);
-         hi = lp_build_pack2(gallivm, int32_type, int16_type, src_int2, src_int3);
+         lo = lp_build_pack2(gallivm, int32_type, int16_type, tmp[0], tmp[1]);
+         hi = lp_build_pack2(gallivm, int32_type, int16_type, tmp[2], tmp[3]);
          dst[i] = lp_build_pack2(gallivm, int16_type, dst_type, lo, hi);
       }
+      return;
    }