diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 50 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend.c | 134 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 53 |
4 files changed, 191 insertions, 53 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a1edd349f1f..321c6e4edf0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(a == bld->zero) + if (a == bld->zero) return b; - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(a == bld->one || b == bld->one) + if (!type.sign && (a == bld->one || b == bld->one)) return bld->one; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { - if(util_cpu_caps.has_sse2) { - if(type.width == 8) + if (util_cpu_caps.has_sse2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; } } if (type.width * type.length == 256) { - if(util_cpu_caps.has_avx2) { - if(type.width == 8) + if (util_cpu_caps.has_avx2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w"; } } @@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(a == b) + if (a == b) return bld->zero; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(b == bld->one) + if (!type.sign && b == bld->one) return bld->zero; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs"; } } if (type.width * type.length == 256) { if (util_cpu_caps.has_avx2) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w"; } } @@ -963,7 +963,7 @@ lp_build_sub(struct lp_build_context *bld, * @sa Michael Herf, The "double blend trick", May 2000, * http://www.stereopsis.com/doubleblend.html */ -static LLVMValueRef +LLVMValueRef lp_build_mul_norm(struct gallivm_state *gallivm, struct lp_type wide_type, LLVMValueRef a, LLVMValueRef b) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 2a4137a6780..f5b2800a2cf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -71,6 +71,13 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); + +LLVMValueRef +lp_build_mul_norm(struct gallivm_state *gallivm, + struct lp_type wide_type, + LLVMValueRef a, + LLVMValueRef b); + LLVMValueRef lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c index 1feb415c9e5..02ec55eddbd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -35,6 +35,7 @@ #include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_pack.h" #include "lp_bld_blend.h" @@ -65,11 +66,11 @@ lp_build_blend_func_commutative(unsigned func) boolean lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) { - if(rgb_func == alpha_func) + if (rgb_func == alpha_func) return FALSE; - if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) return TRUE; - if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) return TRUE; return FALSE; } @@ -81,11 +82,65 @@ lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) static inline boolean lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) { + STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE); + STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) == + PIPE_BLENDFACTOR_INV_CONST_COLOR); return dst_factor == (src_factor ^ 0x10); } /** + * Whether this is a inverse blend factor + */ +static inline boolean +is_inverse_factor(unsigned factor) +{ + STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11); + return factor > 0x11; +} + + +/** + * Calculates the (expanded to wider type) multiplication + * of 2 normalized numbers. + */ +static void +lp_build_mul_norm_expand(struct lp_build_context *bld, + LLVMValueRef a, LLVMValueRef b, + LLVMValueRef *resl, LLVMValueRef *resh, + boolean signedness_differs) +{ + const struct lp_type type = bld->type; + struct lp_type wide_type = lp_wider_type(type); + struct lp_type wide_type2 = wide_type; + struct lp_type type2 = type; + LLVMValueRef al, ah, bl, bh; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + assert(!type.floating && !type.fixed && type.norm); + + if (a == bld->zero || b == bld->zero) { + LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type)); + *resl = zero; + *resh = zero; + return; + } + + if (signedness_differs) { + type2.sign = !type.sign; + wide_type2.sign = !wide_type2.sign; + } + + lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah); + lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh); + + *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl); + *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh); +} + + +/** * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml */ LLVMValueRef @@ -155,7 +210,7 @@ lp_build_blend(struct lp_build_context *bld, } else { return lp_build_lerp(bld, dst_factor, src, dst, 0); } - } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) { + } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) { result = lp_build_add(bld, src, dst); if (factor_src < factor_dst) { @@ -165,7 +220,7 @@ lp_build_blend(struct lp_build_context *bld, result = lp_build_mul(bld, result, dst_factor); return lp_build_sub(bld, src, result); } - } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { + } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { result = lp_build_add(bld, src, dst); if (factor_src < factor_dst) { @@ -192,9 +247,72 @@ lp_build_blend(struct lp_build_context *bld, if (optimise_only) return NULL; - src_term = lp_build_mul(bld, src, src_factor); - dst_term = lp_build_mul(bld, dst, dst_factor); - return lp_build_blend_func(bld, func, src_term, dst_term); + if ((bld->type.norm && bld->type.sign) && + (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) { + /* + * With snorm blending, the inverse blend factors range from [0,2] + * instead of [-1,1], so the ordinary signed normalized arithmetic + * doesn't quite work. Unpack must be unsigned, and the add/sub + * must be done with wider type. + * (Note that it's not quite obvious what the blend equation wrt to + * clamping should actually be based on GL spec in this case, but + * really the incoming src values are clamped to [-1,1] (the dst is + * always clamped already), and then NO further clamping occurs until + * the end.) + */ + struct lp_build_context bldw; + struct lp_type wide_type = lp_wider_type(bld->type); + LLVMValueRef src_terml, src_termh, dst_terml, dst_termh; + LLVMValueRef resl, resh; + + /* + * We don't need saturate math for the sub/add, since we have + * x+1 bit numbers in x*2 wide type (result is x+2 bits). + * (Doesn't really matter on x86 sse2 though as we use saturated + * intrinsics.) + */ + wide_type.norm = 0; + lp_build_context_init(&bldw, bld->gallivm, wide_type); + + /* + * XXX This is a bit hackish. Note that -128 really should + * be -1.0, the same as -127. However, we did not actually clamp + * things anywhere (relying on pack intrinsics instead) therefore + * we will get -128, and the inverted factor then 255. But the mul + * can overflow in this case (rather the rounding fixups for the mul, + * -128*255 will be positive). + * So we clamp the src and dst up here but only when necessary (we + * should do this before calculating blend factors but it's enough + * for avoiding overflow). + */ + if (is_inverse_factor(factor_src)) { + src = lp_build_max(bld, src, + lp_build_const_vec(bld->gallivm, bld->type, -1.0)); + } + if (is_inverse_factor(factor_dst)) { + dst = lp_build_max(bld, dst, + lp_build_const_vec(bld->gallivm, bld->type, -1.0)); + } + + lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh, + is_inverse_factor(factor_src) ? TRUE : FALSE); + lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh, + is_inverse_factor(factor_dst) ? TRUE : FALSE); + resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml); + resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh); + + /* + * XXX pack2_native is not ok because the values have to be in dst + * range. We need native pack though for the correct order on avx2. + * Will break on everything not implementing clamping pack intrinsics + * (i.e. everything but sse2 and altivec). + */ + return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh); + } else { + src_term = lp_build_mul(bld, src, src_factor); + dst_term = lp_build_mul(bld, dst, dst_factor); + return lp_build_blend_func(bld, func, src_term, dst_term); + } } void diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 45c5c2bb65e..c16ef1a2e91 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -112,22 +112,34 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, case PIPE_BLENDFACTOR_DST_ALPHA: return bld->dst; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - if(alpha) + if (alpha) return bld->base.one; else { /* * If there's no dst alpha the complement is zero but for unclamped - * float inputs min can be non-zero (negative). + * float inputs (or snorm inputs) min can be non-zero (negative). */ - if (!bld->has_dst_alpha) { - if (!bld->saturate) + if (!bld->saturate) { + if (!bld->has_dst_alpha) { bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero); - } - else { - if(!bld->inv_dst) - bld->inv_dst = lp_build_comp(&bld->base, bld->dst); - if(!bld->saturate) + } + else if (bld->base.type.norm && bld->base.type.sign) { + /* + * The complement/min totally doesn't work, since + * the complement is in range [0,2] but the other + * min input is [-1,1]. However, we can just clamp to 0 + * before doing the complement... + */ + LLVMValueRef inv_dst; + inv_dst = lp_build_max(&bld->base, bld->base.zero, bld->dst); + inv_dst = lp_build_comp(&bld->base, inv_dst); + bld->saturate = lp_build_min(&bld->base, src_alpha, inv_dst); + } else { + if (!bld->inv_dst) { + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + } bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst); + } } return bld->saturate; } @@ -140,24 +152,24 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, case PIPE_BLENDFACTOR_SRC1_ALPHA: return src1_alpha; case PIPE_BLENDFACTOR_INV_SRC_COLOR: - if(!bld->inv_src) + if (!bld->inv_src) bld->inv_src = lp_build_comp(&bld->base, bld->src); return bld->inv_src; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!bld->inv_src_alpha) + if (!bld->inv_src_alpha) bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha); return bld->inv_src_alpha; case PIPE_BLENDFACTOR_INV_DST_COLOR: case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!bld->inv_dst) + if (!bld->inv_dst) bld->inv_dst = lp_build_comp(&bld->base, bld->dst); return bld->inv_dst; case PIPE_BLENDFACTOR_INV_CONST_COLOR: - if(!bld->inv_const) + if (!bld->inv_const) bld->inv_const = lp_build_comp(&bld->base, bld->const_); return bld->inv_const; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!bld->inv_const_alpha) + if (!bld->inv_const_alpha) bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha); return bld->inv_const_alpha; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -331,7 +343,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, bld.const_alpha = const_alpha; bld.has_dst_alpha = FALSE; - /* Find the alpha channel if not provided seperately */ + /* Find the alpha channel if not provided separately */ if (!src_alpha) { for (i = 0; i < 4; ++i) { if (swizzle[i] == 3) { @@ -349,7 +361,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, } if (blend->logicop_enable) { - if(!type.floating) { + if (!type.floating) { result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst); } else { @@ -361,6 +373,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1; + boolean alpha_only = nr_channels == 1 && alpha_swizzle == PIPE_SWIZZLE_X; src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, state->alpha_src_factor, @@ -374,8 +387,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm, result = lp_build_blend(&bld.base, state->rgb_func, - state->rgb_src_factor, - state->rgb_dst_factor, + alpha_only ? state->alpha_src_factor : state->rgb_src_factor, + alpha_only ? state->alpha_dst_factor : state->rgb_dst_factor, src, dst, src_factor, @@ -383,8 +396,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm, rgb_alpha_same, false); - if(state->rgb_func != state->alpha_func && nr_channels > 1 && - alpha_swizzle != PIPE_SWIZZLE_NONE) { + if (state->rgb_func != state->alpha_func && nr_channels > 1 && + alpha_swizzle != PIPE_SWIZZLE_NONE) { LLVMValueRef alpha; alpha = lp_build_blend(&bld.base, |