diff options
-rw-r--r-- | src/gallium/drivers/llvmpipe/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend.c | 191 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend.h | 44 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 152 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c | 63 |
6 files changed, 290 insertions, 162 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 1a4e93d7ce3..26fbde9a169 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -5,6 +5,7 @@ LIBNAME = llvmpipe C_SOURCES = \ lp_bld_alpha.c \ + lp_bld_blend.c \ lp_bld_blend_aos.c \ lp_bld_blend_logicop.c \ lp_bld_blend_soa.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index c1a63ee877f..85560a1c716 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -32,6 +32,7 @@ llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ 'lp_bld_alpha.c', + 'lp_bld_blend.c', 'lp_bld_blend_aos.c', 'lp_bld_blend_logicop.c', 'lp_bld_blend_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c new file mode 100644 index 00000000000..a87a220ea48 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2012 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_bld_blend.h" + +/** + * Is (a OP b) == (b OP a)? + * + * ADD, MIN and MAX are commutative; SUBTRACT and REVERSE_SUBTRACT are not. + * An unrecognised func trips the assert and falls back to TRUE. + */ +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +/** + * Whether the blending functions are the reverse of each other. + * + * TRUE only for the pair SUBTRACT / REVERSE_SUBTRACT (in either order); + * identical functions and every other combination give FALSE. + */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + +/** + * Whether the blending factors are complementary of each other. 
+ */ +static INLINE boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml + */ +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2) +{ + switch (func) { + case PIPE_BLEND_ADD: + return lp_build_add(bld, term1, term2); + case PIPE_BLEND_SUBTRACT: + return lp_build_sub(bld, term1, term2); + case PIPE_BLEND_REVERSE_SUBTRACT: + return lp_build_sub(bld, term2, term1); + case PIPE_BLEND_MIN: + return lp_build_min(bld, term1, term2); + case PIPE_BLEND_MAX: + return lp_build_max(bld, term1, term2); + default: + assert(0); + return bld->zero; + } +} + + +/** + * Performs optimisations and blending independent of SoA/AoS + * + * @param func the blend function + * @param factor_src PIPE_BLENDFACTOR_xxx + * @param factor_dst PIPE_BLENDFACTOR_xxx + * @param src source rgba + * @param dst dest rgba + * @param src_factor src factor computed value + * @param dst_factor dst factor computed value + * @param not_alpha_dependent same factors across all channels of src/dst + * @param optimise_only if true, return NULL instead of emitting the generic blend when no shortcut below applies + * + * @return the blended value, or NULL when optimise_only is set and no shortcut applied + * + * not_alpha_dependent should be: + * SoA: always true as it is only one channel at a time + * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor + * + * Note that pretty much every possible optimisation can only be done on non-unorm targets + * due to unorm values not going above 1.0 meaning factorisation can change results. + * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1. 
+ */ +LLVMValueRef +lp_build_blend(struct lp_build_context *bld, + unsigned func, + unsigned factor_src, + unsigned factor_dst, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef src_factor, + LLVMValueRef dst_factor, + boolean not_alpha_dependent, + boolean optimise_only) +{ + LLVMValueRef result, src_term, dst_term; + + /* If we are not alpha dependent we can mess with the src/dst factors */ + if (not_alpha_dependent) { + /* + * Complementary factors: one side is f, the other is (1 - f). + * factor_src < factor_dst means the source holds the non-inverted f. + */ + if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { + if (func == PIPE_BLEND_ADD) { + if (factor_src < factor_dst) { + return lp_build_lerp(bld, src_factor, dst, src); + } else { + return lp_build_lerp(bld, dst_factor, src, dst); + } + } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + /* src*f - dst*(1 - f) == (src + dst)*f - dst, f = src_factor */ + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, result, dst); + } else { + /* src*(1 - f) - dst*f == src - (src + dst)*f, f = dst_factor */ + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, src, result); + } + } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + /* dst*(1 - f) - src*f == dst - (src + dst)*f, f = src_factor */ + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, dst, result); + } else { + /* dst*f - src*(1 - f) == (src + dst)*f - src, f = dst_factor */ + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, result, src); + } + } + } + + /* Common factor: (src OP dst) * factor, floating point only as saturation would change the result */ + if (bld->type.floating && factor_src == factor_dst) { + if (func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) { + result = lp_build_blend_func(bld, func, src, dst); + return lp_build_mul(bld, result, src_factor); + } + } + } + + /* Caller only wanted the shortcuts and will build the generic blend itself */ + if (optimise_only) + return NULL; + + src_term = lp_build_mul(bld, src, src_factor); + dst_term = lp_build_mul(bld, dst, dst_factor); + return lp_build_blend_func(bld, func, src_term, dst_term); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index c0c95a27129..68e55ac05b3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ 
b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -40,25 +40,17 @@ struct lp_type; struct lp_build_context; -/** - * Whether the blending function is commutative or not. - */ -boolean -lp_build_blend_func_commutative(unsigned func); - - -/** - * Whether the blending functions are the reverse of each other. - */ -boolean -lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func); - - LLVMValueRef -lp_build_blend_func(struct lp_build_context *bld, - unsigned func, - LLVMValueRef term1, - LLVMValueRef term2); +lp_build_blend(struct lp_build_context *bld, + unsigned func, + unsigned factor_src, + unsigned factor_dst, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef src_factor, + LLVMValueRef dst_factor, + boolean not_alpha_dependent, + boolean optimise_only); LLVMValueRef @@ -98,4 +90,20 @@ lp_build_logicop(LLVMBuilderRef builder, LLVMValueRef dst); +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, + unsigned alpha_func); + + +boolean +lp_build_blend_func_commutative(unsigned func); + + #endif /* !LP_BLD_BLEND_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 59d5f545966..66df662711c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -220,84 +220,21 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, */ static LLVMValueRef lp_build_blend_factor(struct lp_build_blend_aos_context *bld, - LLVMValueRef factor1, unsigned rgb_factor, unsigned alpha_factor, unsigned alpha_swizzle) { - LLVMValueRef rgb_factor_; - LLVMValueRef alpha_factor_; - LLVMValueRef factor2; + LLVMValueRef rgb_factor_, alpha_factor_; enum lp_build_blend_swizzle rgb_swizzle; - rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - alpha_factor_ = lp_build_blend_factor_unswizzled(bld, 
alpha_factor, TRUE); + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); - - factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); - - return lp_build_mul(&bld->base, factor1, factor2); -} - - -/** - * Is (a OP b) == (b OP a)? - */ -boolean -lp_build_blend_func_commutative(unsigned func) -{ - switch (func) { - case PIPE_BLEND_ADD: - case PIPE_BLEND_MIN: - case PIPE_BLEND_MAX: - return TRUE; - case PIPE_BLEND_SUBTRACT: - case PIPE_BLEND_REVERSE_SUBTRACT: - return FALSE; - default: - assert(0); - return TRUE; - } -} - - -boolean -lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) -{ - if(rgb_func == alpha_func) - return FALSE; - if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) - return TRUE; - if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) - return TRUE; - return FALSE; -} - - -/** - * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml - */ -LLVMValueRef -lp_build_blend_func(struct lp_build_context *bld, - unsigned func, - LLVMValueRef term1, - LLVMValueRef term2) -{ - switch (func) { - case PIPE_BLEND_ADD: - return lp_build_add(bld, term1, term2); - case PIPE_BLEND_SUBTRACT: - return lp_build_sub(bld, term1, term2); - case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub(bld, term2, term1); - case PIPE_BLEND_MIN: - return lp_build_min(bld, term1, term2); - case PIPE_BLEND_MAX: - return lp_build_max(bld, term1, term2); - default: - assert(0); - return bld->zero; + if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { + rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); + } else { + return rgb_factor_; } } @@ -329,9 +266,9 @@ lp_build_blend_aos(struct gallivm_state *gallivm, 
LLVMValueRef const_, const unsigned char swizzle[4]) { + const struct pipe_rt_blend_state * state = &blend->rt[rt]; struct lp_build_blend_aos_context bld; - LLVMValueRef src_term; - LLVMValueRef dst_term; + LLVMValueRef src_factor, dst_factor; LLVMValueRef result; unsigned alpha_swizzle = swizzle[3]; boolean fullcolormask; @@ -343,45 +280,60 @@ lp_build_blend_aos(struct gallivm_state *gallivm, bld.dst = dst; bld.const_ = const_; - if (!blend->rt[rt].blend_enable) { + if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0]) + alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE; + + if (!state->blend_enable) { result = src; } else { - - /* TODO: There are still a few optimization opportunities here. For certain - * combinations it is possible to reorder the operations and therefore saving - * some instructions. */ - - src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, - blend->rt[rt].alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, - blend->rt[rt].alpha_dst_factor, alpha_swizzle); - - lp_build_name(src_term, "src_term"); - lp_build_name(dst_term, "dst_term"); - - if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { - result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); - } - else { - /* Seperate RGB / A functions */ - - LLVMValueRef rgb; + boolean rgb_alpha_same = state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor; + assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, + state->alpha_src_factor, alpha_swizzle); + dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor, + state->alpha_dst_factor, alpha_swizzle); + + result = lp_build_blend(&bld.base, + state->rgb_func, + state->rgb_src_factor, + state->rgb_dst_factor, + src, + dst, + src_factor, + dst_factor, + rgb_alpha_same, + false); + + if(state->rgb_func != 
state->alpha_func && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { LLVMValueRef alpha; - rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); - - result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + alpha = lp_build_blend(&bld.base, + state->alpha_func, + state->alpha_src_factor, + state->alpha_dst_factor, + src, + dst, + src_factor, + dst_factor, + rgb_alpha_same, + false); + + result = lp_build_blend_swizzle(&bld, + result, + alpha, + LP_BUILD_BLEND_SWIZZLE_RGBA, + alpha_swizzle); } } /* Check if color mask is necessary */ - fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask); + fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), state->colormask); if (!fullcolormask) { LLVMValueRef color_mask; - color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle); + color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, swizzle); lp_build_name(color_mask, "color_mask"); /* Combine with input mask if necessary */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c index 4d5bc9642d9..bba58b10492 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c @@ -196,13 +196,6 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, } -static boolean -lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) -{ - return dst_factor == (src_factor ^ 0x10); -} - - /** * Generate blend code in SOA mode. * \param rt render target index (to index the blend / colormask state) @@ -252,42 +245,6 @@ lp_build_blend_soa(struct gallivm_state *gallivm, unsigned func = i < 3 ? 
blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; boolean func_commutative = lp_build_blend_func_commutative(func); - if (func == PIPE_BLEND_ADD && - lp_build_blend_factor_complementary(src_factor, dst_factor) && 0) { - /* - * Special case linear interpolation, (i.e., complementary factors). - */ - - LLVMValueRef weight; - if (src_factor < dst_factor) { - weight = lp_build_blend_soa_factor(&bld, src_factor, i); - res[i] = lp_build_lerp(&bld.base, weight, dst[i], src[i]); - } else { - weight = lp_build_blend_soa_factor(&bld, dst_factor, i); - res[i] = lp_build_lerp(&bld.base, weight, src[i], dst[i]); - } - continue; - } - - if ((func == PIPE_BLEND_ADD || - func == PIPE_BLEND_SUBTRACT || - func == PIPE_BLEND_REVERSE_SUBTRACT) && - src_factor == dst_factor && - type.floating) { - /* - * Special common factor. - * - * XXX: Only for floating points for now, since saturation will - * cause different results. - */ - - LLVMValueRef factor; - factor = lp_build_blend_soa_factor(&bld, src_factor, i); - res[i] = lp_build_blend_func(&bld.base, func, src[i], dst[i]); - res[i] = lp_build_mul(&bld.base, res[i], factor); - continue; - } - /* * Compute src/dst factors. */ @@ -298,6 +255,24 @@ lp_build_blend_soa(struct gallivm_state *gallivm, bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i); /* + * Check if lp_build_blend can perform any optimisations + */ + res[i] = lp_build_blend(&bld.base, + func, + src_factor, + dst_factor, + bld.factor[0][0][i], + bld.factor[1][0][i], + bld.factor[0][1][i], + bld.factor[1][1][i], + true, + true); + + if (res[i]) { + continue; + } + + /* * Compute src/dst terms */ @@ -311,7 +286,7 @@ lp_build_blend_soa(struct gallivm_state *gallivm, break; } - if(j < i) + if(j < i && bld.term[k][j]) bld.term[k][i] = bld.term[k][j]; else bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); |