summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorRoland Scheidegger <[email protected]>2017-11-18 06:23:35 +0100
committerRoland Scheidegger <[email protected]>2017-11-21 04:06:29 +0100
commitb5957cee920cd7a62e4e726538dbbe44c12e33ab (patch)
tree18f8df358fe595c18b033f83d0e896aebeaa651e /src/gallium/auxiliary
parent464c2d80834e4ccf7f28fb62b82a2fa13e6445fb (diff)
llvmpipe: fix snorm blending
The blend math gets a bit funky because inverse blend factors are in the range [0,2] rather than [-1,1], which our normalized math can't really cover. The src_alpha_saturate blend factor has a similar problem. (Note that the piglit fbo-blending-formats test is mostly useless for anything but unorm formats: not only are all src/dst values within [0,1], but the tests are also crafted in such a way that the results are within [0,1] too.) v2: some formatting fixes, and fix a fairly obscure (to debug) issue with alpha-only formats (not related to snorm at all), where the blend optimization would think it could simplify the blend equation if the blend factors were complementary, but was using the completely unrelated rgb blend factors instead of the alpha ones... Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c50
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h7
2 files changed, 32 insertions, 25 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index a1edd349f1f..321c6e4edf0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld,
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
- if(a == bld->zero)
+ if (a == bld->zero)
return b;
- if(b == bld->zero)
+ if (b == bld->zero)
return a;
- if(a == bld->undef || b == bld->undef)
+ if (a == bld->undef || b == bld->undef)
return bld->undef;
- if(bld->type.norm) {
+ if (type.norm) {
const char *intrinsic = NULL;
- if(a == bld->one || b == bld->one)
+ if (!type.sign && (a == bld->one || b == bld->one))
return bld->one;
if (!type.floating && !type.fixed) {
if (type.width * type.length == 128) {
- if(util_cpu_caps.has_sse2) {
- if(type.width == 8)
+ if (util_cpu_caps.has_sse2) {
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
} else if (util_cpu_caps.has_altivec) {
- if(type.width == 8)
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
}
}
if (type.width * type.length == 256) {
- if(util_cpu_caps.has_avx2) {
- if(type.width == 8)
+ if (util_cpu_caps.has_avx2) {
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w";
}
}
@@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld,
assert(lp_check_value(type, a));
assert(lp_check_value(type, b));
- if(b == bld->zero)
+ if (b == bld->zero)
return a;
- if(a == bld->undef || b == bld->undef)
+ if (a == bld->undef || b == bld->undef)
return bld->undef;
- if(a == b)
+ if (a == b)
return bld->zero;
- if(bld->type.norm) {
+ if (type.norm) {
const char *intrinsic = NULL;
- if(b == bld->one)
+ if (!type.sign && b == bld->one)
return bld->zero;
if (!type.floating && !type.fixed) {
if (type.width * type.length == 128) {
if (util_cpu_caps.has_sse2) {
- if(type.width == 8)
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
} else if (util_cpu_caps.has_altivec) {
- if(type.width == 8)
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
}
}
if (type.width * type.length == 256) {
if (util_cpu_caps.has_avx2) {
- if(type.width == 8)
+ if (type.width == 8)
intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
- if(type.width == 16)
+ if (type.width == 16)
intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w";
}
}
@@ -963,7 +963,7 @@ lp_build_sub(struct lp_build_context *bld,
* @sa Michael Herf, The "double blend trick", May 2000,
* http://www.stereopsis.com/doubleblend.html
*/
-static LLVMValueRef
+LLVMValueRef
lp_build_mul_norm(struct gallivm_state *gallivm,
struct lp_type wide_type,
LLVMValueRef a, LLVMValueRef b)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 2a4137a6780..f5b2800a2cf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -71,6 +71,13 @@ lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
+
+LLVMValueRef
+lp_build_mul_norm(struct gallivm_state *gallivm,
+ struct lp_type wide_type,
+ LLVMValueRef a,
+ LLVMValueRef b);
+
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,