diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2012-11-22 11:20:42 -0600 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2012-11-29 11:51:53 +0000 |
commit | dd5c58081672bd495e0ffef1c3cc1229620f0f88 (patch) | |
tree | 851854783ebfe4035e489984d15b610caa70d515 /src/gallium/auxiliary | |
parent | 2ea7d3dabd01608c1d0b020ef941912bd3893a96 (diff) |
gallivm: Altivec vector add/sub intrinsics
This patch adds the correct vector addition and subtraction intrinsics when
using Altivec on PPC. The current code uses the default path, and the LLVM
backend ends up issuing carry-out arithmetic instructions where saturating
ones are expected.
It also includes a fix for PowerPC, where char is unsigned by default,
resulting in bogus values for vector shifting.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 40 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 2 |
2 files changed, 27 insertions, 15 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a4d7d98cc94..c3df3bf73b2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -365,13 +365,19 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->one || b == bld->one) return bld->one; - if(util_cpu_caps.has_sse2 && - type.width * type.length == 128 && - !type.floating && !type.fixed) { - if(type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; + if (type.width * type.length == 128 && + !type.floating && !type.fixed) { + if(util_cpu_caps.has_sse2) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; + if(type.width == 16) + intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; + } else if (util_cpu_caps.has_altivec) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; + if(type.width == 16) + intrinsic = type.sign ? "llvm.ppc.altivec.vaddsws" : "llvm.ppc.altivec.vadduws"; + } } if(intrinsic) @@ -636,13 +642,19 @@ lp_build_sub(struct lp_build_context *bld, if(b == bld->one) return bld->zero; - if(util_cpu_caps.has_sse2 && - type.width * type.length == 128 && - !type.floating && !type.fixed) { - if(type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; + if (type.width * type.length == 128 && + !type.floating && !type.fixed) { + if (util_cpu_caps.has_sse2) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; + if(type.width == 16) + intrinsic = type.sign ? 
"llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; + } else if (util_cpu_caps.has_altivec) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; + if(type.width == 16) + intrinsic = type.sign ? "llvm.ppc.altivec.vsubsws" : "llvm.ppc.altivec.vsubuws"; + } } if(intrinsic) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 377884a78cf..ae4033b6086 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -246,7 +246,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, * YYYY YYYY .... YYYY <= output */ struct lp_type type4; - const char shifts[4][2] = { + const int shifts[4][2] = { { 1, 2}, {-1, 2}, { 1, -2}, |