diff options
author | Adhemerval Zanella <[email protected]> | 2012-11-22 11:03:11 -0600 |
---|---|---|
committer | José Fonseca <[email protected]> | 2012-11-29 11:51:46 +0000 |
commit | 2ea7d3dabd01608c1d0b020ef941912bd3893a96 (patch) | |
tree | 1d5764f58e81a66ba343cf7411e6b0c26374ebf4 /src | |
parent | 31c63b058edd988004e1bea261a03235a8752229 (diff) |
gallivm: Altivec vector max/min intrinsics
This patch adds the PPC Altivec max/min intrinsics for the
supported Altivec vector types (16xi8, 8xi16, 4xi32, 4xf32).
Reviewed-by: Roland Scheidegger <[email protected]>
Reviewed-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 56 |
1 files changed, 54 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index ca96a6b1df7..a4d7d98cc94 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -116,7 +116,12 @@ lp_build_min_simple(struct lp_build_context *bld, } } } - else if (util_cpu_caps.has_sse2 && type.length >= 2) { + else if (type.floating && util_cpu_caps.has_altivec) { + if (type.width == 32 && type.length == 4) { + intrinsic = "llvm.ppc.altivec.vminfp"; + intr_size = 128; + } + } else if (util_cpu_caps.has_sse2 && type.length >= 2) { intr_size = 128; if ((type.width == 8 || type.width == 16) && (type.width * type.length <= 64) && @@ -144,6 +149,27 @@ lp_build_min_simple(struct lp_build_context *bld, intrinsic = "llvm.x86.sse41.pminsd"; } } + } else if (util_cpu_caps.has_altivec) { + intr_size = 128; + if (type.width == 8) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminub"; + } else { + intrinsic = "llvm.ppc.altivec.vminsb"; + } + } else if (type.width == 16) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminuh"; + } else { + intrinsic = "llvm.ppc.altivec.vminsh"; + } + } else if (type.width == 32) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminuw"; + } else { + intrinsic = "llvm.ppc.altivec.vminsw"; + } + } } if(intrinsic) { @@ -206,7 +232,12 @@ lp_build_max_simple(struct lp_build_context *bld, } } } - else if (util_cpu_caps.has_sse2 && type.length >= 2) { + else if (type.floating && util_cpu_caps.has_altivec) { + if (type.width == 32 || type.length == 4) { + intrinsic = "llvm.ppc.altivec.vmaxfp"; + intr_size = 128; + } + } else if (util_cpu_caps.has_sse2 && type.length >= 2) { intr_size = 128; if ((type.width == 8 || type.width == 16) && (type.width * type.length <= 64) && @@ -235,6 +266,27 @@ lp_build_max_simple(struct lp_build_context *bld, intrinsic = "llvm.x86.sse41.pmaxsd"; } } + } else if (util_cpu_caps.has_altivec) { + intr_size = 128; + if (type.width == 8) { + if 
(!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxub"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsb"; + } + } else if (type.width == 16) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxuh"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsh"; + } + } else if (type.width == 32) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxuw"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsw"; + } + } } if(intrinsic) { |