summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdhemerval Zanella <[email protected]>2012-11-22 11:03:11 -0600
committerJosé Fonseca <[email protected]>2012-11-29 11:51:46 +0000
commit2ea7d3dabd01608c1d0b020ef941912bd3893a96 (patch)
tree1d5764f58e81a66ba343cf7411e6b0c26374ebf4
parent31c63b058edd988004e1bea261a03235a8752229 (diff)
gallivm: Altivec vector max/min intrinsics
This patch adds the PPC Altivec max/min intrinsics for the supported Altivec vector types (16xi8, 8xi16, 4xi32, 4xf32). Reviewed-by: Roland Scheidegger <[email protected]> Reviewed-by: Jose Fonseca <[email protected]>
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c56
1 files changed, 54 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index ca96a6b1df7..a4d7d98cc94 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -116,7 +116,12 @@ lp_build_min_simple(struct lp_build_context *bld,
}
}
}
- else if (util_cpu_caps.has_sse2 && type.length >= 2) {
+ else if (type.floating && util_cpu_caps.has_altivec) {
+ if (type.width == 32 && type.length == 4) {
+ intrinsic = "llvm.ppc.altivec.vminfp";
+ intr_size = 128;
+ }
+ } else if (util_cpu_caps.has_sse2 && type.length >= 2) {
intr_size = 128;
if ((type.width == 8 || type.width == 16) &&
(type.width * type.length <= 64) &&
@@ -144,6 +149,27 @@ lp_build_min_simple(struct lp_build_context *bld,
intrinsic = "llvm.x86.sse41.pminsd";
}
}
+ } else if (util_cpu_caps.has_altivec) {
+ intr_size = 128;
+ if (type.width == 8) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminub";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsb";
+ }
+ } else if (type.width == 16) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuh";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsh";
+ }
+ } else if (type.width == 32) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuw";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsw";
+ }
+ }
}
if(intrinsic) {
@@ -206,7 +232,12 @@ lp_build_max_simple(struct lp_build_context *bld,
}
}
}
- else if (util_cpu_caps.has_sse2 && type.length >= 2) {
+ else if (type.floating && util_cpu_caps.has_altivec) {
+ if (type.width == 32 || type.length == 4) {
+ intrinsic = "llvm.ppc.altivec.vmaxfp";
+ intr_size = 128;
+ }
+ } else if (util_cpu_caps.has_sse2 && type.length >= 2) {
intr_size = 128;
if ((type.width == 8 || type.width == 16) &&
(type.width * type.length <= 64) &&
@@ -235,6 +266,27 @@ lp_build_max_simple(struct lp_build_context *bld,
intrinsic = "llvm.x86.sse41.pmaxsd";
}
}
+ } else if (util_cpu_caps.has_altivec) {
+ intr_size = 128;
+ if (type.width == 8) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vmaxub";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vmaxsb";
+ }
+ } else if (type.width == 16) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vmaxuh";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vmaxsh";
+ }
+ } else if (type.width == 32) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vmaxuw";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vmaxsw";
+ }
+ }
}
if(intrinsic) {