From 2ea7d3dabd01608c1d0b020ef941912bd3893a96 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Thu, 22 Nov 2012 11:03:11 -0600 Subject: gallivm: Altivec vector max/min intrinsics This patch adds the PPC Altivec intrinsic max/min instructions for supported Altivec vector types (16xi8, 8xi16, 4xi32, 4xf32). Reviewed-by: Roland Scheidegger Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 56 +++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/gallivm') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index ca96a6b1df7..a4d7d98cc94 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -116,7 +116,12 @@ lp_build_min_simple(struct lp_build_context *bld, } } } - else if (util_cpu_caps.has_sse2 && type.length >= 2) { + else if (type.floating && util_cpu_caps.has_altivec) { + if (type.width == 32 && type.length == 4) { + intrinsic = "llvm.ppc.altivec.vminfp"; + intr_size = 128; + } + } else if (util_cpu_caps.has_sse2 && type.length >= 2) { intr_size = 128; if ((type.width == 8 || type.width == 16) && (type.width * type.length <= 64) && @@ -144,6 +149,27 @@ lp_build_min_simple(struct lp_build_context *bld, intrinsic = "llvm.x86.sse41.pminsd"; } } + } else if (util_cpu_caps.has_altivec) { + intr_size = 128; + if (type.width == 8) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminub"; + } else { + intrinsic = "llvm.ppc.altivec.vminsb"; + } + } else if (type.width == 16) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminuh"; + } else { + intrinsic = "llvm.ppc.altivec.vminsh"; + } + } else if (type.width == 32) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vminuw"; + } else { + intrinsic = "llvm.ppc.altivec.vminsw"; + } + } } if(intrinsic) { @@ -206,7 +232,12 @@ lp_build_max_simple(struct lp_build_context *bld, } } } - else if (util_cpu_caps.has_sse2 && 
type.length >= 2) { + else if (type.floating && util_cpu_caps.has_altivec) { + if (type.width == 32 || type.length == 4) { + intrinsic = "llvm.ppc.altivec.vmaxfp"; + intr_size = 128; + } + } else if (util_cpu_caps.has_sse2 && type.length >= 2) { intr_size = 128; if ((type.width == 8 || type.width == 16) && (type.width * type.length <= 64) && @@ -235,6 +266,27 @@ lp_build_max_simple(struct lp_build_context *bld, intrinsic = "llvm.x86.sse41.pmaxsd"; } } + } else if (util_cpu_caps.has_altivec) { + intr_size = 128; + if (type.width == 8) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxub"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsb"; + } + } else if (type.width == 16) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxuh"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsh"; + } + } else if (type.width == 32) { + if (!type.sign) { + intrinsic = "llvm.ppc.altivec.vmaxuw"; + } else { + intrinsic = "llvm.ppc.altivec.vmaxsw"; + } + } } if(intrinsic) { -- cgit v1.2.3