summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm/lp_bld_conv.c
diff options
context:
space:
mode:
authorOded Gabbay <[email protected]>2016-01-07 19:50:12 +0200
committerOded Gabbay <[email protected]>2016-01-07 22:07:02 +0200
commitf41b6cfb07ede2be053c57e38d4d6b9433f90bf1 (patch)
tree3982968585c5c825aa68e7e3e7e4820b716deeb4 /src/gallium/auxiliary/gallivm/lp_bld_conv.c
parentbca18057a359f98b5db0a6453abe4dc7dd70a31d (diff)
llvmpipe: use sse2 conv code for altivec
In lp_build_conv() and lp_build_conv_auto(), there is a special case of conversion that is used when sse2 is present. That code path is suitable for altivec without any changes, because all the functions that are called in that code path already support altivec. This patch increases the FPS on the POWER architecture across the board by 10%-25%. I checked ipers, glxgears, glxspheres64, openarena, xonotic and glmark2. Signed-off-by: Oded Gabbay <[email protected]> Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_conv.c')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_conv.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 14244470c90..7854142f736 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -458,7 +458,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
{
/* Special case 4x4f --> 1x16ub */
if (src_type.length == 4 &&
- util_cpu_caps.has_sse2)
+ (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
{
num_dsts = (num_srcs + 3) / 4;
dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
@@ -545,7 +545,7 @@ lp_build_conv(struct gallivm_state *gallivm,
((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
(num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&
- util_cpu_caps.has_sse2)
+ (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
{
struct lp_build_context bld;
struct lp_type int16_type, int32_type;