summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
authorJose Fonseca <[email protected]>2016-04-02 15:13:38 +0100
committerJose Fonseca <[email protected]>2016-04-03 09:51:27 +0100
commitb284f1f7f9afa560d6f4c0863681a7e36913955c (patch)
tree5fa8673a99b0e8c356a7058b790af6632758caa3 /src/gallium/auxiliary/gallivm
parent11c4e5b45c246b638b5d98822ff512d1bb09c4fd (diff)
gallivm: Fix performance regressions due to vector selects.
LLVM often can't determine that the mask elements are all ones/zeros, and there doesn't seem to be a good way to hint that. Thanks to Roland Scheidegger for spotting and analyzing the issue. Reviewed-by: Roland Scheidegger <[email protected]>
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c40
1 files changed, 18 insertions, 22 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 5b0b6c6b234..91f316c4565 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -39,6 +39,7 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_swizzle.h"
#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
@@ -314,35 +315,30 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
res = LLVMBuildSelect(builder, mask, a, b, "");
}
- else if (HAVE_LLVM >= 0x0303) {
+ else if (LLVMIsConstant(mask) ||
+ LLVMGetInstructionOpcode(mask) == LLVMSExt) {
/* Generate a vector select.
*
- * Using vector selects would avoid emitting intrinsics, but they weren't
- * properly supported yet for a long time.
- *
- * LLVM 3.3 appears to reliably support it.
- *
- * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
- *
- * LLVM 3.0 includes experimental support provided the -promote-elements
- * options is passed to LLVM's command line (e.g., via
- * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
- * worse, probably because some optimization passes don't know how to
- * handle vector selects.
- *
- * See also:
- * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
+ * Using vector selects should avoid emitting intrinsics hence avoid
+ * hindering optimization passes, but vector selects weren't properly
+ * supported for a long time, and LLVM will generate poor code when
+ * the mask is not the result of a comparison.
*/
/* Convert the mask to a vector of booleans.
- * XXX: There are two ways to do this. Decide what's best.
+ *
+ * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
+ * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas
+ * what really happens is that LLVM will emit two shifts back to back.
*/
- if (1) {
- LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
- mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
- } else {
- mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
+ if (0) {
+ LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
+ shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
+ mask = LLVMBuildLShr(builder, mask, shift, "");
}
+ LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
+ mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
+
res = LLVMBuildSelect(builder, mask, a, b, "");
}
else if (((util_cpu_caps.has_sse4_1 &&