summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c 293
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.h 30
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_logic.c 180
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_logic.h 6
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 12
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 13
6 files changed, 482 insertions, 52 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 74b4e9fce29..d2d91f50da9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -73,11 +73,14 @@
/**
* Generate min(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
*/
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
@@ -120,6 +123,8 @@ lp_build_min_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 32 && type.length == 4) {
intrinsic = "llvm.ppc.altivec.vminfp";
intr_size = 128;
@@ -131,7 +136,7 @@ lp_build_min_simple(struct lp_build_context *bld,
(gallivm_debug & GALLIVM_DEBUG_PERF)) {
debug_printf("%s: inefficient code, bogus shuffle due to packing\n",
__FUNCTION__);
- }
+ }
if (type.width == 8 && !type.sign) {
intrinsic = "llvm.x86.sse2.pminu.b";
}
@@ -147,53 +152,111 @@ lp_build_min_simple(struct lp_build_context *bld,
}
if (type.width == 32 && !type.sign) {
intrinsic = "llvm.x86.sse41.pminud";
- }
+ }
if (type.width == 32 && type.sign) {
intrinsic = "llvm.x86.sse41.pminsd";
}
}
} else if (util_cpu_caps.has_altivec) {
- intr_size = 128;
- if (type.width == 8) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminub";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsb";
- }
- } else if (type.width == 16) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminuh";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsh";
- }
- } else if (type.width == 32) {
- if (!type.sign) {
- intrinsic = "llvm.ppc.altivec.vminuw";
- } else {
- intrinsic = "llvm.ppc.altivec.vminsw";
- }
- }
+ intr_size = 128;
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
+ if (type.width == 8) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminub";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsb";
+ }
+ } else if (type.width == 16) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuh";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsh";
+ }
+ } else if (type.width == 32) {
+ if (!type.sign) {
+ intrinsic = "llvm.ppc.altivec.vminuw";
+ } else {
+ intrinsic = "llvm.ppc.altivec.vminsw";
+ }
+ }
}
if(intrinsic) {
- return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+ /* We need to handle nan's for floating point numbers. If one of the
+ * inputs is nan the other should be returned (required by both D3D10+
+ * and OpenCL).
+ * The sse intrinsics return the second operand in case of nan by
+ * default so we need special code to handle those.
+ */
+ if (util_cpu_caps.has_sse && type.floating &&
+ nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+ nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ LLVMValueRef isnan, max;
+ max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+ isnan = lp_build_isnan(bld, b);
+ return lp_build_select(bld, isnan, a, max);
+ } else {
+ assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+ isnan = lp_build_isnan(bld, a);
+ return lp_build_select(bld, isnan, a, max);
+ }
+ } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ }
}
- cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
- return lp_build_select(bld, cond, a, b);
+ if (type.floating) {
+ switch (nan_behavior) {
+ case GALLIVM_NAN_RETURN_NAN: {
+ LLVMValueRef isnan = lp_build_isnan(bld, b);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_OTHER: {
+ LLVMValueRef isnan = lp_build_isnan(bld, a);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_SECOND:
+ cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ break;
+ default:
+ assert(0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
+ } else {
+ cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
}
/**
* Generate max(a, b)
* No checks for special case values of a or b = 1 or 0 are done.
+ * NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
*/
static LLVMValueRef
lp_build_max_simple(struct lp_build_context *bld,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
{
const struct lp_type type = bld->type;
const char *intrinsic = NULL;
@@ -236,6 +299,8 @@ lp_build_max_simple(struct lp_build_context *bld,
}
}
else if (type.floating && util_cpu_caps.has_altivec) {
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 32 || type.length == 4) {
intrinsic = "llvm.ppc.altivec.vmaxfp";
intr_size = 128;
@@ -271,6 +336,8 @@ lp_build_max_simple(struct lp_build_context *bld,
}
} else if (util_cpu_caps.has_altivec) {
intr_size = 128;
+ debug_printf("%s: altivec doesn't support nan behavior modes\n",
+ __FUNCTION__);
if (type.width == 8) {
if (!type.sign) {
intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -293,13 +360,60 @@ lp_build_max_simple(struct lp_build_context *bld,
}
if(intrinsic) {
- return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
- type,
- intr_size, a, b);
+ if (util_cpu_caps.has_sse && type.floating &&
+ nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
+ nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+ LLVMValueRef isnan, min;
+ min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ if (nan_behavior == GALLIVM_NAN_RETURN_OTHER) {
+ isnan = lp_build_isnan(bld, b);
+ return lp_build_select(bld, isnan, a, min);
+ } else {
+ assert(nan_behavior == GALLIVM_NAN_RETURN_NAN);
+ isnan = lp_build_isnan(bld, a);
+ return lp_build_select(bld, isnan, a, min);
+ }
+ } else {
+ return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
+ type,
+ intr_size, a, b);
+ }
}
- cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
- return lp_build_select(bld, cond, a, b);
+ if (type.floating) {
+ switch (nan_behavior) {
+ case GALLIVM_NAN_RETURN_NAN: {
+ LLVMValueRef isnan = lp_build_isnan(bld, b);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_OTHER: {
+ LLVMValueRef isnan = lp_build_isnan(bld, a);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ cond = LLVMBuildXor(bld->gallivm->builder, cond, isnan, "");
+ return lp_build_select(bld, cond, a, b);
+ }
+ break;
+ case GALLIVM_NAN_RETURN_SECOND:
+ cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ break;
+ default:
+ assert(0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
+ } else {
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+ return lp_build_select(bld, cond, a, b);
+ }
}
@@ -389,7 +503,7 @@ lp_build_add(struct lp_build_context *bld,
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_min_simple(bld, a, lp_build_comp(bld, b));
+ a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
@@ -404,7 +518,7 @@ lp_build_add(struct lp_build_context *bld,
/* clamp to ceiling of 1.0 */
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
- res = lp_build_min_simple(bld, res, bld->one);
+ res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
/* XXX clamp to floor of -1 or 0??? */
@@ -670,7 +784,7 @@ lp_build_sub(struct lp_build_context *bld,
/* TODO: handle signed case */
if(type.norm && !type.floating && !type.fixed && !type.sign)
- a = lp_build_max_simple(bld, a, b);
+ a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
if(LLVMIsConstant(a) && LLVMIsConstant(b))
if (type.floating)
@@ -684,7 +798,7 @@ lp_build_sub(struct lp_build_context *bld,
res = LLVMBuildSub(builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
- res = lp_build_max_simple(bld, res, bld->zero);
+ res = lp_build_max_simple(bld, res, bld->zero, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
return res;
}
@@ -1144,7 +1258,7 @@ lp_build_lerp_3d(struct lp_build_context *bld,
/**
* Generate min(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
*/
LLVMValueRef
lp_build_min(struct lp_build_context *bld,
@@ -1172,13 +1286,48 @@ lp_build_min(struct lp_build_context *bld,
return a;
}
- return lp_build_min_simple(bld, a, b);
+ return lp_build_min_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
/**
+ * Generate min(a, b)
+ * Checks for special cases first: an undef operand yields undef, identical
+ * operands yield that operand, and for normalized types a zero operand
+ * (unsigned only) yields zero while a one operand yields the other operand.
+ * These shortcuts are taken before nan_behavior is consulted.
+ * Otherwise NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
+ *
+ * \param a first operand
+ * \param b second operand
+ * \param nan_behavior forwarded unchanged to lp_build_min_simple()
+ * \return the value picked by one of the shortcuts above, or the result
+ * of lp_build_min_simple()
+ */
+LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
+{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
+ if(a == bld->undef || b == bld->undef)
+ return bld->undef;
+
+ if(a == b)
+ return a;
+
+ if (bld->type.norm) {
+ if (!bld->type.sign) {
+ if (a == bld->zero || b == bld->zero) {
+ return bld->zero;
+ }
+ }
+ if(a == bld->one)
+ return b;
+ if(b == bld->one)
+ return a;
+ }
+
+ return lp_build_min_simple(bld, a, b, nan_behavior);
+}
+
+/**
* Generate max(a, b)
- * Do checks for special cases.
+ * Do checks for special cases, but NaN behavior is undefined.
*/
LLVMValueRef
lp_build_max(struct lp_build_context *bld,
@@ -1207,11 +1356,48 @@ lp_build_max(struct lp_build_context *bld,
}
}
- return lp_build_max_simple(bld, a, b);
+ return lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
}
/**
+ * Generate max(a, b)
+ * Checks for special cases first: an undef operand yields undef, identical
+ * operands yield that operand, and for normalized types a one operand
+ * yields one while a zero operand (unsigned only) yields the other operand.
+ * These shortcuts are taken before nan_behavior is consulted.
+ * Otherwise NaN's are handled according to the behavior specified by the
+ * nan_behavior argument.
+ *
+ * \param a first operand
+ * \param b second operand
+ * \param nan_behavior forwarded unchanged to lp_build_max_simple()
+ * \return the value picked by one of the shortcuts above, or the result
+ * of lp_build_max_simple()
+ */
+LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior)
+{
+ assert(lp_check_value(bld->type, a));
+ assert(lp_check_value(bld->type, b));
+
+ if(a == bld->undef || b == bld->undef)
+ return bld->undef;
+
+ if(a == b)
+ return a;
+
+ if(bld->type.norm) {
+ if(a == bld->one || b == bld->one)
+ return bld->one;
+ if (!bld->type.sign) {
+ if (a == bld->zero) {
+ return b;
+ }
+ if (b == bld->zero) {
+ return a;
+ }
+ }
+ }
+
+ return lp_build_max_simple(bld, a, b, nan_behavior);
+}
+
+/**
* Generate clamp(a, min, max)
* Do checks for special cases.
*/
@@ -3343,3 +3529,26 @@ lp_build_mod(struct lp_build_context *bld,
res = LLVMBuildURem(builder, x, y, "");
return res;
}
+
+
+/*
+ * For floating inputs it creates and returns a mask
+ * which is all 1's for channels which are NaN.
+ * Channels inside x which are not NaN will be 0.
+ *
+ * The mask is built from an ordered self-comparison (x == x), which the
+ * code names "isnotnan"; that result is inverted and then sign-extended
+ * to the integer vector type matching bld->type.
+ *
+ * \param x value to test; bld->type must be floating (asserted)
+ * \return integer mask as described above
+ */
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef mask;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, bld->type);
+
+ assert(bld->type.floating);
+ assert(lp_check_value(bld->type, x));
+
+ mask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, x, x,
+ "isnotnan");
+ mask = LLVMBuildNot(bld->gallivm->builder, mask, "");
+ mask = LLVMBuildSExt(bld->gallivm->builder, mask, int_vec_type, "isnan");
+ return mask;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 04e180c94fd..14b3a164faa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -131,6 +131,20 @@ lp_build_lerp_3d(struct lp_build_context *bld,
LLVMValueRef v111,
unsigned flags);
+/**
+ * Specifies floating point NaN behavior for operations that accept it,
+ * such as lp_build_min_ext() and lp_build_max_ext().
+ */
+enum gallivm_nan_behavior {
+ /* Results are undefined with NaN. Results in fastest code */
+ GALLIVM_NAN_BEHAVIOR_UNDEFINED,
+ /* If either input is NaN, NaN is returned */
+ GALLIVM_NAN_RETURN_NAN,
+ /* If one of the inputs is NaN, the other operand is returned */
+ GALLIVM_NAN_RETURN_OTHER,
+ /* If one of the inputs is NaN, the second operand is returned.
+ * In min/max it will be as fast as undefined with sse opcodes */
+ GALLIVM_NAN_RETURN_SECOND
+};
LLVMValueRef
lp_build_min(struct lp_build_context *bld,
@@ -138,11 +152,23 @@ lp_build_min(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_min_ext(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior);
+
+LLVMValueRef
lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
+lp_build_max_ext(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ enum gallivm_nan_behavior nan_behavior);
+
+LLVMValueRef
lp_build_clamp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef min,
@@ -309,4 +335,8 @@ lp_build_mod(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef y);
+LLVMValueRef
+lp_build_isnan(struct lp_build_context *bld,
+ LLVMValueRef x);
+
#endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 168bc262921..8b800cfde47 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -241,8 +241,6 @@ lp_build_compare(struct gallivm_state *gallivm,
#endif
#endif /* HAVE_LLVM < 0x0207 */
- /* XXX: It is not clear if we should use the ordered or unordered operators */
-
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@@ -368,11 +366,189 @@ lp_build_compare(struct gallivm_state *gallivm,
return res;
}
+/**
+ * Build code to compare two values 'a' and 'b' using the given func.
+ * \param func one of PIPE_FUNC_x
+ * \param a left-hand operand, must match bld->type (asserted)
+ * \param b right-hand operand, must match bld->type (asserted)
+ * If the operands are floating point numbers, the function will use
+ * ordered comparison which means that it will return true only if neither
+ * operand is a NaN and the specified condition evaluates to true.
+ * Integer operands use the signed or unsigned predicate according to
+ * type.sign.
+ * PIPE_FUNC_NEVER and PIPE_FUNC_ALWAYS return a constant all-zeros or
+ * all-ones vector without emitting a comparison.
+ * The result values will be 0 for false or ~0 for true.
+ */
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ struct gallivm_state *gallivm = bld->gallivm;
+ const struct lp_type type = bld->type;
+
+
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+ LLVMValueRef zeros = LLVMConstNull(int_vec_type);
+ LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
+ LLVMValueRef cond;
+ LLVMValueRef res;
+
+ assert(func >= PIPE_FUNC_NEVER);
+ assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+ if(func == PIPE_FUNC_NEVER)
+ return zeros;
+ if(func == PIPE_FUNC_ALWAYS)
+ return ones;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ * This block only prints a performance warning (when
+ * GALLIVM_DEBUG_PERF is set); it does not change the generated code.
+ */
+
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
+ }
+#endif
+ if(type.floating) {
+ LLVMRealPredicate op;
+ switch(func) {
+ case PIPE_FUNC_NEVER:
+ op = LLVMRealPredicateFalse;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ op = LLVMRealPredicateTrue;
+ break;
+ case PIPE_FUNC_EQUAL:
+ op = LLVMRealOEQ;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ op = LLVMRealONE;
+ break;
+ case PIPE_FUNC_LESS:
+ op = LLVMRealOLT;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ op = LLVMRealOLE;
+ break;
+ case PIPE_FUNC_GREATER:
+ op = LLVMRealOGT;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ op = LLVMRealOGE;
+ break;
+ default:
+ assert(0);
+ return lp_build_undef(gallivm, type);
+ }
+
+#if HAVE_LLVM >= 0x0207
+ cond = LLVMBuildFCmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+#else
+ if (type.length == 1) {
+ cond = LLVMBuildFCmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ debug_printf("%s: warning: using slow element-wise float"
+ " vector comparison\n", __FUNCTION__);
+ for (i = 0; i < type.length; ++i) {
+ LLVMValueRef index = lp_build_const_int32(gallivm, i);
+ cond = LLVMBuildFCmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
+ }
+#endif
+ }
+ else {
+ LLVMIntPredicate op;
+ switch(func) {
+ case PIPE_FUNC_EQUAL:
+ op = LLVMIntEQ;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ op = LLVMIntNE;
+ break;
+ case PIPE_FUNC_LESS:
+ op = type.sign ? LLVMIntSLT : LLVMIntULT;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ op = type.sign ? LLVMIntSLE : LLVMIntULE;
+ break;
+ case PIPE_FUNC_GREATER:
+ op = type.sign ? LLVMIntSGT : LLVMIntUGT;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ op = type.sign ? LLVMIntSGE : LLVMIntUGE;
+ break;
+ default:
+ assert(0);
+ return lp_build_undef(gallivm, type);
+ }
+
+#if HAVE_LLVM >= 0x0207
+ cond = LLVMBuildICmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+#else
+ if (type.length == 1) {
+ cond = LLVMBuildICmp(builder, op, a, b, "");
+ res = LLVMBuildSExt(builder, cond, int_vec_type, "");
+ }
+ else {
+ unsigned i;
+
+ res = LLVMGetUndef(int_vec_type);
+
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: using slow element-wise int"
+ " vector comparison\n", __FUNCTION__);
+ }
+
+ for(i = 0; i < type.length; ++i) {
+ LLVMValueRef index = lp_build_const_int32(gallivm, i);
+ cond = LLVMBuildICmp(builder, op,
+ LLVMBuildExtractElement(builder, a, index, ""),
+ LLVMBuildExtractElement(builder, b, index, ""),
+ "");
+ cond = LLVMBuildSelect(builder, cond,
+ LLVMConstExtractElement(ones, index),
+ LLVMConstExtractElement(zeros, index),
+ "");
+ res = LLVMBuildInsertElement(builder, res, cond, index, "");
+ }
+ }
+#endif
+ }
+
+ return res;
+}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * unordered comparison which means that it will return true if either
+ * operand is a NaN or the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
index f5304240a59..00fb0268dd6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
@@ -64,6 +64,12 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b);
+
+LLVMValueRef
lp_build_select_bitwise(struct lp_build_context *bld,
LLVMValueRef mask,
LLVMValueRef a,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index e99c8ef134e..f23e08b77fb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1274,8 +1274,10 @@ max_emit_cpu(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
- emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base,
- emit_data->args[0], emit_data->args[1]);
+ emit_data->output[emit_data->chan] =
+ lp_build_max_ext(&bld_base->base,
+ emit_data->args[0], emit_data->args[1],
+ GALLIVM_NAN_RETURN_OTHER);
}
/* TGSI_OPCODE_MIN (CPU Only) */
@@ -1285,8 +1287,10 @@ min_emit_cpu(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
- emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base,
- emit_data->args[0], emit_data->args[1]);
+ emit_data->output[emit_data->chan] =
+ lp_build_min_ext(&bld_base->base,
+ emit_data->args[0], emit_data->args[1],
+ GALLIVM_NAN_RETURN_OTHER);
}
/* TGSI_OPCODE_MOD (CPU Only) */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index c8d4fb8cd57..4355b3a92d1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1396,16 +1396,21 @@ emit_store_chan(
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- value = lp_build_max(float_bld, value, float_bld->zero);
- value = lp_build_min(float_bld, value, float_bld->one);
+ value = lp_build_max_ext(float_bld, value, float_bld->zero,
+ GALLIVM_NAN_RETURN_SECOND);
+ value = lp_build_min_ext(float_bld, value, float_bld->one,
+ GALLIVM_NAN_BEHAVIOR_UNDEFINED);
break;
case TGSI_SAT_MINUS_PLUS_ONE:
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
- value = lp_build_min(float_bld, value, float_bld->one);
+ value = lp_build_max_ext(float_bld, value,
+ lp_build_const_vec(gallivm, float_bld->type, -1.0),
+ GALLIVM_NAN_RETURN_SECOND);
+ value = lp_build_min_ext(float_bld, value, float_bld->one,
+ GALLIVM_NAN_BEHAVIOR_UNDEFINED);
break;
default: