summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c52
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c16
3 files changed, 53 insertions, 17 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 1a50e82c241..524917abe27 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -327,6 +327,8 @@ lp_build_select(struct lp_build_context *bld,
* supported yet for a long time, and LLVM will generate poor code when
* the mask is not the result of a comparison.
* Also, llvm 3.7 may miscompile them (bug 94972).
+ * XXX: Even if the instruction was an SExt, this may still produce
+ * terrible code. Try piglit stencil-twoside.
*/
/* Convert the mask to a vector of booleans.
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 0c27c2f8972..d5d5c5a786e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -963,16 +963,48 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
if (stencil[0].enabled) {
if (face) {
- LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
-
- /* front_facing = face != 0 ? ~0 : 0 */
- front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
- front_facing = LLVMBuildSExt(builder, front_facing,
- LLVMIntTypeInContext(gallivm->context,
- s_bld.type.length*s_bld.type.width),
- "");
- front_facing = LLVMBuildBitCast(builder, front_facing,
- s_bld.int_vec_type, "");
+ if (0) {
+ /*
+ * XXX: the scalar expansion below produces atrocious code
+ * (basically producing a 64bit scalar value, then moving the 2
+ * 32bit pieces separately to simd, plus 4 shuffles, which is
+ * seriously lame). But the scalar-simd transitions are always
+ * tricky, so no big surprise there.
+ * This here would be way better, however llvm has some serious
+ * trouble later using it in the select, probably because it will
+ * recognize the expression as constant and move the simd value
+ * away (out of the loop) - and then it will suddenly try
+ * constructing i1 high-bit masks out of it later...
+ * (Try piglit stencil-twoside.)
+ * Note this is NOT due to using SExt/Trunc, it fails exactly the
+ * same even when using native compare/select.
+ * I cannot reproduce this problem when using stand-alone compiler
+ * though, suggesting some problem with optimization passes...
+ * (With stand-alone compilation, the construction of this mask
+ * value, no matter if the easy 3 instruction here or the complex
+ * 16+ one below, never gets separated from where it's used.)
+ * The scalar code still has the same problem, but the generated
+ * code looks a bit better at least for some reason, even if
+ * mostly by luck (the fundamental issue clearly is the same).
+ */
+ front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = lp_build_compare(gallivm, s_bld.type,
+ PIPE_FUNC_NOTEQUAL,
+ front_facing, s_bld.zero);
+ } else {
+ LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntTypeInContext(gallivm->context,
+ s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+
+ }
}
s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 09108159feb..a36389ccc32 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -127,7 +127,7 @@ generate_quad_mask(struct gallivm_state *gallivm,
struct lp_type mask_type;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef bits[16];
- LLVMValueRef mask;
+ LLVMValueRef mask, bits_vec;
int shift, i;
/*
@@ -179,15 +179,15 @@ generate_quad_mask(struct gallivm_state *gallivm,
bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0);
bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0);
}
- mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
+ bits_vec = LLVMConstVector(bits, fs_type.length);
+ mask = LLVMBuildAnd(builder, mask, bits_vec, "");
/*
- * mask = mask != 0 ? ~0 : 0
+ * mask = mask == bits ? ~0 : 0
*/
mask = lp_build_compare(gallivm,
- mask_type, PIPE_FUNC_NOTEQUAL,
- mask,
- lp_build_const_int_vec(gallivm, mask_type, 0));
+ mask_type, PIPE_FUNC_EQUAL,
+ mask, bits_vec);
return mask;
}
@@ -2476,8 +2476,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
for (i = 0; i < key->nr_cbufs; ++i) {
debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
}
- if (key->depth.enabled) {
+ if (key->depth.enabled || key->stencil[0].enabled) {
debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+ }
+ if (key->depth.enabled) {
debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
}