diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 52 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 16 |
2 files changed, 51 insertions, 17 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 0c27c2f8972..d5d5c5a786e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -963,16 +963,48 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, if (stencil[0].enabled) { if (face) { - LLVMValueRef zero = lp_build_const_int32(gallivm, 0); - - /* front_facing = face != 0 ? ~0 : 0 */ - front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); - front_facing = LLVMBuildSExt(builder, front_facing, - LLVMIntTypeInContext(gallivm->context, - s_bld.type.length*s_bld.type.width), - ""); - front_facing = LLVMBuildBitCast(builder, front_facing, - s_bld.int_vec_type, ""); + if (0) { + /* + * XXX: the scalar expansion below produces atrocious code + * (basically producing a 64bit scalar value, then moving the 2 + * 32bit pieces separately to simd, plus 4 shuffles, which is + * seriously lame). But the scalar-simd transitions are always + * tricky, so no big surprise there. + * This here would be way better, however llvm has some serious + * trouble later using it in the select, probably because it will + * recognize the expression as constant and move the simd value + * away (out of the loop) - and then it will suddenly try + * constructing i1 high-bit masks out of it later... + * (Try piglit stencil-twoside.) + * Note this is NOT due to using SExt/Trunc, it fails exactly the + * same even when using native compare/select. + * I cannot reproduce this problem when using stand-alone compiler + * though, suggesting some problem with optimization passes... + * (With stand-alone compilation, the construction of this mask + * value, no matter if the easy 3 instruction here or the complex + * 16+ one below, never gets separated from where it's used.) + * The scalar code still has the same problem, but the generated + * code looks a bit better at least for some reason, even if + * mostly by luck (the fundamental issue clearly is the same). + */ + front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face); + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = lp_build_compare(gallivm, s_bld.type, + PIPE_FUNC_NOTEQUAL, + front_facing, s_bld.zero); + } else { + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntTypeInContext(gallivm->context, + s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + + } } s_pass_mask = lp_build_stencil_test(&s_bld, stencil, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 09108159feb..a36389ccc32 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -127,7 +127,7 @@ generate_quad_mask(struct gallivm_state *gallivm, struct lp_type mask_type; LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef bits[16]; - LLVMValueRef mask; + LLVMValueRef mask, bits_vec; int shift, i; /* @@ -179,15 +179,15 @@ generate_quad_mask(struct gallivm_state *gallivm, bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0); bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0); } - mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), ""); + bits_vec = LLVMConstVector(bits, fs_type.length); + mask = LLVMBuildAnd(builder, mask, bits_vec, ""); /* - * mask = mask != 0 ? ~0 : 0 + * mask = mask == bits ? ~0 : 0 */ mask = lp_build_compare(gallivm, - mask_type, PIPE_FUNC_NOTEQUAL, - mask, - lp_build_const_int_vec(gallivm, mask_type, 0)); + mask_type, PIPE_FUNC_EQUAL, + mask, bits_vec); return mask; } @@ -2476,8 +2476,10 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) for (i = 0; i < key->nr_cbufs; ++i) { debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); } - if (key->depth.enabled) { + if (key->depth.enabled || key->stencil[0].enabled) { debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); + } + if (key->depth.enabled) { debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } |