summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_pack.c 3
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 5
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.c 275
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.h 15
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c 28
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.h 6
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup.c 18
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_setup_context.h 4
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c 53
9 files changed, 259 insertions, 148 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 9eb9ab0261f..0a57e39611c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm,
/**
* Interleave vector elements.
*
- * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
+ * (but not for 256bit AVX vectors).
*/
LLVMValueRef
lp_build_interleave2(struct gallivm_state *gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index cc29c5c885f..7ac0029051d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1545,6 +1545,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
}
}
+ else if (util_format_has_stencil(bld.format_desc) &&
+ !util_format_has_depth(bld.format_desc)) {
+ /* for stencil only formats, sample stencil (uint) */
+ bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
+ }
if (!static_texture_state->level_zero_only) {
derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 1cd36b87909..2376ca73c51 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc,
if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
type.floating = TRUE;
assert(z_swizzle == 0);
- assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+ assert(format_desc->channel[z_swizzle].size == 32);
}
else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
assert(format_desc->block.bits <= 32);
@@ -347,15 +347,18 @@ static boolean
get_z_shift_and_mask(const struct util_format_description *format_desc,
unsigned *shift, unsigned *width, unsigned *mask)
{
- const unsigned total_bits = format_desc->block.bits;
+ unsigned total_bits;
unsigned z_swizzle;
unsigned chan;
unsigned padding_left, padding_right;
-
+
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
assert(format_desc->block.width == 1);
assert(format_desc->block.height == 1);
+ /* 64bit d/s format is special: the z values were already extracted to 32 bits */
+ total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
+
z_swizzle = format_desc->swizzle[0];
if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
@@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
return FALSE;
+ /* just special case 64bit d/s format */
+ if (format_desc->block.bits > 32) {
+ assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+ *shift = 0;
+ *mask = 0xff;
+ return TRUE;
+ }
+
*shift = 0;
for (chan = 0; chan < s_swizzle; chan++)
*shift += format_desc->channel[chan].size;
@@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
* \param loop_counter the current loop iteration
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block
* \param depth_stride stride of the depth/stencil buffer
+ * \param z_fb contains z values loaded from fb (may include padding)
+ * \param s_fb contains s values loaded from fb (may include padding)
*/
-LLVMValueRef
+void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
+ LLVMValueRef *z_fb,
+ LLVMValueRef *s_fb,
LLVMValueRef loop_counter)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
- LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst1, zs_dst2;
LLVMValueRef zs_dst_ptr;
LLVMValueRef depth_offset1, depth_offset2;
- unsigned depth_bits = format_desc->block.bits/8;
+ LLVMTypeRef load_ptr_type;
+ unsigned depth_bytes = format_desc->block.bits / 8;
struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
struct lp_type zs_load_type = zs_type;
+
zs_load_type.length = zs_load_type.length / 2;
+ load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
if (z_src_type.length == 4) {
unsigned i;
@@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
depth_stride, "");
depth_offset1 = LLVMBuildMul(builder, looplsb,
- lp_build_const_int32(gallivm, depth_bits * 2), "");
+ lp_build_const_int32(gallivm, depth_bytes * 2), "");
depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
/* just concatenate the loaded 2x2 values into 4-wide vector */
@@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
* 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
*/
for (i = 0; i < 8; i++) {
-
shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
}
}
@@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
/* Load current z/stencil values from z/stencil buffer */
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
- zs_dst_ptr = LLVMBuildBitCast(builder,
- zs_dst_ptr,
- LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
- zs_dst_ptr = LLVMBuildBitCast(builder,
- zs_dst_ptr,
- LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
- zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
- LLVMConstVector(shuffles, zs_type.length), "");
+ *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+ LLVMConstVector(shuffles, zs_type.length), "");
+ *s_fb = *z_fb;
if (format_desc->block.bits < z_src_type.width) {
/* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
- zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+ *z_fb = LLVMBuildZExt(builder, *z_fb,
+ lp_build_int_vec_type(gallivm, z_src_type), "");
}
- lp_build_name(zs_dst, "zs_dst");
+ else if (format_desc->block.bits > 32) {
+ /* rely on llvm to nicely handle the too-wide vector we have here */
+ unsigned i;
+ struct lp_type typex2 = zs_type;
+ struct lp_type s_type = zs_type;
+ LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef tmp;
+
+ typex2.width = typex2.width / 2;
+ typex2.length = typex2.length * 2;
+ s_type.width = s_type.width / 2;
+ s_type.floating = 0;
+
+ tmp = LLVMBuildBitCast(builder, *z_fb,
+ lp_build_vec_type(gallivm, typex2), "");
+
+ for (i = 0; i < zs_type.length; i++) {
+ shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
+ shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+ }
+ *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+ LLVMConstVector(shuffles1, zs_type.length), "");
+ *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+ LLVMConstVector(shuffles2, zs_type.length), "");
+ *s_fb = LLVMBuildBitCast(builder, *s_fb,
+ lp_build_vec_type(gallivm, s_type), "");
+ lp_build_name(*s_fb, "s_dst");
+ }
- return zs_dst;
+ lp_build_name(*z_fb, "z_dst");
+ lp_build_name(*s_fb, "s_dst");
+ lp_build_name(*z_fb, "z_dst");
}
/**
* Store depth/stencil values.
* Incoming values are swizzled (typically n 2x2 quads), stored linear.
- * If there's a mask it will do reload/select/store otherwise just store.
+ * If there's a mask it will do select/store otherwise just store.
*
* \param type the data type of the fragment depth/stencil values
* \param format_desc description of the depth/stencil surface
* \param mask the alive/dead pixel mask for the quad (vector)
+ * \param z_fb z values read from fb (with padding)
+ * \param s_fb s values read from fb (with padding)
* \param loop_counter the current loop iteration
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block
* \param depth_stride stride of the depth/stencil buffer
- * \param zs_value the depth/stencil values to store
+ * \param z_value the depth values to store (with padding)
+ * \param s_value the stencil values to store (with padding)
*/
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
LLVMValueRef loop_counter,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
- LLVMValueRef zs_value)
+ LLVMValueRef z_value,
+ LLVMValueRef s_value)
{
struct lp_build_context z_bld;
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef mask_value = NULL;
- LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst1, zs_dst2;
LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
LLVMValueRef depth_offset1, depth_offset2;
- unsigned depth_bits = format_desc->block.bits/8;
+ LLVMTypeRef load_ptr_type;
+ unsigned depth_bytes = format_desc->block.bits / 8;
struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
struct lp_type zs_load_type = zs_type;
+
zs_load_type.length = zs_load_type.length / 2;
+ load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+ if (zs_type.width > 32)
+ zs_type.width = 32;
lp_build_context_init(&z_bld, gallivm, zs_type);
@@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
* outside the fs loop to avoid all the swizzle stuff.
*/
if (z_src_type.length == 4) {
- unsigned i;
LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
lp_build_const_int32(gallivm, 1), "");
LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
@@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
depth_stride, "");
depth_offset1 = LLVMBuildMul(builder, looplsb,
- lp_build_const_int32(gallivm, depth_bits * 2), "");
+ lp_build_const_int32(gallivm, depth_bytes * 2), "");
depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
-
- /* just concatenate the loaded 2x2 values into 4-wide vector */
- for (i = 0; i < 4; i++) {
- shuffles[i] = lp_build_const_int32(gallivm, i);
- }
}
else {
unsigned i;
@@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
}
}
-
depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
- zs_dst_ptr1 = LLVMBuildBitCast(builder,
- zs_dst_ptr1,
- LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
- zs_dst_ptr2 = LLVMBuildBitCast(builder,
- zs_dst_ptr2,
- LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
- if (mask) {
- zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
- zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
- zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
- LLVMConstVector(shuffles, zs_type.length),
- "zsbufval");
+ if (format_desc->block.bits > 32) {
+ s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
+ }
+ if (mask) {
mask_value = lp_build_mask_value(mask);
+ z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
+ if (format_desc->block.bits > 32) {
+ s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
+ s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
+ }
}
if (zs_type.width < z_src_type.width) {
- /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
- zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
- if (mask)
- mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+ /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
+ z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, "");
}
- if (mask) {
- zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
- }
-
- if (z_src_type.length == 4) {
- zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
- zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+ if (format_desc->block.bits <= 32) {
+ if (z_src_type.length == 4) {
+ zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
+ zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
+ }
+ else {
+ assert(z_src_type.length == 8);
+ zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
+ LLVMConstVector(&shuffles[0],
+ zs_load_type.length), "");
+ zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
+ LLVMConstVector(&shuffles[4],
+ zs_load_type.length), "");
+ }
}
else {
- assert(z_src_type.length == 8);
- zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
- LLVMConstVector(&shuffles[0],
- zs_load_type.length),
- "");
- zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
- LLVMConstVector(&shuffles[4],
- zs_load_type.length),
- "");
-
+ if (z_src_type.length == 4) {
+ zs_dst1 = lp_build_interleave2(gallivm, zs_type,
+ z_value, s_value, 0);
+ zs_dst2 = lp_build_interleave2(gallivm, zs_type,
+ z_value, s_value, 1);
+ }
+ else {
+ unsigned i;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
+ assert(z_src_type.length == 8);
+ for (i = 0; i < 8; i++) {
+ shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
+ z_src_type.length);
+ }
+ zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
+ LLVMConstVector(&shuffles[0],
+ z_src_type.length), "");
+ zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
+ LLVMConstVector(&shuffles[8],
+ z_src_type.length), "");
+ }
+ zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
+ lp_build_vec_type(gallivm, zs_load_type), "");
+ zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
+ lp_build_vec_type(gallivm, zs_load_type), "");
}
+
LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
}
@@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
LLVMValueRef z_src,
- LLVMValueRef zs_dst,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
LLVMValueRef face,
- LLVMValueRef *zs_value,
+ LLVMValueRef *z_value,
+ LLVMValueRef *s_value,
boolean do_branch)
{
LLVMBuilderRef builder = gallivm->builder;
- struct lp_type zs_type;
struct lp_type z_type;
struct lp_build_context z_bld;
struct lp_build_context s_bld;
@@ -763,7 +835,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
LLVMValueRef orig_mask = lp_build_mask_value(mask);
LLVMValueRef front_facing = NULL;
-
+ boolean have_z, have_s;
/*
* Depths are expected to be between 0 and 1, even if they are stored in
@@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
}
/* Pick the type matching the depth-stencil format. */
- zs_type = lp_depth_type(format_desc, z_src_type.length);
+ z_type = lp_depth_type(format_desc, z_src_type.length);
/* Pick the intermediate type for depth operations. */
- z_type = zs_type;
- /* FIXME: Cope with a depth test type with higher bit width. */
- assert(zs_type.width <= z_src_type.width);
z_type.width = z_src_type.width;
assert(z_type.length == z_src_type.length);
@@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
if (depth->enabled) {
assert(z_swizzle < 4);
- assert(format_desc->block.bits <= z_type.width);
if (z_type.floating) {
assert(z_swizzle == 0);
assert(format_desc->channel[z_swizzle].type ==
UTIL_FORMAT_TYPE_FLOAT);
- assert(format_desc->channel[z_swizzle].size ==
- format_desc->block.bits);
+ assert(format_desc->channel[z_swizzle].size == 32);
}
else {
assert(format_desc->channel[z_swizzle].type ==
@@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
{
unsigned s_shift, s_mask;
- if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
+ z_dst = z_fb;
+ stencil_vals = s_fb;
+
+ have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+ have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+ if (have_z) {
if (z_mask != 0xffffffff) {
z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
}
@@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
*/
if (z_shift) {
LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
- z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+ z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
} else if (z_bitmask) {
- /* TODO: Instead of loading a mask from memory and ANDing, it's
- * probably faster to just shake the bits with two shifts. */
- z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+ z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
} else {
- z_dst = zs_dst;
lp_build_name(z_dst, "z_dst");
}
}
- if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+ if (have_s) {
if (s_shift) {
LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
- stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+ stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
stencil_shift = shift; /* used below */
}
- else {
- stencil_vals = zs_dst;
- }
if (s_mask != 0xffffffff) {
LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
@@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
LLVMValueRef z_fail_mask, z_pass_mask;
/* apply Z-fail operator */
- z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+ z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
stencil_refs, stencil_vals,
z_fail_mask, front_facing);
@@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
s_pass_mask, front_facing);
}
- /* Put Z and ztencil bits in the right place */
- if (z_dst && z_shift) {
+ /* Put Z and stencil bits in the right place */
+ if (have_z && z_shift) {
LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
z_dst = LLVMBuildShl(builder, z_dst, shift, "");
}
@@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
stencil_vals = LLVMBuildShl(builder, stencil_vals,
stencil_shift, "");
- /* Finally, merge/store the z/stencil values */
+ /* Finally, merge the z/stencil values */
if ((depth->enabled && depth->writemask) ||
- (stencil[0].enabled && stencil[0].writemask)) {
-
- if (z_dst && stencil_vals)
- zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
- else if (z_dst)
- zs_dst = z_dst;
- else
- zs_dst = stencil_vals;
-
- *zs_value = zs_dst;
+ (stencil[0].enabled && (stencil[0].writemask ||
+ (stencil[1].enabled && stencil[1].writemask)))) {
+
+ if (format_desc->block.bits <= 32) {
+ if (have_z && have_s)
+ *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+ else if (have_z)
+ *z_value = z_dst;
+ else
+ *z_value = stencil_vals;
+ *s_value = *z_value;
+ }
+ else {
+ *z_value = z_dst;
+ *s_value = stencil_vals;
+ }
}
if (s_pass_mask)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index c000494667d..2534dc309ce 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -63,17 +63,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
LLVMValueRef z_src,
- LLVMValueRef zs_dst,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
LLVMValueRef face,
- LLVMValueRef *zs_value,
+ LLVMValueRef *z_value,
+ LLVMValueRef *s_value,
boolean do_branch);
-LLVMValueRef
+void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
+ LLVMValueRef *z_fb,
+ LLVMValueRef *s_fb,
LLVMValueRef loop_counter);
void
@@ -81,10 +85,13 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
LLVMValueRef loop_counter,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
- LLVMValueRef zs_value);
+ LLVMValueRef z_value,
+ LLVMValueRef s_value);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index a557db4b4dc..8a4b00f785d 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_scene *scene = task->scene;
- uint32_t clear_value = arg.clear_zstencil.value;
- uint32_t clear_mask = arg.clear_zstencil.mask;
+ uint64_t clear_value64 = arg.clear_zstencil.value;
+ uint64_t clear_mask64 = arg.clear_zstencil.mask;
+ uint32_t clear_value = (uint32_t) clear_value64;
+ uint32_t clear_mask = (uint32_t) clear_mask64;
const unsigned height = TILE_SIZE;
const unsigned width = TILE_SIZE;
const unsigned block_size = scene->zsbuf.blocksize;
@@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
}
}
break;
+ case 8:
+ clear_value64 &= clear_mask64;
+ if (clear_mask64 == 0xffffffffffULL) {
+ for (i = 0; i < height; i++) {
+ uint64_t *row = (uint64_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = clear_value64;
+ dst += dst_stride;
+ }
+ }
+ else {
+ for (i = 0; i < height; i++) {
+ uint64_t *row = (uint64_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint64_t tmp = ~clear_mask64 & *row;
+ *row++ = clear_value64 | tmp;
+ }
+ dst += dst_stride;
+ }
+ }
+ break;
+
default:
assert(0);
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 2f5fa227619..8dd3615e78a 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -154,8 +154,8 @@ union lp_rast_cmd_arg {
const struct lp_rast_state *set_state;
union pipe_color_union clear_color;
struct {
- uint32_t value;
- uint32_t mask;
+ uint64_t value;
+ uint64_t mask;
} clear_zstencil;
const struct lp_rast_state *state;
struct lp_fence *fence;
@@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
static INLINE union lp_rast_cmd_arg
-lp_rast_arg_clearzs( unsigned value, unsigned mask )
+lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
{
union lp_rast_cmd_arg arg;
arg.clear_zstencil.value = value;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index f9cbdaba62c..9fef34e77e0 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup,
unsigned stencil,
unsigned flags )
{
- uint32_t zsmask = 0;
- uint32_t zsvalue = 0;
+ uint64_t zsmask = 0;
+ uint64_t zsvalue = 0;
union lp_rast_cmd_arg color_arg;
unsigned i;
@@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup,
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
- uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+ uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
- zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
- depth,
- stencil);
+ zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
+ depth,
+ stencil);
- zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
- zmask,
- smask);
+ zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
+ zmask,
+ smask);
zsvalue &= zsmask;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index efc2ab76813..6b35a0209ca 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -112,8 +112,8 @@ struct lp_setup_context
struct {
unsigned flags;
union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */
- unsigned zsmask;
- unsigned zsvalue; /**< lp_rast_clear_zstencil() cmd */
+ uint64_t zsmask;
+ uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */
} clear;
enum setup_state {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 69212109a87..1dfc75a42dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
LLVMValueRef mask_ptr, mask_val;
LLVMValueRef consts_ptr;
LLVMValueRef z;
- LLVMValueRef zs_value = NULL;
+ LLVMValueRef z_value, s_value;
+ LLVMValueRef z_fb, s_fb;
LLVMValueRef stencil_refs[2];
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
struct lp_build_for_loop_state loop_state;
@@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
memset(&system_values, 0, sizeof(system_values));
if (key->depth.enabled ||
- key->stencil[0].enabled ||
- key->stencil[1].enabled) {
+ key->stencil[0].enabled) {
zs_format_desc = util_format_description(key->zsbuf_format);
assert(zs_format_desc);
@@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
}
if (!(key->depth.enabled && key->depth.writemask) &&
- !(key->stencil[0].enabled && key->stencil[0].writemask))
+ !((key->stencil[0].enabled && (key->stencil[0].writemask ||
+ (key->stencil[1].enabled &&
+ key->stencil[1].writemask)))))
depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
}
else {
@@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
z = interp->pos[2];
if (depth_mode & EARLY_DEPTH_TEST) {
- LLVMValueRef zs_dst_val;
- zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
- zs_format_desc,
- depth_ptr, depth_stride,
- loop_state.counter);
+ lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc,
+ depth_ptr, depth_stride,
+ &z_fb, &s_fb, loop_state.counter);
lp_build_depth_stencil_test(gallivm,
&key->depth,
key->stencil,
@@ -349,16 +350,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
zs_format_desc,
&mask,
stencil_refs,
- z,
- zs_dst_val,
+ z, z_fb, s_fb,
facing,
- &zs_value,
+ &z_value, &s_value,
!simple_shader);
if (depth_mode & EARLY_DEPTH_WRITE) {
lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
- NULL, loop_state.counter,
- depth_ptr, depth_stride, zs_value);
+ NULL, NULL, NULL, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
}
}
@@ -394,7 +395,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
- LLVMValueRef zs_dst_val;
int pos0 = find_output_by_semantic(&shader->info.base,
TGSI_SEMANTIC_POSITION,
0);
@@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
}
- zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
- zs_format_desc,
- depth_ptr, depth_stride,
- loop_state.counter);
+ lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc,
+ depth_ptr, depth_stride,
+ &z_fb, &s_fb, loop_state.counter);
lp_build_depth_stencil_test(gallivm,
&key->depth,
@@ -415,16 +415,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
zs_format_desc,
&mask,
stencil_refs,
- z,
- zs_dst_val,
+ z, z_fb, s_fb,
facing,
- &zs_value,
+ &z_value, &s_value,
!simple_shader);
/* Late Z write */
if (depth_mode & LATE_DEPTH_WRITE) {
lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
- NULL, loop_state.counter,
- depth_ptr, depth_stride, zs_value);
+ NULL, NULL, NULL, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
}
}
else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -435,8 +435,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
* write that out.
*/
lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
- &mask, loop_state.counter,
- depth_ptr, depth_stride, zs_value);
+ &mask, z_fb, s_fb, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
}