diff options
author | Roland Scheidegger <[email protected]> | 2013-05-18 00:16:03 +0200 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2013-05-18 00:32:33 +0200 |
commit | 070a9afb5476b58a2824fac5c94bbe4f78a2d8b9 (patch) | |
tree | e77b6e523a25460af9b97fe45e6793e4ab7c0297 | |
parent | f3ad716e8f36fa1360703b73eafed1824c29db6e (diff) |
llvmpipe: handle z32s8x24 depth/stencil format
This format packs a 32-bit float depth value and an 8-bit stencil value into a
64-bit block, so the depth and stencil values need to be split up, and some new
logic is required to handle float depth and stencil simultaneously.
Also make sure the 64-bit zs clear values and masks are propagated correctly.
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 275 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 28 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 53 |
9 files changed, 259 insertions, 148 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 9eb9ab0261f..0a57e39611c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm, /** * Interleave vector elements. * - * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. + * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions + * (but not for 256bit AVX vectors). */ LLVMValueRef lp_build_interleave2(struct gallivm_state *gallivm, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index cc29c5c885f..7ac0029051d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1545,6 +1545,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length); } } + else if (util_format_has_stencil(bld.format_desc) && + !util_format_has_depth(bld.format_desc)) { + /* for stencil only formats, sample stencil (uint) */ + bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); + } if (!static_texture_state->level_zero_only) { derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 1cd36b87909..2376ca73c51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc, if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + assert(format_desc->channel[z_swizzle].size == 32); } else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 
assert(format_desc->block.bits <= 32); @@ -347,15 +347,18 @@ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, unsigned *shift, unsigned *width, unsigned *mask) { - const unsigned total_bits = format_desc->block.bits; + unsigned total_bits; unsigned z_swizzle; unsigned chan; unsigned padding_left, padding_right; - + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); + /* 64bit d/s format is special already extracted 32 bits */ + total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; + z_swizzle = format_desc->swizzle[0]; if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) @@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + /* just special case 64bit d/s format */ + if (format_desc->block.bits > 32) { + assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); + *shift = 0; + *mask = 0xff; + return TRUE; + } + *shift = 0; for (chan = 0; chan < s_swizzle; chan++) *shift += format_desc->channel[chan].size; @@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer + * \param z_fb contains z values loaded from fb (may include padding) + * \param s_fb contains s values loaded from fb (may include padding) */ -LLVMValueRef +void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, + LLVMValueRef *z_fb, + LLVMValueRef *s_fb, LLVMValueRef loop_counter) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; - LLVMValueRef zs_dst, zs_dst1, zs_dst2; + 
LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr; LLVMValueRef depth_offset1, depth_offset2; - unsigned depth_bits = format_desc->block.bits/8; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); if (z_src_type.length == 4) { unsigned i; @@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, - lp_build_const_int32(gallivm, depth_bits * 2), ""); + lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); /* just concatenate the loaded 2x2 values into 4-wide vector */ @@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
*/ for (i = 0; i < 8; i++) { - shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } @@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, /* Load current z/stencil values from z/stencil buffer */ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); - zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, - LLVMConstVector(shuffles, zs_type.length), ""); + *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, + LLVMConstVector(shuffles, zs_type.length), ""); + *s_fb = *z_fb; if (format_desc->block.bits < z_src_type.width) { /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ - zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), ""); + *z_fb = LLVMBuildZExt(builder, *z_fb, + lp_build_int_vec_type(gallivm, z_src_type), ""); } - lp_build_name(zs_dst, "zs_dst"); + else if (format_desc->block.bits > 32) { + /* rely on llvm to handle too wide vector we have here nicely */ + unsigned i; + struct lp_type typex2 = zs_type; + struct lp_type s_type = zs_type; + LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef tmp; + + typex2.width = typex2.width / 2; + typex2.length = typex2.length * 2; + s_type.width = s_type.width / 2; + s_type.floating = 0; + + tmp = LLVMBuildBitCast(builder, *z_fb, + lp_build_vec_type(gallivm, 
typex2), ""); + + for (i = 0; i < zs_type.length; i++) { + shuffles1[i] = lp_build_const_int32(gallivm, i * 2); + shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); + } + *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles1, zs_type.length), ""); + *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles2, zs_type.length), ""); + *s_fb = LLVMBuildBitCast(builder, *s_fb, + lp_build_vec_type(gallivm, s_type), ""); + lp_build_name(*s_fb, "s_dst"); + } - return zs_dst; + lp_build_name(*z_fb, "z_dst"); + lp_build_name(*s_fb, "s_dst"); + lp_build_name(*z_fb, "z_dst"); } /** * Store depth/stencil values. * Incoming values are swizzled (typically n 2x2 quads), stored linear. - * If there's a mask it will do reload/select/store otherwise just store. + * If there's a mask it will do select/store otherwise just store. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) + * \param z_fb z values read from fb (with padding) + * \param s_fb s values read from fb (with padding) * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer - * \param zs_value the depth/stencil values to store + * \param z_value the depth values to store (with padding) + * \param s_value the stencil values to store (with padding) */ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, - LLVMValueRef zs_value) + LLVMValueRef z_value, + LLVMValueRef s_value) { struct lp_build_context z_bld; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; 
LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask_value = NULL; - LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2; + LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; LLVMValueRef depth_offset1, depth_offset2; - unsigned depth_bits = format_desc->block.bits/8; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); + + if (zs_type.width > 32) + zs_type.width = 32; lp_build_context_init(&z_bld, gallivm, zs_type); @@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, * outside the fs loop to avoid all the swizzle stuff. */ if (z_src_type.length == 4) { - unsigned i; LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, @@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, - lp_build_const_int32(gallivm, depth_bits * 2), ""); + lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); - - /* just concatenate the loaded 2x2 values into 4-wide vector */ - for (i = 0; i < 4; i++) { - shuffles[i] = lp_build_const_int32(gallivm, i); - } } else { unsigned i; @@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, } } - depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); - zs_dst_ptr1 = LLVMBuildBitCast(builder, - zs_dst_ptr1, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr1 = 
LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); - zs_dst_ptr2 = LLVMBuildBitCast(builder, - zs_dst_ptr2, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); - if (mask) { - zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, ""); - zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, ""); - zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, - LLVMConstVector(shuffles, zs_type.length), - "zsbufval"); + if (format_desc->block.bits > 32) { + s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); + } + if (mask) { mask_value = lp_build_mask_value(mask); + z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); + if (format_desc->block.bits > 32) { + s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); + s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); + } } if (zs_type.width < z_src_type.width) { - /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */ - zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, ""); - if (mask) - mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, ""); + /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ + z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, ""); } - if (mask) { - zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst); - } - - if (z_src_type.length == 4) { - zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2); - zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2); + if (format_desc->block.bits <= 32) { + if (z_src_type.length == 4) { + zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); + zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); + } + else { + assert(z_src_type.length == 8); + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[0], + zs_load_type.length), ""); + zs_dst2 = 
LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[4], + zs_load_type.length), ""); + } } else { - assert(z_src_type.length == 8); - zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value, - LLVMConstVector(&shuffles[0], - zs_load_type.length), - ""); - zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value, - LLVMConstVector(&shuffles[4], - zs_load_type.length), - ""); - + if (z_src_type.length == 4) { + zs_dst1 = lp_build_interleave2(gallivm, zs_type, + z_value, s_value, 0); + zs_dst2 = lp_build_interleave2(gallivm, zs_type, + z_value, s_value, 1); + } + else { + unsigned i; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; + assert(z_src_type.length == 8); + for (i = 0; i < 8; i++) { + shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + + z_src_type.length); + } + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[0], + z_src_type.length), ""); + zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[8], + z_src_type.length), ""); + } + zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, + lp_build_vec_type(gallivm, zs_load_type), ""); + zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, + lp_build_vec_type(gallivm, zs_load_type), ""); } + LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); } @@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, - LLVMValueRef zs_dst, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef face, - LLVMValueRef *zs_value, + LLVMValueRef *z_value, + LLVMValueRef *s_value, boolean do_branch) { LLVMBuilderRef builder = gallivm->builder; - struct lp_type zs_type; struct lp_type z_type; struct lp_build_context z_bld; struct lp_build_context s_bld; @@ -763,7 +835,7 @@ 
lp_build_depth_stencil_test(struct gallivm_state *gallivm, LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; - + boolean have_z, have_s; /* * Depths are expected to be between 0 and 1, even if they are stored in @@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, } /* Pick the type matching the depth-stencil format. */ - zs_type = lp_depth_type(format_desc, z_src_type.length); + z_type = lp_depth_type(format_desc, z_src_type.length); /* Pick the intermediate type for depth operations. */ - z_type = zs_type; - /* FIXME: Cope with a depth test type with higher bit width. */ - assert(zs_type.width <= z_src_type.width); z_type.width = z_src_type.width; assert(z_type.length == z_src_type.length); @@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, if (depth->enabled) { assert(z_swizzle < 4); - assert(format_desc->block.bits <= z_type.width); if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == - format_desc->block.bits); + assert(format_desc->channel[z_swizzle].size == 32); } else { assert(format_desc->channel[z_swizzle].type == @@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, { unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { + z_dst = z_fb; + stencil_vals = s_fb; + + have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); + have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); + + if (have_z) { if (z_mask != 0xffffffff) { z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); } @@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, */ if (z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); - z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + 
z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); } else if (z_bitmask) { - /* TODO: Instead of loading a mask from memory and ANDing, it's - * probably faster to just shake the bits with two shifts. */ - z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); } else { - z_dst = zs_dst; lp_build_name(z_dst, "z_dst"); } } - if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (have_s) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); - stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); stencil_shift = shift; /* used below */ } - else { - stencil_vals = zs_dst; - } if (s_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); @@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, front_facing); @@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, s_pass_mask, front_facing); } - /* Put Z and ztencil bits in the right place */ - if (z_dst && z_shift) { + /* Put Z and stencil bits in the right place */ + if (have_z && z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildShl(builder, z_dst, shift, ""); } @@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, stencil_vals = LLVMBuildShl(builder, stencil_vals, stencil_shift, ""); - /* Finally, merge/store the z/stencil values */ + /* Finally, merge the z/stencil values */ if ((depth->enabled && depth->writemask) || - (stencil[0].enabled && stencil[0].writemask)) { - - if (z_dst && 
stencil_vals) - zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); - else if (z_dst) - zs_dst = z_dst; - else - zs_dst = stencil_vals; - - *zs_value = zs_dst; + (stencil[0].enabled && (stencil[0].writemask || + (stencil[1].enabled && stencil[1].writemask)))) { + + if (format_desc->block.bits <= 32) { + if (have_z && have_s) + *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); + else if (have_z) + *z_value = z_dst; + else + *z_value = stencil_vals; + *s_value = *z_value; + } + else { + *z_value = z_dst; + *s_value = stencil_vals; + } } if (s_pass_mask) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index c000494667d..2534dc309ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -63,17 +63,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, - LLVMValueRef zs_dst, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef face, - LLVMValueRef *zs_value, + LLVMValueRef *z_value, + LLVMValueRef *s_value, boolean do_branch); -LLVMValueRef +void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, + LLVMValueRef *z_fb, + LLVMValueRef *s_fb, LLVMValueRef loop_counter); void @@ -81,10 +85,13 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, - LLVMValueRef zs_value); + LLVMValueRef z_value, + LLVMValueRef s_value); void diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a557db4b4dc..8a4b00f785d 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - uint32_t clear_value = arg.clear_zstencil.value; - uint32_t clear_mask = arg.clear_zstencil.mask; + uint64_t clear_value64 = arg.clear_zstencil.value; + uint64_t clear_mask64 = arg.clear_zstencil.mask; + uint32_t clear_value = (uint32_t) clear_value64; + uint32_t clear_mask = (uint32_t) clear_mask64; const unsigned height = TILE_SIZE; const unsigned width = TILE_SIZE; const unsigned block_size = scene->zsbuf.blocksize; @@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, } } break; + case 8: + clear_value64 &= clear_mask64; + if (clear_mask64 == 0xffffffffffULL) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) + *row++ = clear_value64; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) { + uint64_t tmp = ~clear_mask64 & *row; + *row++ = clear_value64 | tmp; + } + dst += dst_stride; + } + } + break; + default: assert(0); break; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2f5fa227619..8dd3615e78a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -154,8 +154,8 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; union pipe_color_union clear_color; struct { - uint32_t value; - uint32_t mask; + uint64_t value; + uint64_t mask; } clear_zstencil; const struct lp_rast_state *state; struct lp_fence *fence; @@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence ) static INLINE union lp_rast_cmd_arg -lp_rast_arg_clearzs( unsigned value, unsigned mask ) +lp_rast_arg_clearzs( uint64_t value, uint64_t mask ) { union lp_rast_cmd_arg arg; 
arg.clear_zstencil.value = value; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f9cbdaba62c..9fef34e77e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup, unsigned stencil, unsigned flags ) { - uint32_t zsmask = 0; - uint32_t zsvalue = 0; + uint64_t zsmask = 0; + uint64_t zsvalue = 0; union lp_rast_cmd_arg color_arg; unsigned i; @@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; - zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); + zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); - zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, - zmask, - smask); + zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format, + zmask, + smask); zsvalue &= zsmask; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index efc2ab76813..6b35a0209ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -112,8 +112,8 @@ struct lp_setup_context struct { unsigned flags; union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ - unsigned zsmask; - unsigned zsvalue; /**< lp_rast_clear_zstencil() cmd */ + uint64_t zsmask; + uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */ } clear; enum setup_state { diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 69212109a87..1dfc75a42dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state 
*gallivm, LLVMValueRef mask_ptr, mask_val; LLVMValueRef consts_ptr; LLVMValueRef z; - LLVMValueRef zs_value = NULL; + LLVMValueRef z_value, s_value; + LLVMValueRef z_fb, s_fb; LLVMValueRef stencil_refs[2]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; struct lp_build_for_loop_state loop_state; @@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm, memset(&system_values, 0, sizeof(system_values)); if (key->depth.enabled || - key->stencil[0].enabled || - key->stencil[1].enabled) { + key->stencil[0].enabled) { zs_format_desc = util_format_description(key->zsbuf_format); assert(zs_format_desc); @@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm, } if (!(key->depth.enabled && key->depth.writemask) && - !(key->stencil[0].enabled && key->stencil[0].writemask)) + !((key->stencil[0].enabled && (key->stencil[0].writemask || + (key->stencil[1].enabled && + key->stencil[1].writemask))))) depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); } else { @@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm, z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { - LLVMValueRef zs_dst_val; - zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, - depth_ptr, depth_stride, - loop_state.counter); + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, @@ -349,16 +350,16 @@ generate_fs_loop(struct gallivm_state *gallivm, zs_format_desc, &mask, stencil_refs, - z, - zs_dst_val, + z, z_fb, s_fb, facing, - &zs_value, + &z_value, &s_value, !simple_shader); if (depth_mode & EARLY_DEPTH_WRITE) { lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - NULL, loop_state.counter, - depth_ptr, depth_stride, zs_value); + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } } @@ -394,7 +395,6 @@ 
generate_fs_loop(struct gallivm_state *gallivm, /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { - LLVMValueRef zs_dst_val; int pos0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_POSITION, 0); @@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm, z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); } - zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, - depth_ptr, depth_stride, - loop_state.counter); + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); lp_build_depth_stencil_test(gallivm, &key->depth, @@ -415,16 +415,16 @@ generate_fs_loop(struct gallivm_state *gallivm, zs_format_desc, &mask, stencil_refs, - z, - zs_dst_val, + z, z_fb, s_fb, facing, - &zs_value, + &z_value, &s_value, !simple_shader); /* Late Z write */ if (depth_mode & LATE_DEPTH_WRITE) { lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - NULL, loop_state.counter, - depth_ptr, depth_stride, zs_value); + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } } else if ((depth_mode & EARLY_DEPTH_TEST) && @@ -435,8 +435,9 @@ generate_fs_loop(struct gallivm_state *gallivm, * write that out. */ lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - &mask, loop_state.counter, - depth_ptr, depth_stride, zs_value); + &mask, z_fb, s_fb, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } |