diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.c | 3 |
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 5 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 275 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.h | 15 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 28 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.h | 6 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup.c | 18 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_setup_context.h | 4 |
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 53 |
9 files changed, 259 insertions, 148 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 9eb9ab0261f..0a57e39611c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm, /** * Interleave vector elements. * - * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. + * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions + * (but not for 256bit AVX vectors). */ LLVMValueRef lp_build_interleave2(struct gallivm_state *gallivm, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index cc29c5c885f..7ac0029051d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1545,6 +1545,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm, bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length); } } + else if (util_format_has_stencil(bld.format_desc) && + !util_format_has_depth(bld.format_desc)) { + /* for stencil only formats, sample stencil (uint) */ + bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); + } if (!static_texture_state->level_zero_only) { derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 1cd36b87909..2376ca73c51 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc, if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + assert(format_desc->channel[z_swizzle].size == 32); } else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 
assert(format_desc->block.bits <= 32); @@ -347,15 +347,18 @@ static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, unsigned *shift, unsigned *width, unsigned *mask) { - const unsigned total_bits = format_desc->block.bits; + unsigned total_bits; unsigned z_swizzle; unsigned chan; unsigned padding_left, padding_right; - + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); + /* 64bit d/s format is special already extracted 32 bits */ + total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; + z_swizzle = format_desc->swizzle[0]; if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) @@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) return FALSE; + /* just special case 64bit d/s format */ + if (format_desc->block.bits > 32) { + assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); + *shift = 0; + *mask = 0xff; + return TRUE; + } + *shift = 0; for (chan = 0; chan < s_swizzle; chan++) *shift += format_desc->channel[chan].size; @@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer + * \param z_fb contains z values loaded from fb (may include padding) + * \param s_fb contains s values loaded from fb (may include padding) */ -LLVMValueRef +void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, + LLVMValueRef *z_fb, + LLVMValueRef *s_fb, LLVMValueRef loop_counter) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; - LLVMValueRef zs_dst, zs_dst1, zs_dst2; + 
LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr; LLVMValueRef depth_offset1, depth_offset2; - unsigned depth_bits = format_desc->block.bits/8; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); if (z_src_type.length == 4) { unsigned i; @@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, - lp_build_const_int32(gallivm, depth_bits * 2), ""); + lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); /* just concatenate the loaded 2x2 values into 4-wide vector */ @@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
*/ for (i = 0; i < 8; i++) { - shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); } } @@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, /* Load current z/stencil values from z/stencil buffer */ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); - zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, - LLVMConstVector(shuffles, zs_type.length), ""); + *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, + LLVMConstVector(shuffles, zs_type.length), ""); + *s_fb = *z_fb; if (format_desc->block.bits < z_src_type.width) { /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ - zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), ""); + *z_fb = LLVMBuildZExt(builder, *z_fb, + lp_build_int_vec_type(gallivm, z_src_type), ""); } - lp_build_name(zs_dst, "zs_dst"); + else if (format_desc->block.bits > 32) { + /* rely on llvm to handle too wide vector we have here nicely */ + unsigned i; + struct lp_type typex2 = zs_type; + struct lp_type s_type = zs_type; + LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef tmp; + + typex2.width = typex2.width / 2; + typex2.length = typex2.length * 2; + s_type.width = s_type.width / 2; + s_type.floating = 0; + + tmp = LLVMBuildBitCast(builder, *z_fb, + lp_build_vec_type(gallivm, 
typex2), ""); + + for (i = 0; i < zs_type.length; i++) { + shuffles1[i] = lp_build_const_int32(gallivm, i * 2); + shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); + } + *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles1, zs_type.length), ""); + *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles2, zs_type.length), ""); + *s_fb = LLVMBuildBitCast(builder, *s_fb, + lp_build_vec_type(gallivm, s_type), ""); + lp_build_name(*s_fb, "s_dst"); + } - return zs_dst; + lp_build_name(*z_fb, "z_dst"); + lp_build_name(*s_fb, "s_dst"); + lp_build_name(*z_fb, "z_dst"); } /** * Store depth/stencil values. * Incoming values are swizzled (typically n 2x2 quads), stored linear. - * If there's a mask it will do reload/select/store otherwise just store. + * If there's a mask it will do select/store otherwise just store. * * \param type the data type of the fragment depth/stencil values * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) + * \param z_fb z values read from fb (with padding) + * \param s_fb s values read from fb (with padding) * \param loop_counter the current loop iteration * \param depth_ptr pointer to the depth/stencil values of this 4x4 block * \param depth_stride stride of the depth/stencil buffer - * \param zs_value the depth/stencil values to store + * \param z_value the depth values to store (with padding) + * \param s_value the stencil values to store (with padding) */ void lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, - LLVMValueRef zs_value) + LLVMValueRef z_value, + LLVMValueRef s_value) { struct lp_build_context z_bld; LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; 
LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask_value = NULL; - LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2; + LLVMValueRef zs_dst1, zs_dst2; LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; LLVMValueRef depth_offset1, depth_offset2; - unsigned depth_bits = format_desc->block.bits/8; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); + + if (zs_type.width > 32) + zs_type.width = 32; lp_build_context_init(&z_bld, gallivm, zs_type); @@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, * outside the fs loop to avoid all the swizzle stuff. */ if (z_src_type.length == 4) { - unsigned i; LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, lp_build_const_int32(gallivm, 1), ""); LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, @@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, depth_stride, ""); depth_offset1 = LLVMBuildMul(builder, looplsb, - lp_build_const_int32(gallivm, depth_bits * 2), ""); + lp_build_const_int32(gallivm, depth_bytes * 2), ""); depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); - - /* just concatenate the loaded 2x2 values into 4-wide vector */ - for (i = 0; i < 4; i++) { - shuffles[i] = lp_build_const_int32(gallivm, i); - } } else { unsigned i; @@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, } } - depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); - zs_dst_ptr1 = LLVMBuildBitCast(builder, - zs_dst_ptr1, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr1 = 
LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); - zs_dst_ptr2 = LLVMBuildBitCast(builder, - zs_dst_ptr2, - LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); - if (mask) { - zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, ""); - zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, ""); - zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, - LLVMConstVector(shuffles, zs_type.length), - "zsbufval"); + if (format_desc->block.bits > 32) { + s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); + } + if (mask) { mask_value = lp_build_mask_value(mask); + z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); + if (format_desc->block.bits > 32) { + s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); + s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); + } } if (zs_type.width < z_src_type.width) { - /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */ - zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, ""); - if (mask) - mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, ""); + /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ + z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, ""); } - if (mask) { - zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst); - } - - if (z_src_type.length == 4) { - zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2); - zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2); + if (format_desc->block.bits <= 32) { + if (z_src_type.length == 4) { + zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); + zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); + } + else { + assert(z_src_type.length == 8); + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[0], + zs_load_type.length), ""); + zs_dst2 = 
LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[4], + zs_load_type.length), ""); + } } else { - assert(z_src_type.length == 8); - zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value, - LLVMConstVector(&shuffles[0], - zs_load_type.length), - ""); - zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value, - LLVMConstVector(&shuffles[4], - zs_load_type.length), - ""); - + if (z_src_type.length == 4) { + zs_dst1 = lp_build_interleave2(gallivm, zs_type, + z_value, s_value, 0); + zs_dst2 = lp_build_interleave2(gallivm, zs_type, + z_value, s_value, 1); + } + else { + unsigned i; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; + assert(z_src_type.length == 8); + for (i = 0; i < 8; i++) { + shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + + z_src_type.length); + } + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[0], + z_src_type.length), ""); + zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[8], + z_src_type.length), ""); + } + zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, + lp_build_vec_type(gallivm, zs_load_type), ""); + zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, + lp_build_vec_type(gallivm, zs_load_type), ""); } + LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); } @@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, - LLVMValueRef zs_dst, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef face, - LLVMValueRef *zs_value, + LLVMValueRef *z_value, + LLVMValueRef *s_value, boolean do_branch) { LLVMBuilderRef builder = gallivm->builder; - struct lp_type zs_type; struct lp_type z_type; struct lp_build_context z_bld; struct lp_build_context s_bld; @@ -763,7 +835,7 @@ 
lp_build_depth_stencil_test(struct gallivm_state *gallivm, LLVMValueRef z_pass = NULL, s_pass_mask = NULL; LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; - + boolean have_z, have_s; /* * Depths are expected to be between 0 and 1, even if they are stored in @@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, } /* Pick the type matching the depth-stencil format. */ - zs_type = lp_depth_type(format_desc, z_src_type.length); + z_type = lp_depth_type(format_desc, z_src_type.length); /* Pick the intermediate type for depth operations. */ - z_type = zs_type; - /* FIXME: Cope with a depth test type with higher bit width. */ - assert(zs_type.width <= z_src_type.width); z_type.width = z_src_type.width; assert(z_type.length == z_src_type.length); @@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, if (depth->enabled) { assert(z_swizzle < 4); - assert(format_desc->block.bits <= z_type.width); if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == - format_desc->block.bits); + assert(format_desc->channel[z_swizzle].size == 32); } else { assert(format_desc->channel[z_swizzle].type == @@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, { unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { + z_dst = z_fb; + stencil_vals = s_fb; + + have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); + have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); + + if (have_z) { if (z_mask != 0xffffffff) { z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); } @@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, */ if (z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); - z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + 
z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); } else if (z_bitmask) { - /* TODO: Instead of loading a mask from memory and ANDing, it's - * probably faster to just shake the bits with two shifts. */ - z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); } else { - z_dst = zs_dst; lp_build_name(z_dst, "z_dst"); } } - if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (have_s) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); - stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); stencil_shift = shift; /* used below */ } - else { - stencil_vals = zs_dst; - } if (s_mask != 0xffffffff) { LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); @@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass); stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, front_facing); @@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, s_pass_mask, front_facing); } - /* Put Z and ztencil bits in the right place */ - if (z_dst && z_shift) { + /* Put Z and stencil bits in the right place */ + if (have_z && z_shift) { LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); z_dst = LLVMBuildShl(builder, z_dst, shift, ""); } @@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, stencil_vals = LLVMBuildShl(builder, stencil_vals, stencil_shift, ""); - /* Finally, merge/store the z/stencil values */ + /* Finally, merge the z/stencil values */ if ((depth->enabled && depth->writemask) || - (stencil[0].enabled && stencil[0].writemask)) { - - if (z_dst && 
stencil_vals) - zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, ""); - else if (z_dst) - zs_dst = z_dst; - else - zs_dst = stencil_vals; - - *zs_value = zs_dst; + (stencil[0].enabled && (stencil[0].writemask || + (stencil[1].enabled && stencil[1].writemask)))) { + + if (format_desc->block.bits <= 32) { + if (have_z && have_s) + *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); + else if (have_z) + *z_value = z_dst; + else + *z_value = stencil_vals; + *s_value = *z_value; + } + else { + *z_value = z_dst; + *s_value = stencil_vals; + } } if (s_pass_mask) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index c000494667d..2534dc309ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -63,17 +63,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, - LLVMValueRef zs_dst, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef face, - LLVMValueRef *zs_value, + LLVMValueRef *z_value, + LLVMValueRef *s_value, boolean do_branch); -LLVMValueRef +void lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, + LLVMValueRef *z_fb, + LLVMValueRef *s_fb, LLVMValueRef loop_counter); void @@ -81,10 +85,13 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, + LLVMValueRef z_fb, + LLVMValueRef s_fb, LLVMValueRef loop_counter, LLVMValueRef depth_ptr, LLVMValueRef depth_stride, - LLVMValueRef zs_value); + LLVMValueRef z_value, + LLVMValueRef s_value); void diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index a557db4b4dc..8a4b00f785d 100644 --- 
a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_scene *scene = task->scene; - uint32_t clear_value = arg.clear_zstencil.value; - uint32_t clear_mask = arg.clear_zstencil.mask; + uint64_t clear_value64 = arg.clear_zstencil.value; + uint64_t clear_mask64 = arg.clear_zstencil.mask; + uint32_t clear_value = (uint32_t) clear_value64; + uint32_t clear_mask = (uint32_t) clear_mask64; const unsigned height = TILE_SIZE; const unsigned width = TILE_SIZE; const unsigned block_size = scene->zsbuf.blocksize; @@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, } } break; + case 8: + clear_value64 &= clear_mask64; + if (clear_mask64 == 0xffffffffffULL) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) + *row++ = clear_value64; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) { + uint64_t tmp = ~clear_mask64 & *row; + *row++ = clear_value64 | tmp; + } + dst += dst_stride; + } + } + break; + default: assert(0); break; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 2f5fa227619..8dd3615e78a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -154,8 +154,8 @@ union lp_rast_cmd_arg { const struct lp_rast_state *set_state; union pipe_color_union clear_color; struct { - uint32_t value; - uint32_t mask; + uint64_t value; + uint64_t mask; } clear_zstencil; const struct lp_rast_state *state; struct lp_fence *fence; @@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence ) static INLINE union lp_rast_cmd_arg -lp_rast_arg_clearzs( unsigned value, unsigned mask ) +lp_rast_arg_clearzs( uint64_t value, uint64_t mask ) { union lp_rast_cmd_arg arg; 
arg.clear_zstencil.value = value; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index f9cbdaba62c..9fef34e77e0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup, unsigned stencil, unsigned flags ) { - uint32_t zsmask = 0; - uint32_t zsvalue = 0; + uint64_t zsmask = 0; + uint64_t zsvalue = 0; union lp_rast_cmd_arg color_arg; unsigned i; @@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; - uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; - zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); + zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); - zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format, - zmask, - smask); + zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format, + zmask, + smask); zsvalue &= zsmask; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index efc2ab76813..6b35a0209ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -112,8 +112,8 @@ struct lp_setup_context struct { unsigned flags; union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */ - unsigned zsmask; - unsigned zsvalue; /**< lp_rast_clear_zstencil() cmd */ + uint64_t zsmask; + uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */ } clear; enum setup_state { diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 69212109a87..1dfc75a42dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state 
*gallivm, LLVMValueRef mask_ptr, mask_val; LLVMValueRef consts_ptr; LLVMValueRef z; - LLVMValueRef zs_value = NULL; + LLVMValueRef z_value, s_value; + LLVMValueRef z_fb, s_fb; LLVMValueRef stencil_refs[2]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; struct lp_build_for_loop_state loop_state; @@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm, memset(&system_values, 0, sizeof(system_values)); if (key->depth.enabled || - key->stencil[0].enabled || - key->stencil[1].enabled) { + key->stencil[0].enabled) { zs_format_desc = util_format_description(key->zsbuf_format); assert(zs_format_desc); @@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm, } if (!(key->depth.enabled && key->depth.writemask) && - !(key->stencil[0].enabled && key->stencil[0].writemask)) + !((key->stencil[0].enabled && (key->stencil[0].writemask || + (key->stencil[1].enabled && + key->stencil[1].writemask))))) depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); } else { @@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm, z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { - LLVMValueRef zs_dst_val; - zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, - depth_ptr, depth_stride, - loop_state.counter); + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, @@ -349,16 +350,16 @@ generate_fs_loop(struct gallivm_state *gallivm, zs_format_desc, &mask, stencil_refs, - z, - zs_dst_val, + z, z_fb, s_fb, facing, - &zs_value, + &z_value, &s_value, !simple_shader); if (depth_mode & EARLY_DEPTH_WRITE) { lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - NULL, loop_state.counter, - depth_ptr, depth_stride, zs_value); + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } } @@ -394,7 +395,6 @@ 
generate_fs_loop(struct gallivm_state *gallivm, /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { - LLVMValueRef zs_dst_val; int pos0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_POSITION, 0); @@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm, z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); } - zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, - zs_format_desc, - depth_ptr, depth_stride, - loop_state.counter); + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); lp_build_depth_stencil_test(gallivm, &key->depth, @@ -415,16 +415,16 @@ generate_fs_loop(struct gallivm_state *gallivm, zs_format_desc, &mask, stencil_refs, - z, - zs_dst_val, + z, z_fb, s_fb, facing, - &zs_value, + &z_value, &s_value, !simple_shader); /* Late Z write */ if (depth_mode & LATE_DEPTH_WRITE) { lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - NULL, loop_state.counter, - depth_ptr, depth_stride, zs_value); + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } } else if ((depth_mode & EARLY_DEPTH_TEST) && @@ -435,8 +435,9 @@ generate_fs_loop(struct gallivm_state *gallivm, * write that out. */ lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, - &mask, loop_state.counter, - depth_ptr, depth_stride, zs_value); + &mask, z_fb, s_fb, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); } |