diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.c | 317 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_depth.h | 36 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_jit.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast.c | 167 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_priv.h | 105 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_scene.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 56 |
7 files changed, 414 insertions, 273 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index b9dbdc5a8ac..1cd36b87909 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -36,21 +36,13 @@ * flushing would avoid this, but it would most likely result in depth fighting * artifacts. * - * We are free to use a different pixel layout though. Since our basic - * processing unit is a quad (2x2 pixel block) we store the depth/stencil - * values tiled, a quad at time. That is, a depth buffer containing - * - * Z11 Z12 Z13 Z14 ... - * Z21 Z22 Z23 Z24 ... - * Z31 Z32 Z33 Z34 ... - * Z41 Z42 Z43 Z44 ... - * ... ... ... ... ... - * - * will actually be stored in memory as - * - * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... - * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... - * ... ... ... ... ... ... ... ... ... + * Since we're using linear layout for everything, but we need to deal with + * 2x2 quads, we need to load/store multiple values and swizzle them into + * place (we could avoid this by doing depth/stencil testing in linear format, + * which would be easy for late depth/stencil test as we could do that after + * the fragment shader loop just as we do for color buffers, but more tricky + * for early depth test as we'd need both masks and interpolated depth in + * linear format). * * * @author Jose Fonseca <[email protected]> @@ -71,6 +63,7 @@ #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_pack.h" #include "lp_bld_depth.h" @@ -515,6 +508,219 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, } +/** + * Load depth/stencil values. + * The stored values are linear, swizzle them. + * + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param loop_counter the current loop iteration + * \param depth_ptr pointer to the depth/stencil values of this 4x4 block + * \param depth_stride stride of the depth/stencil buffer + */ +LLVMValueRef +lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef loop_counter) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef zs_dst, zs_dst1, zs_dst2; + LLVMValueRef zs_dst_ptr; + LLVMValueRef depth_offset1, depth_offset2; + unsigned depth_bits = format_desc->block.bits/8; + struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); + struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + + if (z_src_type.length == 4) { + unsigned i; + LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 2), ""); + LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, + depth_stride, ""); + depth_offset1 = LLVMBuildMul(builder, looplsb, + lp_build_const_int32(gallivm, depth_bits * 2), ""); + depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); + + /* just concatenate the loaded 2x2 values into 4-wide vector */ + for (i = 0; i < 4; i++) { + shuffles[i] = lp_build_const_int32(gallivm, i); + } + } + else { + unsigned i; + LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + assert(z_src_type.length == 8); + depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); + /* + * We load 2x4 values, and need to swizzle them (order + * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. + */ + for (i = 0; i < 8; i++) { + + shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + } + } + + depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); + + /* Load current z/stencil values from z/stencil buffer */ + zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + + zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, + LLVMConstVector(shuffles, zs_type.length), ""); + + if (format_desc->block.bits < z_src_type.width) { + /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ + zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), ""); + } + + lp_build_name(zs_dst, "zs_dst"); + + return zs_dst; +} + +/** + * Store depth/stencil values. + * Incoming values are swizzled (typically n 2x2 quads), stored linear. + * If there's a mask it will do reload/select/store otherwise just store. + * + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param loop_counter the current loop iteration + * \param depth_ptr pointer to the depth/stencil values of this 4x4 block + * \param depth_stride stride of the depth/stencil buffer + * \param zs_value the depth/stencil values to store + */ +void +lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef loop_counter, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef zs_value) +{ + struct lp_build_context z_bld; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mask_value = NULL; + LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2; + LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; + LLVMValueRef depth_offset1, depth_offset2; + unsigned depth_bits = format_desc->block.bits/8; + struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); + struct lp_type zs_load_type = zs_type; + zs_load_type.length = zs_load_type.length / 2; + + lp_build_context_init(&z_bld, gallivm, zs_type); + + /* + * This is far from ideal, at least for late depth write we should do this + * outside the fs loop to avoid all the swizzle stuff. + */ + if (z_src_type.length == 4) { + unsigned i; + LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 2), ""); + LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, + depth_stride, ""); + depth_offset1 = LLVMBuildMul(builder, looplsb, + lp_build_const_int32(gallivm, depth_bits * 2), ""); + depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); + + /* just concatenate the loaded 2x2 values into 4-wide vector */ + for (i = 0; i < 4; i++) { + shuffles[i] = lp_build_const_int32(gallivm, i); + } + } + else { + unsigned i; + LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + assert(z_src_type.length == 8); + depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); + /* + * We load 2x4 values, and need to swizzle them (order + * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. + */ + for (i = 0; i < 8; i++) { + shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + } + } + + + depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); + + zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); + zs_dst_ptr1 = LLVMBuildBitCast(builder, + zs_dst_ptr1, + LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); + zs_dst_ptr2 = LLVMBuildBitCast(builder, + zs_dst_ptr2, + LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), ""); + + if (mask) { + zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, ""); + zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, ""); + zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, + LLVMConstVector(shuffles, zs_type.length), + "zsbufval"); + + mask_value = lp_build_mask_value(mask); + } + + if (zs_type.width < z_src_type.width) { + /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */ + zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, ""); + if (mask) + mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, ""); + } + + if (mask) { + zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst); + } + + if (z_src_type.length == 4) { + zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2); + zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2); + } + else { + assert(z_src_type.length == 8); + zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value, + LLVMConstVector(&shuffles[0], + zs_load_type.length), + ""); + zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value, + LLVMConstVector(&shuffles[4], + zs_load_type.length), + ""); + + } + LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); + LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); +} /** * Generate code for performing depth and/or stencil tests. @@ -527,7 +733,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) - * \param zs_dst_ptr pointer to depth/stencil values in framebuffer + * \param zs_dst the depth/stencil values in framebuffer * \param face contains boolean value indicating front/back facing polygon */ void @@ -539,7 +745,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], LLVMValueRef z_src, - LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_dst, LLVMValueRef face, LLVMValueRef *zs_value, boolean do_branch) @@ -551,7 +757,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, struct lp_build_context s_bld; struct lp_type s_type; unsigned z_shift = 0, z_width = 0, z_mask = 0; - LLVMValueRef zs_dst, z_dst = NULL; + LLVMValueRef z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; @@ -638,19 +844,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, s_type = lp_int_type(z_type); lp_build_context_init(&s_bld, gallivm, s_type); - /* Load current z/stencil value from z/stencil buffer */ - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(gallivm, zs_type), 0), ""); - zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - if (format_desc->block.bits < z_type.width) { - /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ - zs_dst = LLVMBuildZExt(builder, zs_dst, z_bld.vec_type, ""); - } - - lp_build_name(zs_dst, "zs_dst"); - - /* Compute and apply the Z/stencil bitmasks and shifts. */ { @@ -860,65 +1053,3 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, } - -void -lp_build_depth_write(struct gallivm_state *gallivm, - struct lp_type z_src_type, - const struct util_format_description *format_desc, - LLVMValueRef zs_dst_ptr, - LLVMValueRef zs_value) -{ - LLVMBuilderRef builder = gallivm->builder; - - if (format_desc->block.bits < z_src_type.width) { - /* Truncate income ZS values (e.g., when writing to Z16_UNORM) */ - LLVMTypeRef zs_type = LLVMIntTypeInContext(gallivm->context, format_desc->block.bits); - if (z_src_type.length > 1) { - zs_type = LLVMVectorType(zs_type, z_src_type.length); - } - zs_value = LLVMBuildTrunc(builder, zs_value, zs_type, ""); - } - - zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, - LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); - - LLVMBuildStore(builder, zs_value, zs_dst_ptr); -} - - -void -lp_build_deferred_depth_write(struct gallivm_state *gallivm, - struct lp_type z_src_type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef zs_dst_ptr, - LLVMValueRef zs_value) -{ - struct lp_type z_type; - struct lp_build_context z_bld; - LLVMValueRef z_dst; - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef mask_value; - - /* XXX: pointlessly redo type logic: - */ - z_type = lp_depth_type(format_desc, z_src_type.length); - lp_build_context_init(&z_bld, gallivm, z_type); - - zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, - LLVMPointerType(z_bld.vec_type, 0), ""); - - z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); - - mask_value = lp_build_mask_value(mask); - - if (z_type.width < z_src_type.width) { - /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */ - zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, ""); - mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.int_vec_type, ""); - } - - z_dst = lp_build_select(&z_bld, mask_value, zs_value, z_dst); - - LLVMBuildStore(builder, z_dst, zs_dst_ptr); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 33cb0dd4a9e..c000494667d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -58,30 +58,34 @@ void lp_build_depth_stencil_test(struct gallivm_state *gallivm, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], - LLVMValueRef zs_src, - LLVMValueRef zs_dst_ptr, - LLVMValueRef facing, + LLVMValueRef z_src, + LLVMValueRef zs_dst, + LLVMValueRef face, LLVMValueRef *zs_value, boolean do_branch); -void -lp_build_depth_write(struct gallivm_state *gallivm, - struct lp_type z_src_type, - const struct util_format_description *format_desc, - LLVMValueRef zs_dst_ptr, - LLVMValueRef zs_value); +LLVMValueRef +lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef loop_counter); void -lp_build_deferred_depth_write(struct gallivm_state *gallivm, - struct lp_type z_src_type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef zs_dst_ptr, - LLVMValueRef zs_value); +lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef loop_counter, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef zs_value); + void lp_build_occlusion_count(struct gallivm_state *gallivm, diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 4eddb2a2f3c..4e9ca764fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -193,6 +193,7 @@ enum { * @param mask mask of visible pixels in block * @param thread_data task thread data * @param stride color buffer row stride in bytes + * @param depth_stride depth buffer row stride in bytes */ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, @@ -206,7 +207,8 @@ typedef void void *depth, uint32_t mask, struct lp_jit_thread_data *thread_data, - unsigned *stride); + unsigned *stride, + unsigned depth_stride); void diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ef49ba9ab6f..a557db4b4dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -89,51 +89,15 @@ static void lp_rast_tile_begin(struct lp_rasterizer_task *task, const struct cmd_bin *bin) { - const struct lp_scene *scene = task->scene; - enum lp_texture_usage usage; - LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y); task->bin = bin; task->x = bin->x * TILE_SIZE; task->y = bin->y * TILE_SIZE; - /* reset pointers to color tile(s) */ + /* reset pointers to color and depth tile(s) */ memset(task->color_tiles, 0, sizeof(task->color_tiles)); - - /* get pointer to depth/stencil tile */ - { - struct pipe_surface *zsbuf = task->scene->fb.zsbuf; - if (zsbuf) { - struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture); - - if (scene->has_depthstencil_clear) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - - /* "prime" the tile: convert data from linear to tiled if necessary - * and update the tile's layout info. - */ - (void) llvmpipe_get_texture_tile(lpt, - zsbuf->u.tex.first_layer, - zsbuf->u.tex.level, - usage, - task->x, - task->y); - /* Get actual pointer to the tile data. Note that depth/stencil - * data is tiled differently than color data. - */ - task->depth_tile = lp_rast_get_depth_block_pointer(task, - task->x, - task->y); - - assert(task->depth_tile); - } - else { - task->depth_tile = NULL; - } - } + task->depth_tile = NULL; } @@ -220,8 +184,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, - - /** * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. @@ -233,10 +195,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, const struct lp_scene *scene = task->scene; uint32_t clear_value = arg.clear_zstencil.value; uint32_t clear_mask = arg.clear_zstencil.mask; - const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; - const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; + const unsigned height = TILE_SIZE; + const unsigned width = TILE_SIZE; const unsigned block_size = scene->zsbuf.blocksize; - const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT; + const unsigned dst_stride = scene->zsbuf.stride; uint8_t *dst; unsigned i, j; @@ -244,65 +206,64 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, __FUNCTION__, clear_value, clear_mask); /* - * Clear the area of the swizzled depth/depth buffer matching this tile, in - * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time. - * - * The swizzled depth format is such that the depths for - * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets. + * Clear the area of the depth/depth buffer matching this tile. */ - dst = task->depth_tile; + if (scene->fb.zsbuf) { - clear_value &= clear_mask; + dst = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); - switch (block_size) { - case 1: - assert(clear_mask == 0xff); - memset(dst, (uint8_t) clear_value, height * width); - break; - case 2: - if (clear_mask == 0xffff) { - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) clear_value; - dst += dst_stride; + clear_value &= clear_mask; + + switch (block_size) { + case 1: + assert(clear_mask == 0xff); + memset(dst, (uint8_t) clear_value, height * width); + break; + case 2: + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } } - } - else { - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) { - uint16_t tmp = ~clear_mask & *row; - *row++ = clear_value | tmp; + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; } - dst += dst_stride; } - } - break; - case 4: - if (clear_mask == 0xffffffff) { - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = clear_value; - dst += dst_stride; + break; + case 4: + if (clear_mask == 0xffffffff) { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = clear_value; + dst += dst_stride; + } } - } - else { - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) { - uint32_t tmp = ~clear_mask & *row; - *row++ = clear_value | tmp; + else { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) { + uint32_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; } - dst += dst_stride; } + break; + default: + assert(0); + break; } - break; - default: - assert(0); - break; } } @@ -343,7 +304,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, for (x = 0; x < TILE_SIZE; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; - uint32_t *depth; + uint8_t *depth = NULL; + unsigned depth_stride = 0; unsigned i; /* color buffer */ @@ -354,7 +316,11 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, } /* depth buffer */ - depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y); + if (scene->zsbuf.map) { + depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x, tile_y + y); + depth_stride = scene->zsbuf.stride; + } + /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); @@ -368,7 +334,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, depth, 0xffff, &task->thread_data, - stride); + stride, + depth_stride); END_JIT_CALL(); } } @@ -412,7 +379,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; - void *depth; + void *depth = NULL; + unsigned depth_stride = 0; unsigned i; assert(state); @@ -434,8 +402,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, } /* depth buffer */ - depth = lp_rast_get_depth_block_pointer(task, x, y); - + if (scene->zsbuf.map) { + depth_stride = scene->zsbuf.stride; + depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y); + } assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); @@ -451,7 +421,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, depth, mask, &task->thread_data, - stride); + stride, + depth_stride); END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index c0f41f69fe2..7d01da15113 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -140,48 +140,39 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, /** - * Get the pointer to a 4x4 depth/stencil block. - * We'll map the z/stencil buffer on demand here. - * Note that this may be called even when there's no z/stencil buffer - return - * NULL in that case. - * \param x, y location of 4x4 block in window coords + * Get pointer to the unswizzled color tile */ -static INLINE void * -lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, - unsigned x, unsigned y) +static INLINE uint8_t * +lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task, + unsigned buf, enum lp_texture_usage usage) { const struct lp_scene *scene = task->scene; - void *depth; + unsigned format_bytes; - assert(x < scene->tiles_x * TILE_SIZE); - assert(y < scene->tiles_y * TILE_SIZE); - assert((x % TILE_VECTOR_WIDTH) == 0); - assert((y % TILE_VECTOR_HEIGHT) == 0); + assert(task->x < scene->tiles_x * TILE_SIZE); + assert(task->y < scene->tiles_y * TILE_SIZE); + assert(task->x % TILE_SIZE == 0); + assert(task->y % TILE_SIZE == 0); + assert(buf < scene->fb.nr_cbufs); - if (!scene->zsbuf.map) { - /* Either out of memory or no zsbuf. Can't tell without access - * to the state. Just use dummy tile memory, but don't print - * the oom warning as this most likely because there is no - * zsbuf. - */ - return lp_dummy_tile; - } + if (!task->color_tiles[buf]) { + struct pipe_surface *cbuf = scene->fb.cbufs[buf]; + assert(cbuf); - depth = (scene->zsbuf.map + - scene->zsbuf.stride * y + - scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); + format_bytes = util_format_get_blocksize(cbuf->format); + task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x; + } - assert(lp_check_alignment(depth, 16)); - return depth; + return task->color_tiles[buf]; } /** - * Get pointer to the unswizzled color tile + * Get pointer to the unswizzled depth tile */ static INLINE uint8_t * -lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task, - unsigned buf, enum lp_texture_usage usage) +lp_rast_get_unswizzled_depth_tile_pointer(struct lp_rasterizer_task *task, + enum lp_texture_usage usage) { const struct lp_scene *scene = task->scene; unsigned format_bytes; @@ -190,17 +181,16 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task, assert(task->y < scene->tiles_y * TILE_SIZE); assert(task->x % TILE_SIZE == 0); assert(task->y % TILE_SIZE == 0); - assert(buf < scene->fb.nr_cbufs); - if (!task->color_tiles[buf]) { - struct pipe_surface *cbuf = scene->fb.cbufs[buf]; - assert(cbuf); + if (!task->depth_tile) { + struct pipe_surface *dbuf = scene->fb.zsbuf; + assert(dbuf); - format_bytes = util_format_get_blocksize(cbuf->format); - task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x; + format_bytes = util_format_get_blocksize(dbuf->format); + task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y + format_bytes * task->x; } - return task->color_tiles[buf]; + return task->depth_tile; } @@ -237,6 +227,38 @@ lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task, } +/** + * Get the pointer to an unswizzled 4x4 depth block (within an unswizzled 64x64 tile). + * \param x, y location of 4x4 block in window coords + */ +static INLINE uint8_t * +lp_rast_get_unswizzled_depth_block_pointer(struct lp_rasterizer_task *task, + unsigned x, unsigned y) +{ + unsigned px, py, pixel_offset, format_bytes; + uint8_t *depth; + + assert(x < task->scene->tiles_x * TILE_SIZE); + assert(y < task->scene->tiles_y * TILE_SIZE); + assert((x % TILE_VECTOR_WIDTH) == 0); + assert((y % TILE_VECTOR_HEIGHT) == 0); + + format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format); + + depth = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); + assert(depth); + + px = x % TILE_SIZE; + py = y % TILE_SIZE; + pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride; + + depth = depth + pixel_offset; + + assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format))); + return depth; +} + + /** * Shade all pixels in a 4x4 block. The fragment code omits the @@ -253,7 +275,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; unsigned stride[PIPE_MAX_COLOR_BUFS]; - void *depth; + void *depth = NULL; + unsigned depth_stride = 0; unsigned i; /* color buffer */ @@ -263,7 +286,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y); } - depth = lp_rast_get_depth_block_pointer(task, x, y); + if (scene->zsbuf.map) { + depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y); + depth_stride = scene->zsbuf.stride; + } /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); @@ -277,7 +303,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, depth, 0xffff, &task->thread_data, - stride ); + stride, + depth_stride); END_JIT_CALL(); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index a8885863ef0..e05ea753b4b 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -185,7 +185,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene) zsbuf->u.tex.level, zsbuf->u.tex.first_layer, LP_TEX_USAGE_READ_WRITE, - LP_TEX_LAYOUT_NONE); + LP_TEX_LAYOUT_LINEAR); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1a9a194c8be..69212109a87 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -229,7 +229,7 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMValueRef mask_store, LLVMValueRef (*out_color)[4], LLVMValueRef depth_ptr, - unsigned depth_bits, + LLVMValueRef depth_stride, LLVMValueRef facing, LLVMValueRef thread_data_ptr) { @@ -241,8 +241,6 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMValueRef z; LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - LLVMValueRef depth_ptr_i; - LLVMValueRef depth_offset; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; struct lp_build_for_loop_state loop_state; struct lp_build_mask_context mask; @@ -308,12 +306,6 @@ generate_fs_loop(struct gallivm_state *gallivm, &loop_state.counter, 1, "mask_ptr"); mask_val = LLVMBuildLoad(builder, mask_ptr, ""); - depth_offset = LLVMBuildMul(builder, loop_state.counter, - lp_build_const_int32(gallivm, depth_bits * type.length), - ""); - - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - memset(outputs, 0, sizeof outputs); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { @@ -345,6 +337,11 @@ generate_fs_loop(struct gallivm_state *gallivm, z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { + LLVMValueRef zs_dst_val; + zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + loop_state.counter); lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, @@ -353,12 +350,15 @@ generate_fs_loop(struct gallivm_state *gallivm, &mask, stencil_refs, z, - depth_ptr_i, facing, + zs_dst_val, + facing, &zs_value, !simple_shader); if (depth_mode & EARLY_DEPTH_WRITE) { - lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value); + lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + NULL, loop_state.counter, + depth_ptr, depth_stride, zs_value); } } @@ -394,6 +394,7 @@ generate_fs_loop(struct gallivm_state *gallivm, /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { + LLVMValueRef zs_dst_val; int pos0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_POSITION, 0); @@ -402,6 +403,11 @@ generate_fs_loop(struct gallivm_state *gallivm, z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); } + zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, + depth_ptr, depth_stride, + loop_state.counter); + lp_build_depth_stencil_test(gallivm, &key->depth, key->stencil, @@ -410,12 +416,15 @@ generate_fs_loop(struct gallivm_state *gallivm, &mask, stencil_refs, z, - depth_ptr_i, facing, + zs_dst_val, + facing, &zs_value, !simple_shader); /* Late Z write */ if (depth_mode & LATE_DEPTH_WRITE) { - lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value); + lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + NULL, loop_state.counter, + depth_ptr, depth_stride, zs_value); } } else if ((depth_mode & EARLY_DEPTH_TEST) && @@ -425,12 +434,9 @@ generate_fs_loop(struct gallivm_state *gallivm, * depth value, update from zs_value with the new mask value and * write that out. */ - lp_build_deferred_depth_write(gallivm, - type, - zs_format_desc, - &mask, - depth_ptr_i, - zs_value); + lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc, + &mask, loop_state.counter, + depth_ptr, depth_stride, zs_value); } @@ -1749,7 +1755,7 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[12]; + LLVMTypeRef arg_types[13]; LLVMTypeRef func_type; LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); @@ -1762,6 +1768,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef color_ptr_ptr; LLVMValueRef stride_ptr; LLVMValueRef depth_ptr; + LLVMValueRef depth_stride; LLVMValueRef mask_input; LLVMValueRef thread_data_ptr; LLVMBasicBlockRef block; @@ -1772,7 +1779,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; LLVMValueRef function; LLVMValueRef facing; - const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; @@ -1847,6 +1853,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[9] = int32_type; /* mask_input */ arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */ arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */ + arg_types[12] = int32_type; /* depth_stride */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), arg_types, Elements(arg_types), 0); @@ -1875,6 +1882,7 @@ generate_fragment(struct llvmpipe_context *lp, mask_input = LLVMGetParam(function, 9); thread_data_ptr = LLVMGetParam(function, 10); stride_ptr = LLVMGetParam(function, 11); + depth_stride = LLVMGetParam(function, 12); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -1887,6 +1895,7 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(mask_input, "mask_input"); lp_build_name(stride_ptr, "stride_ptr"); + lp_build_name(depth_stride, "depth_stride"); /* * Function body @@ -1900,10 +1909,7 @@ generate_fragment(struct llvmpipe_context *lp, /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->state, context_ptr); - zs_format_desc = util_format_description(key->zsbuf_format); - { - unsigned depth_bits = zs_format_desc->block.bits/8; LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs); LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type); LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type, @@ -1951,7 +1957,7 @@ generate_fragment(struct llvmpipe_context *lp, mask_store, /* output */ color_store, depth_ptr, - depth_bits, + depth_stride, facing, thread_data_ptr); |