about summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.c 317
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.h 36
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_jit.h 4
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c 167
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast_priv.h 105
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_scene.c 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c 56
7 files changed, 414 insertions, 273 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index b9dbdc5a8ac..1cd36b87909 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -36,21 +36,13 @@
* flushing would avoid this, but it would most likely result in depth fighting
* artifacts.
*
- * We are free to use a different pixel layout though. Since our basic
- * processing unit is a quad (2x2 pixel block) we store the depth/stencil
- * values tiled, a quad at time. That is, a depth buffer containing
- *
- * Z11 Z12 Z13 Z14 ...
- * Z21 Z22 Z23 Z24 ...
- * Z31 Z32 Z33 Z34 ...
- * Z41 Z42 Z43 Z44 ...
- * ... ... ... ... ...
- *
- * will actually be stored in memory as
- *
- * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
- * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
- * ... ... ... ... ... ... ... ... ...
+ * Since we're using linear layout for everything, but we need to deal with
+ * 2x2 quads, we need to load/store multiple values and swizzle them into
+ * place (we could avoid this by doing depth/stencil testing in linear format,
+ * which would be easy for late depth/stencil test as we could do that after
+ * the fragment shader loop just as we do for color buffers, but more tricky
+ * for early depth test as we'd need both masks and interpolated depth in
+ * linear format).
*
*
* @author Jose Fonseca <[email protected]>
@@ -71,6 +63,7 @@
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_pack.h"
#include "lp_bld_depth.h"
@@ -515,6 +508,219 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
}
+/**
+ * Load depth/stencil values.
+ * The stored values are linear, swizzle them.
+ *
+ * \param z_src_type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param loop_counter the current loop iteration
+ * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride row stride of the depth/stencil buffer in bytes
+ */
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef loop_counter)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst_ptr;
+ LLVMValueRef depth_offset1, depth_offset2;
+ unsigned depth_bits = format_desc->block.bits/8;
+ struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+ struct lp_type zs_load_type = zs_type;
+ zs_load_type.length = zs_load_type.length / 2;
+
+ if (z_src_type.length == 4) {
+ unsigned i;
+ LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 2), "");
+ LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+ depth_stride, "");
+ depth_offset1 = LLVMBuildMul(builder, looplsb,
+ lp_build_const_int32(gallivm, depth_bits * 2), "");
+ depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+ /* just concatenate the loaded 2x2 values into 4-wide vector */
+ for (i = 0; i < 4; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, i);
+ }
+ }
+ else {
+ unsigned i;
+ LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ assert(z_src_type.length == 8);
+ depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+ /*
+ * We load 2x4 values, and need to swizzle them (order
+ * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
+ */
+ for (i = 0; i < 8; i++) {
+
+ shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ }
+ }
+
+ depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+ /* Load current z/stencil values from z/stencil buffer */
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder,
+ zs_dst_ptr,
+ LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder,
+ zs_dst_ptr,
+ LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+
+ zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+ LLVMConstVector(shuffles, zs_type.length), "");
+
+ if (format_desc->block.bits < z_src_type.width) {
+ /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
+ zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+ }
+
+ lp_build_name(zs_dst, "zs_dst");
+
+ return zs_dst;
+}
+
+/**
+ * Store depth/stencil values.
+ * Incoming values are swizzled (typically n 2x2 quads), stored linear.
+ * If there's a mask it will do reload/select/store otherwise just store.
+ *
+ * \param z_src_type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param mask the alive/dead pixel mask for the quad (vector), or NULL to store unconditionally
+ * \param loop_counter the current loop iteration
+ * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride row stride of the depth/stencil buffer in bytes
+ * \param zs_value the depth/stencil values to store
+ */
+void
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef loop_counter,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef zs_value)
+{
+ struct lp_build_context z_bld;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef mask_value = NULL;
+ LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
+ LLVMValueRef depth_offset1, depth_offset2;
+ unsigned depth_bits = format_desc->block.bits/8;
+ struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+ struct lp_type zs_load_type = zs_type;
+ zs_load_type.length = zs_load_type.length / 2;
+
+ lp_build_context_init(&z_bld, gallivm, zs_type);
+
+ /*
+ * This is far from ideal, at least for late depth write we should do this
+ * outside the fs loop to avoid all the swizzle stuff.
+ */
+ if (z_src_type.length == 4) {
+ unsigned i;
+ LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 2), "");
+ LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+ depth_stride, "");
+ depth_offset1 = LLVMBuildMul(builder, looplsb,
+ lp_build_const_int32(gallivm, depth_bits * 2), "");
+ depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+ /* just concatenate the loaded 2x2 values into 4-wide vector */
+ for (i = 0; i < 4; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, i);
+ }
+ }
+ else {
+ unsigned i;
+ LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ assert(z_src_type.length == 8);
+ depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+ /*
+ * We load (when masked) and store 2x4 values, and need to swizzle them
+ * (order 0,1,4,5,2,3,6,7, its own inverse) - not so hot with avx.
+ */
+ for (i = 0; i < 8; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ }
+ }
+
+
+ depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+ zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+ zs_dst_ptr1 = LLVMBuildBitCast(builder,
+ zs_dst_ptr1,
+ LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+ zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr2 = LLVMBuildBitCast(builder,
+ zs_dst_ptr2,
+ LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+
+ if (mask) {
+ zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
+ zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
+ zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+ LLVMConstVector(shuffles, zs_type.length),
+ "zsbufval");
+
+ mask_value = lp_build_mask_value(mask);
+ }
+
+ if (zs_type.width < z_src_type.width) {
+ /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
+ zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
+ if (mask)
+ mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+ }
+
+ if (mask) {
+ zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
+ }
+
+ if (z_src_type.length == 4) {
+ zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
+ zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+ }
+ else {
+ assert(z_src_type.length == 8);
+ zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+ LLVMConstVector(&shuffles[0],
+ zs_load_type.length),
+ "");
+ zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+ LLVMConstVector(&shuffles[4],
+ zs_load_type.length),
+ "");
+
+ }
+ LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
+ LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+}
/**
* Generate code for performing depth and/or stencil tests.
@@ -527,7 +733,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
* \param mask the alive/dead pixel mask for the quad (vector)
* \param stencil_refs the front/back stencil ref values (scalar)
* \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
- * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
+ * \param zs_dst the depth/stencil values in framebuffer
* \param face contains boolean value indicating front/back facing polygon
*/
void
@@ -539,7 +745,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
LLVMValueRef z_src,
- LLVMValueRef zs_dst_ptr,
+ LLVMValueRef zs_dst,
LLVMValueRef face,
LLVMValueRef *zs_value,
boolean do_branch)
@@ -551,7 +757,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
struct lp_build_context s_bld;
struct lp_type s_type;
unsigned z_shift = 0, z_width = 0, z_mask = 0;
- LLVMValueRef zs_dst, z_dst = NULL;
+ LLVMValueRef z_dst = NULL;
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
@@ -638,19 +844,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
s_type = lp_int_type(z_type);
lp_build_context_init(&s_bld, gallivm, s_type);
- /* Load current z/stencil value from z/stencil buffer */
- zs_dst_ptr = LLVMBuildBitCast(builder,
- zs_dst_ptr,
- LLVMPointerType(lp_build_vec_type(gallivm, zs_type), 0), "");
- zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
- if (format_desc->block.bits < z_type.width) {
- /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
- zs_dst = LLVMBuildZExt(builder, zs_dst, z_bld.vec_type, "");
- }
-
- lp_build_name(zs_dst, "zs_dst");
-
-
/* Compute and apply the Z/stencil bitmasks and shifts.
*/
{
@@ -860,65 +1053,3 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
}
-
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
- struct lp_type z_src_type,
- const struct util_format_description *format_desc,
- LLVMValueRef zs_dst_ptr,
- LLVMValueRef zs_value)
-{
- LLVMBuilderRef builder = gallivm->builder;
-
- if (format_desc->block.bits < z_src_type.width) {
- /* Truncate income ZS values (e.g., when writing to Z16_UNORM) */
- LLVMTypeRef zs_type = LLVMIntTypeInContext(gallivm->context, format_desc->block.bits);
- if (z_src_type.length > 1) {
- zs_type = LLVMVectorType(zs_type, z_src_type.length);
- }
- zs_value = LLVMBuildTrunc(builder, zs_value, zs_type, "");
- }
-
- zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
- LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
-
- LLVMBuildStore(builder, zs_value, zs_dst_ptr);
-}
-
-
-void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
- struct lp_type z_src_type,
- const struct util_format_description *format_desc,
- struct lp_build_mask_context *mask,
- LLVMValueRef zs_dst_ptr,
- LLVMValueRef zs_value)
-{
- struct lp_type z_type;
- struct lp_build_context z_bld;
- LLVMValueRef z_dst;
- LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef mask_value;
-
- /* XXX: pointlessly redo type logic:
- */
- z_type = lp_depth_type(format_desc, z_src_type.length);
- lp_build_context_init(&z_bld, gallivm, z_type);
-
- zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
- LLVMPointerType(z_bld.vec_type, 0), "");
-
- z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
-
- mask_value = lp_build_mask_value(mask);
-
- if (z_type.width < z_src_type.width) {
- /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
- zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
- mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.int_vec_type, "");
- }
-
- z_dst = lp_build_select(&z_bld, mask_value, zs_value, z_dst);
-
- LLVMBuildStore(builder, z_dst, zs_dst_ptr);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 33cb0dd4a9e..c000494667d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -58,30 +58,34 @@ void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
const struct pipe_depth_state *depth,
const struct pipe_stencil_state stencil[2],
- struct lp_type type,
+ struct lp_type z_src_type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef stencil_refs[2],
- LLVMValueRef zs_src,
- LLVMValueRef zs_dst_ptr,
- LLVMValueRef facing,
+ LLVMValueRef z_src,
+ LLVMValueRef zs_dst,
+ LLVMValueRef face,
LLVMValueRef *zs_value,
boolean do_branch);
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
- struct lp_type z_src_type,
- const struct util_format_description *format_desc,
- LLVMValueRef zs_dst_ptr,
- LLVMValueRef zs_value);
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef loop_counter);
void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
- struct lp_type z_src_type,
- const struct util_format_description *format_desc,
- struct lp_build_mask_context *mask,
- LLVMValueRef zs_dst_ptr,
- LLVMValueRef zs_value);
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef loop_counter,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef zs_value);
+
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 4eddb2a2f3c..4e9ca764fe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -193,6 +193,7 @@ enum {
* @param mask mask of visible pixels in block
* @param thread_data task thread data
* @param stride color buffer row stride in bytes
+ * @param depth_stride depth buffer row stride in bytes
*/
typedef void
(*lp_jit_frag_func)(const struct lp_jit_context *context,
@@ -206,7 +207,8 @@ typedef void
void *depth,
uint32_t mask,
struct lp_jit_thread_data *thread_data,
- unsigned *stride);
+ unsigned *stride,
+ unsigned depth_stride);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index ef49ba9ab6f..a557db4b4dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -89,51 +89,15 @@ static void
lp_rast_tile_begin(struct lp_rasterizer_task *task,
const struct cmd_bin *bin)
{
- const struct lp_scene *scene = task->scene;
- enum lp_texture_usage usage;
-
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y);
task->bin = bin;
task->x = bin->x * TILE_SIZE;
task->y = bin->y * TILE_SIZE;
- /* reset pointers to color tile(s) */
+ /* reset pointers to color and depth tile(s) */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
-
- /* get pointer to depth/stencil tile */
- {
- struct pipe_surface *zsbuf = task->scene->fb.zsbuf;
- if (zsbuf) {
- struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
-
- if (scene->has_depthstencil_clear)
- usage = LP_TEX_USAGE_WRITE_ALL;
- else
- usage = LP_TEX_USAGE_READ_WRITE;
-
- /* "prime" the tile: convert data from linear to tiled if necessary
- * and update the tile's layout info.
- */
- (void) llvmpipe_get_texture_tile(lpt,
- zsbuf->u.tex.first_layer,
- zsbuf->u.tex.level,
- usage,
- task->x,
- task->y);
- /* Get actual pointer to the tile data. Note that depth/stencil
- * data is tiled differently than color data.
- */
- task->depth_tile = lp_rast_get_depth_block_pointer(task,
- task->x,
- task->y);
-
- assert(task->depth_tile);
- }
- else {
- task->depth_tile = NULL;
- }
- }
+ task->depth_tile = NULL;
}
@@ -220,8 +184,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
-
-
/**
* Clear the rasterizer's current z/stencil tile.
* This is a bin command called during bin processing.
@@ -233,10 +195,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
const struct lp_scene *scene = task->scene;
uint32_t clear_value = arg.clear_zstencil.value;
uint32_t clear_mask = arg.clear_zstencil.mask;
- const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
- const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
+ const unsigned height = TILE_SIZE;
+ const unsigned width = TILE_SIZE;
const unsigned block_size = scene->zsbuf.blocksize;
- const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT;
+ const unsigned dst_stride = scene->zsbuf.stride;
uint8_t *dst;
unsigned i, j;
@@ -244,65 +206,64 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
__FUNCTION__, clear_value, clear_mask);
/*
- * Clear the area of the swizzled depth/depth buffer matching this tile, in
- * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
- *
- * The swizzled depth format is such that the depths for
- * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
+ * Clear the area of the depth/stencil buffer matching this tile.
*/
- dst = task->depth_tile;
+ if (scene->fb.zsbuf) {
- clear_value &= clear_mask;
+ dst = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
- switch (block_size) {
- case 1:
- assert(clear_mask == 0xff);
- memset(dst, (uint8_t) clear_value, height * width);
- break;
- case 2:
- if (clear_mask == 0xffff) {
- for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) clear_value;
- dst += dst_stride;
+ clear_value &= clear_mask;
+
+ switch (block_size) {
+ case 1:
+ assert(clear_mask == 0xff);
+ memset(dst, (uint8_t) clear_value, height * width);
+ break;
+ case 2:
+ if (clear_mask == 0xffff) {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = (uint16_t) clear_value;
+ dst += dst_stride;
+ }
}
- }
- else {
- for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++) {
- uint16_t tmp = ~clear_mask & *row;
- *row++ = clear_value | tmp;
+ else {
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint16_t tmp = ~clear_mask & *row;
+ *row++ = clear_value | tmp;
+ }
+ dst += dst_stride;
}
- dst += dst_stride;
}
- }
- break;
- case 4:
- if (clear_mask == 0xffffffff) {
- for (i = 0; i < height; i++) {
- uint32_t *row = (uint32_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = clear_value;
- dst += dst_stride;
+ break;
+ case 4:
+ if (clear_mask == 0xffffffff) {
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = clear_value;
+ dst += dst_stride;
+ }
}
- }
- else {
- for (i = 0; i < height; i++) {
- uint32_t *row = (uint32_t *)dst;
- for (j = 0; j < width; j++) {
- uint32_t tmp = ~clear_mask & *row;
- *row++ = clear_value | tmp;
+ else {
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint32_t tmp = ~clear_mask & *row;
+ *row++ = clear_value | tmp;
+ }
+ dst += dst_stride;
}
- dst += dst_stride;
}
+ break;
+ default:
+ assert(0);
+ break;
}
- break;
- default:
- assert(0);
- break;
}
}
@@ -343,7 +304,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
for (x = 0; x < TILE_SIZE; x += 4) {
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
- uint32_t *depth;
+ uint8_t *depth = NULL;
+ unsigned depth_stride = 0;
unsigned i;
/* color buffer */
@@ -354,7 +316,11 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
}
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
+ if (scene->zsbuf.map) {
+ depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x, tile_y + y);
+ depth_stride = scene->zsbuf.stride;
+ }
+
/* run shader on 4x4 block */
BEGIN_JIT_CALL(state, task);
@@ -368,7 +334,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
depth,
0xffff,
&task->thread_data,
- stride);
+ stride,
+ depth_stride);
END_JIT_CALL();
}
}
@@ -412,7 +379,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
const struct lp_scene *scene = task->scene;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
- void *depth;
+ void *depth = NULL;
+ unsigned depth_stride = 0;
unsigned i;
assert(state);
@@ -434,8 +402,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
}
/* depth buffer */
- depth = lp_rast_get_depth_block_pointer(task, x, y);
-
+ if (scene->zsbuf.map) {
+ depth_stride = scene->zsbuf.stride;
+ depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+ }
assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
@@ -451,7 +421,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
depth,
mask,
&task->thread_data,
- stride);
+ stride,
+ depth_stride);
END_JIT_CALL();
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index c0f41f69fe2..7d01da15113 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -140,48 +140,39 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
/**
- * Get the pointer to a 4x4 depth/stencil block.
- * We'll map the z/stencil buffer on demand here.
- * Note that this may be called even when there's no z/stencil buffer - return
- * NULL in that case.
- * \param x, y location of 4x4 block in window coords
+ * Get pointer to the unswizzled color tile
*/
-static INLINE void *
-lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
- unsigned x, unsigned y)
+static INLINE uint8_t *
+lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, enum lp_texture_usage usage)
{
const struct lp_scene *scene = task->scene;
- void *depth;
+ unsigned format_bytes;
- assert(x < scene->tiles_x * TILE_SIZE);
- assert(y < scene->tiles_y * TILE_SIZE);
- assert((x % TILE_VECTOR_WIDTH) == 0);
- assert((y % TILE_VECTOR_HEIGHT) == 0);
+ assert(task->x < scene->tiles_x * TILE_SIZE);
+ assert(task->y < scene->tiles_y * TILE_SIZE);
+ assert(task->x % TILE_SIZE == 0);
+ assert(task->y % TILE_SIZE == 0);
+ assert(buf < scene->fb.nr_cbufs);
- if (!scene->zsbuf.map) {
- /* Either out of memory or no zsbuf. Can't tell without access
- * to the state. Just use dummy tile memory, but don't print
- * the oom warning as this most likely because there is no
- * zsbuf.
- */
- return lp_dummy_tile;
- }
+ if (!task->color_tiles[buf]) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[buf];
+ assert(cbuf);
- depth = (scene->zsbuf.map +
- scene->zsbuf.stride * y +
- scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
+ format_bytes = util_format_get_blocksize(cbuf->format);
+ task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+ }
- assert(lp_check_alignment(depth, 16));
- return depth;
+ return task->color_tiles[buf];
}
/**
- * Get pointer to the unswizzled color tile
+ * Get pointer to the unswizzled depth tile
*/
static INLINE uint8_t *
-lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
- unsigned buf, enum lp_texture_usage usage)
+lp_rast_get_unswizzled_depth_tile_pointer(struct lp_rasterizer_task *task,
+ enum lp_texture_usage usage)
{
const struct lp_scene *scene = task->scene;
unsigned format_bytes;
@@ -190,17 +181,16 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
assert(task->y < scene->tiles_y * TILE_SIZE);
assert(task->x % TILE_SIZE == 0);
assert(task->y % TILE_SIZE == 0);
- assert(buf < scene->fb.nr_cbufs);
- if (!task->color_tiles[buf]) {
- struct pipe_surface *cbuf = scene->fb.cbufs[buf];
- assert(cbuf);
+ if (!task->depth_tile) {
+ struct pipe_surface *dbuf = scene->fb.zsbuf;
+ assert(dbuf);
- format_bytes = util_format_get_blocksize(cbuf->format);
- task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+ format_bytes = util_format_get_blocksize(dbuf->format);
+ task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y + format_bytes * task->x;
}
- return task->color_tiles[buf];
+ return task->depth_tile;
}
@@ -237,6 +227,38 @@ lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task,
}
+/**
+ * Get the pointer to an unswizzled 4x4 depth block (within an unswizzled 64x64 tile).
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE uint8_t *
+lp_rast_get_unswizzled_depth_block_pointer(struct lp_rasterizer_task *task,
+ unsigned x, unsigned y)
+{
+ unsigned px, py, pixel_offset, format_bytes;
+ uint8_t *depth;
+
+ assert(x < task->scene->tiles_x * TILE_SIZE);
+ assert(y < task->scene->tiles_y * TILE_SIZE);
+ assert((x % TILE_VECTOR_WIDTH) == 0);
+ assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+ format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
+
+ depth = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
+ assert(depth);
+
+ px = x % TILE_SIZE;
+ py = y % TILE_SIZE;
+ pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
+
+ depth = depth + pixel_offset;
+
+ assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format)));
+ return depth;
+}
+
+
/**
* Shade all pixels in a 4x4 block. The fragment code omits the
@@ -253,7 +275,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
- void *depth;
+ void *depth = NULL;
+ unsigned depth_stride = 0;
unsigned i;
/* color buffer */
@@ -263,7 +286,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
}
- depth = lp_rast_get_depth_block_pointer(task, x, y);
+ if (scene->zsbuf.map) {
+ depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+ depth_stride = scene->zsbuf.stride;
+ }
/* run shader on 4x4 block */
BEGIN_JIT_CALL(state, task);
@@ -277,7 +303,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
depth,
0xffff,
&task->thread_data,
- stride );
+ stride,
+ depth_stride);
END_JIT_CALL();
}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index a8885863ef0..e05ea753b4b 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -185,7 +185,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
zsbuf->u.tex.level,
zsbuf->u.tex.first_layer,
LP_TEX_USAGE_READ_WRITE,
- LP_TEX_LAYOUT_NONE);
+ LP_TEX_LAYOUT_LINEAR);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1a9a194c8be..69212109a87 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -229,7 +229,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
LLVMValueRef mask_store,
LLVMValueRef (*out_color)[4],
LLVMValueRef depth_ptr,
- unsigned depth_bits,
+ LLVMValueRef depth_stride,
LLVMValueRef facing,
LLVMValueRef thread_data_ptr)
{
@@ -241,8 +241,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
LLVMValueRef z;
LLVMValueRef zs_value = NULL;
LLVMValueRef stencil_refs[2];
- LLVMValueRef depth_ptr_i;
- LLVMValueRef depth_offset;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
struct lp_build_for_loop_state loop_state;
struct lp_build_mask_context mask;
@@ -308,12 +306,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
&loop_state.counter, 1, "mask_ptr");
mask_val = LLVMBuildLoad(builder, mask_ptr, "");
- depth_offset = LLVMBuildMul(builder, loop_state.counter,
- lp_build_const_int32(gallivm, depth_bits * type.length),
- "");
-
- depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
-
memset(outputs, 0, sizeof outputs);
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
@@ -345,6 +337,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
z = interp->pos[2];
if (depth_mode & EARLY_DEPTH_TEST) {
+ LLVMValueRef zs_dst_val;
+ zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc,
+ depth_ptr, depth_stride,
+ loop_state.counter);
lp_build_depth_stencil_test(gallivm,
&key->depth,
key->stencil,
@@ -353,12 +350,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
&mask,
stencil_refs,
z,
- depth_ptr_i, facing,
+ zs_dst_val,
+ facing,
&zs_value,
!simple_shader);
if (depth_mode & EARLY_DEPTH_WRITE) {
- lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+ lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ NULL, loop_state.counter,
+ depth_ptr, depth_stride, zs_value);
}
}
@@ -394,6 +394,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
+ LLVMValueRef zs_dst_val;
int pos0 = find_output_by_semantic(&shader->info.base,
TGSI_SEMANTIC_POSITION,
0);
@@ -402,6 +403,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
}
+ zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc,
+ depth_ptr, depth_stride,
+ loop_state.counter);
+
lp_build_depth_stencil_test(gallivm,
&key->depth,
key->stencil,
@@ -410,12 +416,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
&mask,
stencil_refs,
z,
- depth_ptr_i, facing,
+ zs_dst_val,
+ facing,
&zs_value,
!simple_shader);
/* Late Z write */
if (depth_mode & LATE_DEPTH_WRITE) {
- lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+ lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ NULL, loop_state.counter,
+ depth_ptr, depth_stride, zs_value);
}
}
else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -425,12 +434,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
* depth value, update from zs_value with the new mask value and
* write that out.
*/
- lp_build_deferred_depth_write(gallivm,
- type,
- zs_format_desc,
- &mask,
- depth_ptr_i,
- zs_value);
+ lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ &mask, loop_state.counter,
+ depth_ptr, depth_stride, zs_value);
}
@@ -1749,7 +1755,7 @@ generate_fragment(struct llvmpipe_context *lp,
struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
LLVMTypeRef blend_vec_type;
- LLVMTypeRef arg_types[12];
+ LLVMTypeRef arg_types[13];
LLVMTypeRef func_type;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -1762,6 +1768,7 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef color_ptr_ptr;
LLVMValueRef stride_ptr;
LLVMValueRef depth_ptr;
+ LLVMValueRef depth_stride;
LLVMValueRef mask_input;
LLVMValueRef thread_data_ptr;
LLVMBasicBlockRef block;
@@ -1772,7 +1779,6 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
LLVMValueRef function;
LLVMValueRef facing;
- const struct util_format_description *zs_format_desc;
unsigned num_fs;
unsigned i;
unsigned chan;
@@ -1847,6 +1853,7 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[9] = int32_type; /* mask_input */
arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */
arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */
+ arg_types[12] = int32_type; /* depth_stride */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
arg_types, Elements(arg_types), 0);
@@ -1875,6 +1882,7 @@ generate_fragment(struct llvmpipe_context *lp,
mask_input = LLVMGetParam(function, 9);
thread_data_ptr = LLVMGetParam(function, 10);
stride_ptr = LLVMGetParam(function, 11);
+ depth_stride = LLVMGetParam(function, 12);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -1887,6 +1895,7 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(mask_input, "mask_input");
lp_build_name(stride_ptr, "stride_ptr");
+ lp_build_name(depth_stride, "depth_stride");
/*
* Function body
@@ -1900,10 +1909,7 @@ generate_fragment(struct llvmpipe_context *lp,
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
- zs_format_desc = util_format_description(key->zsbuf_format);
-
{
- unsigned depth_bits = zs_format_desc->block.bits/8;
LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
@@ -1951,7 +1957,7 @@ generate_fragment(struct llvmpipe_context *lp,
mask_store, /* output */
color_store,
depth_ptr,
- depth_bits,
+ depth_stride,
facing,
thread_data_ptr);