diff options
author | Ilia Mirkin <[email protected]> | 2015-04-22 14:35:00 -0400 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2015-04-27 20:17:07 -0400 |
commit | 9fc3f472784b2ba53655b715d602268bef5bf12e (patch) | |
tree | e688bbd65a3c15c3f767ad4e94dad70e54a36ac5 | |
parent | 1571da6ac31ade482f5e4adc82eb66d42a1bb389 (diff) |
freedreno/a3xx: add support for S8 and Z32F_S8
Enables ARB_depth_buffer_float. There is no sampling support for
interleaved Z32F_S8, so we store the two textures separately, one as
Z32F, the other as S8. As a result, we need a lot of additional logic
for restores and transfers.
Signed-off-by: Ilia Mirkin <[email protected]>

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | docs/relnotes/10.6.0.html | 1 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 13 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_format.c | 7 |
| -rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 80 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_context.c | 8 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_draw.c | 13 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_gmem.c | 29 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_gmem.h | 4 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_resource.c | 122 |
| -rw-r--r-- | src/gallium/drivers/freedreno/freedreno_resource.h | 3 |

10 files changed, 236 insertions, 44 deletions
diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 5695ae4d77c..ff248423da6 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers. <ul> <li>GL_AMD_pinned_memory on r600, radeonsi</li> <li>GL_ARB_clip_control on i965</li> +<li>GL_ARB_depth_buffer_float on freedreno</li> <li>GL_ARB_depth_clamp on freedreno</li> <li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li> <li>GL_ARB_draw_instanced on freedreno</li> diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index ee473e6d564..af086963075 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -383,9 +383,17 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, } struct fd_resource *rsc = fd_resource(psurf[i]->texture); + enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); + /* The restore blit_zs shader expects stencil in sampler 0, and depth + * in sampler 1 + */ + if (rsc->stencil && i == 0) { + rsc = rsc->stencil; + format = fd3_gmem_restore_format(rsc->base.b.format); + } + unsigned lvl = psurf[i]->u.tex.level; struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); - enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer); @@ -412,6 +420,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, for (i = 0; i < bufs; i++) { if (psurf[i]) { struct fd_resource *rsc = fd_resource(psurf[i]->texture); + /* Matches above logic for blit_zs shader */ + if (rsc->stencil && i == 0) + rsc = rsc->stencil; unsigned lvl = psurf[i]->u.tex.level; uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer); OUT_RELOC(ring, rsc->bo, offset, 0, 0); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 76cb3182169..ec87aa979e3 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -91,6 +91,8 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(I8_UINT, 8_UINT, NONE, WZYX), _T(I8_SINT, 8_SINT, NONE, WZYX), + _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), + /* 16-bit */ VT(R16_UNORM, 16_UNORM, NONE, WZYX), VT(R16_SNORM, 16_SNORM, NONE, WZYX), @@ -196,6 +198,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT_S8X24_UINT, Z32_FLOAT,R8G8B8A8_UNORM, WZYX), /* 48-bit */ V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), @@ -296,6 +299,8 @@ fd3_pipe2swap(enum pipe_format format) enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format) { + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; switch (util_format_get_blocksizebits(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -324,6 +329,8 @@ fd3_gmem_restore_format(enum pipe_format format) return PIPE_FORMAT_R8G8B8A8_UNORM; case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_S8_UINT: + return PIPE_FORMAT_R8_UNORM; default: return format; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index d76acb2b100..7d3975761dd 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -72,12 +72,20 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface *psurf = bufs[i]; rsc = fd_resource(psurf->texture); - slice = fd_resource_slice(rsc, psurf->u.tex.level); - format = fd3_pipe2color(psurf->format); - swap = fd3_pipe2swap(psurf->format); pformat = psurf->format; + /* In case we're drawing to Z32F_S8, the "color" 
actually goes to + * the stencil + */ + if (rsc->stencil) { + rsc = rsc->stencil; + pformat = rsc->base.b.format; + bases++; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd3_pipe2color(pformat); + swap = fd3_pipe2swap(pformat); if (decode_srgb) - srgb = util_format_is_srgb(psurf->format); + srgb = util_format_is_srgb(pformat); else pformat = util_format_linear(pformat); @@ -299,12 +307,17 @@ emit_binning_workaround(struct fd_context *ctx) static void emit_gmem2mem_surf(struct fd_context *ctx, - enum adreno_rb_copy_control_mode mode, - uint32_t base, struct pipe_surface *psurf) + enum adreno_rb_copy_control_mode mode, + bool stencil, + uint32_t base, struct pipe_surface *psurf) { struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); enum pipe_format format = psurf->format; + if (stencil) { + rsc = rsc->stencil; + format = rsc->base.b.format; + } struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); @@ -322,10 +335,10 @@ emit_gmem2mem_surf(struct fd_context *ctx, OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | - A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | + A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) | A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | - A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); + A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); @@ -421,9 +434,15 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | 
FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, - ctx->gmem.zsbuf_base, pfb->zsbuf); + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (!rsc->stencil || ctx->resolve & FD_BUFFER_DEPTH) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, false, + ctx->gmem.zsbuf_base[0], pfb->zsbuf); + if (rsc->stencil && ctx->resolve & FD_BUFFER_STENCIL) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, true, + ctx->gmem.zsbuf_base[1], pfb->zsbuf); + } if (ctx->resolve & FD_BUFFER_COLOR) { for (i = 0; i < pfb->nr_cbufs; i++) { @@ -431,7 +450,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) continue; if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, + emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, false, ctx->gmem.cbuf_base[i], pfb->cbufs[i]); } } @@ -454,6 +473,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; + struct pipe_surface *zsbufs[2]; assert(bufs > 0); @@ -464,7 +484,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], emit_mrt(ring, bufs, psurf, bases, bin_w, false); - if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT || + psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { /* Depth is stored as unorm in gmem, so we have to write it in using a * special blit shader which writes depth. 
*/ @@ -480,8 +501,18 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w)); - OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); - OUT_RING(ring, 0); + if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); + OUT_RING(ring, 0); + } else { + /* The gmem_restore_tex logic will put the first buffer's stencil + * as color. Supply it with the proper information to make that + * happen. + */ + zsbufs[0] = zsbufs[1] = psurf[0]; + psurf = zsbufs; + bufs = 2; + } } else { OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); @@ -509,7 +540,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) .half_precision = (fd3_half_precision(pfb->cbufs[0]) && fd3_half_precision(pfb->cbufs[1]) && fd3_half_precision(pfb->cbufs[2]) && - fd3_half_precision(pfb->cbufs[3])), + fd3_half_precision(pfb->cbufs[3])) }, }; float x0, y0, x1, y1; @@ -592,6 +623,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, 0); /* RB_STENCIL_INFO */ + OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | @@ -640,7 +675,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) emit.key.half_precision = false; } fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); - emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); @@ -950,14 +985,19 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t reg; 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); - reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) { reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); } OUT_RING(ring, reg); if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w)); + if (rsc->stencil) { + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); + OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); + } } else { OUT_RING(ring, 0x00000000); } diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 5d92da42e56..668ef3629bf 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -121,8 +121,12 @@ fd_context_render(struct pipe_context *pctx) for (i = 0; i < pfb->nr_cbufs; i++) if (pfb->cbufs[i]) fd_resource(pfb->cbufs[i]->texture)->dirty = false; - if (pfb->zsbuf) - fd_resource(pfb->zsbuf->texture)->dirty = false; + if (pfb->zsbuf) { + rsc = fd_resource(pfb->zsbuf->texture); + rsc->dirty = false; + if (rsc->stencil) + rsc->stencil->dirty = false; + } /* go through all the used resources and clear their reading flag */ LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) { diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index fed3e64f202..c9e317c7dc9 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -88,8 +88,12 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } if (fd_stencil_enabled(ctx)) { + struct fd_resource *rsc = 
fd_resource(pfb->zsbuf->texture); buffers |= FD_BUFFER_STENCIL; - fd_resource(pfb->zsbuf->texture)->dirty = true; + if (rsc->stencil) + rsc->stencil->dirty = true; + else + rsc->dirty = true; ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED; } @@ -215,7 +219,12 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, fd_resource(pfb->cbufs[i]->texture)->dirty = true; if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - fd_resource(pfb->zsbuf->texture)->dirty = true; + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL) + rsc->stencil->dirty = true; + if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH) + rsc->dirty = true; + ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL; } diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 473d2b891e3..11a1b62b26b 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -77,7 +77,7 @@ static uint32_t bin_width(struct fd_context *ctx) } static uint32_t -total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp, +total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) { uint32_t total = 0, i; @@ -89,9 +89,14 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp, } } - if (zsbuf_cpp) { - gmem->zsbuf_base = align(total, 0x4000); - total = gmem->zsbuf_base + zsbuf_cpp * bin_w * bin_h; + if (zsbuf_cpp[0]) { + gmem->zsbuf_base[0] = align(total, 0x4000); + total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; + } + + if (zsbuf_cpp[1]) { + gmem->zsbuf_base[1] = align(total, 0x4000); + total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; } return total; @@ -108,13 +113,17 @@ calculate_tiles(struct fd_context *ctx) uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; uint32_t max_width = bin_width(ctx); - uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp = 0; + uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0}; 
uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); - if (has_zs) - zsbuf_cpp = util_format_get_blocksize(pfb->zsbuf->format); + if (has_zs) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + zsbuf_cpp[0] = rsc->cpp; + if (rsc->stencil) + zsbuf_cpp[1] = rsc->stencil->cpp; + } for (i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i]) cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format); @@ -122,7 +131,7 @@ calculate_tiles(struct fd_context *ctx) cbuf_cpp[i] = 4; } - if (gmem->zsbuf_cpp == zsbuf_cpp && + if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) && !memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) && !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { /* everything is up-to-date */ @@ -156,7 +165,7 @@ calculate_tiles(struct fd_context *ctx) * constraints: */ DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d", - cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp, + cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0], width, height); while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { if (bin_w > bin_h) { @@ -172,7 +181,7 @@ calculate_tiles(struct fd_context *ctx) gmem->scissor = *scissor; memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)); - gmem->zsbuf_cpp = zsbuf_cpp; + memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)); gmem->bin_h = bin_h; gmem->bin_w = bin_w; gmem->nbins_x = nbins_x; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index 81f9b6abe98..5867235db90 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -48,9 +48,9 @@ struct fd_tile { struct fd_gmem_stateobj { struct pipe_scissor_state scissor; uint32_t cbuf_base[4]; - uint32_t zsbuf_base; + uint32_t zsbuf_base[2]; uint8_t cbuf_cpp[4]; - uint8_t zsbuf_cpp; + uint8_t zsbuf_cpp[2]; uint16_t bin_h, 
nbins_y; uint16_t bin_w, nbins_x; uint16_t minx, miny; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index e8da68e116b..95f79df565e 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" #include "util/u_string.h" @@ -101,16 +102,51 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) util_range_set_empty(&rsc->valid_buffer_range); } +/* Currently this is only used for flushing Z32_S8 texture transfers, but + * eventually it should handle everything. + */ +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + float *depth = fd_bo_map(rsc->bo) + slice->offset + + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; + uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; + + assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + format == PIPE_FORMAT_X32_S8X24_UINT); + + if (format != PIPE_FORMAT_X32_S8X24_UINT) + util_format_z32_float_s8x24_uint_unpack_z_float( + depth, slice->pitch * 4, + trans->staging, trans->base.stride, + box->width, box->height); + + util_format_z32_float_s8x24_uint_unpack_s_8uint( + stencil, sslice->pitch, + trans->staging, trans->base.stride, + box->width, box->height); +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) { struct fd_resource *rsc = 
fd_resource(ptrans->resource); + struct fd_transfer *trans = fd_transfer(ptrans); if (ptrans->resource->target == PIPE_BUFFER) util_range_add(&rsc->valid_buffer_range, ptrans->box.x + box->x, ptrans->box.x + box->x + box->width); + + if (trans->staging) + fd_resource_flush(trans, box); } static void @@ -119,8 +155,19 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, { struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(ptrans->resource); - if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + struct fd_transfer *trans = fd_transfer(ptrans); + + if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + struct pipe_box box; + u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box); + fd_resource_flush(trans, &box); + } + + if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { fd_bo_cpu_fini(rsc->bo); + if (rsc->stencil) + fd_bo_cpu_fini(rsc->stencil->bo); + } util_range_add(&rsc->valid_buffer_range, ptrans->box.x, @@ -128,6 +175,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&ctx->transfer_pool, ptrans); + + if (trans->staging) + free(trans->staging); } static void * @@ -148,7 +198,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, char *buf; int ret = 0; - DBG("prsc=%p, level=%u, usage=%x", prsc, level, usage); + DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage, + box->width, box->height, box->x, box->y); ptrans = util_slab_alloc(&ctx->transfer_pool); if (!ptrans) @@ -173,6 +224,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { realloc_bo(rsc, fd_bo_size(rsc->bo)); + if (rsc->stencil) + realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo)); fd_invalidate_resource(ctx, prsc); } else if ((usage & PIPE_TRANSFER_WRITE) && prsc->target == PIPE_BUFFER && @@ -185,7 +238,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, /* If the GPU is writing to 
the resource, or if it is reading from the * resource and we're trying to write to it, flush the renders. */ - if (rsc->dirty || + if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) || ((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading)) fd_context_render(pctx); @@ -204,8 +257,6 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - *pptrans = ptrans; - if (rsc->layer_first) { offset = slice->offset + box->y / util_format_get_blockheight(format) * ptrans->stride + @@ -218,6 +269,47 @@ fd_resource_transfer_map(struct pipe_context *pctx, box->z * slice->size0; } + if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + trans->base.stride = trans->base.box.width * rsc->cpp * 2; + trans->staging = malloc(trans->base.stride * trans->base.box.height); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. + */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + struct fd_resource_slice *sslice = + fd_resource_slice(rsc->stencil, level); + void *sbuf = fd_bo_map(rsc->stencil->bo); + if (!sbuf) + goto fail; + + float *depth = (float *)(buf + slice->offset + + box->y * slice->pitch * 4 + box->x * 4); + uint8_t *stencil = sbuf + sslice->offset + + box->y * sslice->pitch + box->x; + + if (format != PIPE_FORMAT_X32_S8X24_UINT) + util_format_z32_float_s8x24_uint_pack_z_float( + trans->staging, trans->base.stride, + depth, slice->pitch * 4, + box->width, box->height); + + util_format_z32_float_s8x24_uint_pack_s_8uint( + trans->staging, trans->base.stride, + stencil, sslice->pitch, + box->width, box->height); + } + + buf = trans->staging; + offset = 0; + } + + *pptrans = ptrans; + return buf + offset; fail: @@ -347,7 +439,10 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - rsc->cpp = 
util_format_get_blocksize(tmpl->format); + if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); + else + rsc->cpp = util_format_get_blocksize(tmpl->format); assert(rsc->cpp); @@ -374,6 +469,19 @@ fd_resource_create(struct pipe_screen *pscreen, if (!rsc->bo) goto fail; + /* There is no native Z32F_S8 sampling or rendering format, so this must + * be emulated via two separate textures. The depth texture still keeps + * its Z32F_S8 format though, and we also keep a reference to a separate + * S8 texture. + */ + if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + struct pipe_resource stencil = *tmpl; + stencil.format = PIPE_FORMAT_S8_UINT; + rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil)); + if (!rsc->stencil) + goto fail; + } + return prsc; fail: fd_resource_destroy(pscreen, prsc); @@ -567,7 +675,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct fd_resource *rsc = fd_resource(prsc); - if (rsc->dirty) + if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty)) fd_context_render(pctx); } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index f80acb1e188..fdf3b8c7d7a 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -72,6 +72,9 @@ struct fd_resource { /* buffer range that has been initialized */ struct util_range valid_buffer_range; + /* reference to the resource holding stencil data for a z32_s8 texture */ + struct fd_resource *stencil; + struct list_head list; }; |