diff options
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_format.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 50 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_program.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_util.c | 3 |
5 files changed, 65 insertions, 10 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 939693d53f1..76cb3182169 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -195,7 +195,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), - /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/ + _T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX), /* 48-bit */ V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 4e2eefab148..d76acb2b100 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -304,6 +304,7 @@ emit_gmem2mem_surf(struct fd_context *ctx, { struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); + enum pipe_format format = psurf->format; struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); @@ -313,7 +314,10 @@ emit_gmem2mem_surf(struct fd_context *ctx, OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A3XX_RB_COPY_CONTROL_MODE(mode) | - A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); + A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | + COND(format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, + A3XX_RB_COPY_CONTROL_UNK12)); OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); @@ -453,15 +457,35 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], assert(bufs > 0); - emit_mrt(ring, bufs, psurf, bases, bin_w, false); - OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); - OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); - OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); + emit_mrt(ring, bufs, psurf, bases, bin_w, false); + + if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + /* Depth is stored as unorm in gmem, so we have to write it in using a + * special blit shader which writes depth. + */ + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z | + A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A3XX_RB_DEPTH_CONTROL_Z_ENABLE | + A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE | + A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS))); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | + A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w)); + + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); + OUT_RING(ring, 0); + } else { + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); + } fd3_emit_gmem_restore_tex(ring, psurf, bufs); @@ -600,7 +624,21 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - emit.prog = &ctx->blit_prog[0]; + if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { + /* Non-float can use a regular color write. It's split over 8-bit + * components, so half precision is always sufficient. + */ + emit.prog = &ctx->blit_prog[0]; + emit.key.half_precision = true; + } else { + /* Float depth needs special blit shader that writes depth */ + if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) + emit.prog = &ctx->blit_z; + else + emit.prog = &ctx->blit_zs; + emit.key.half_precision = false; + } fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index a648689cefd..e6a5f01d412 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -180,6 +180,7 @@ struct fd_context { /* shaders used by mem->gmem blits: */ struct fd_program_stateobj blit_prog[8]; // TODO move to screen? + struct fd_program_stateobj blit_z, blit_zs; /* do we need to mem2gmem before rendering. We don't, if for example, * there was a glClear() that invalidated the entire previous buffer diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index 52a165b64af..5e344e69146 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -92,7 +92,7 @@ static void * assemble_tgsi(struct pipe_context *pctx, } static void * -fd_prog_blit(struct pipe_context *pctx, int rts) +fd_prog_blit(struct pipe_context *pctx, int rts, bool depth) { int i; struct ureg_src tc; @@ -105,6 +105,12 @@ fd_prog_blit(struct pipe_context *pctx, int rts) for (i = 0; i < rts; i++) ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i), TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i)); + if (depth) + ureg_TEX(ureg, + ureg_writemask( + ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), + TGSI_WRITEMASK_Z), + TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts)); ureg_END(ureg); @@ -128,11 +134,16 @@ void fd_prog_init(struct pipe_context *pctx) ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true); ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false); ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false); - ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1); + ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false); for (i = 1; i < ctx->screen->max_rts; i++) { ctx->blit_prog[i].vp = ctx->blit_prog[0].vp; - ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1); + ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false); } + + ctx->blit_z.vp = ctx->blit_prog[0].vp; + ctx->blit_z.fp = fd_prog_blit(pctx, 0, true); + ctx->blit_zs.vp = ctx->blit_prog[0].vp; + ctx->blit_zs.fp = fd_prog_blit(pctx, 1, true); } void fd_prog_fini(struct pipe_context *pctx) @@ -145,4 +156,6 @@ void fd_prog_fini(struct pipe_context *pctx) pctx->delete_vs_state(pctx, ctx->blit_prog[0].vp); for (i = 0; i < ctx->screen->max_rts; i++) pctx->delete_fs_state(pctx, ctx->blit_prog[i].fp); + pctx->delete_fs_state(pctx, ctx->blit_z.fp); + pctx->delete_fs_state(pctx, ctx->blit_zs.fp); } diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c index 9892b05c37e..2acce06d148 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.c +++ b/src/gallium/drivers/freedreno/freedreno_util.c @@ -44,6 +44,9 @@ fd_pipe2depth(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_UINT_Z24_UNORM: return DEPTHX_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTHX_32; default: return ~0; } |