summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
authorJonathan Marek <[email protected]>2019-01-28 12:49:54 -0500
committerRob Clark <[email protected]>2019-01-28 18:21:16 -0500
commit912a9c8d8cf5e7e4e05a5cb06f4284eeff7b379a (patch)
treef12b82c1489feb53d99f8cad0dcc9ca1efbaa26a /src/gallium
parentcb2322c7c0f95d6d1a2b90494cf5f6fd55f55638 (diff)
freedreno: a2xx: clear fixes and fast clear path
This fixes the depth/stencil clear on a20x and adds a fast clear path. The fast clear path is currently only used on a20x; it still needs performance testing on a22x. Signed-off-by: Jonathan Marek <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_context.c2
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_draw.c450
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_draw.h7
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_emit.c8
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_gmem.c60
-rw-r--r--src/gallium/drivers/freedreno/freedreno_batch.c8
-rw-r--r--src/gallium/drivers/freedreno/freedreno_batch.h5
-rw-r--r--src/gallium/drivers/freedreno/freedreno_gmem.c20
8 files changed, 434 insertions, 126 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
index 760ad17732a..28073b07011 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -54,6 +54,8 @@ create_solid_vertexbuf(struct pipe_context *pctx)
+0.000000, +0.000000,
+1.000000, +0.000000,
+0.000000, +1.000000,
+ /* SCREEN_SCISSOR_BR value (must be at 60 byte offset in page) */
+ 0.0,
};
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index c857c118d91..05c4cd5391b 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -208,23 +208,13 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
return true;
}
-
-static bool
-fd2_clear(struct fd_context *ctx, unsigned buffers,
- const union pipe_color_union *color, double depth, unsigned stencil)
+static void
+clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
+ unsigned buffers, bool fast_clear)
{
+ struct fd_context *ctx = batch->ctx;
struct fd2_context *fd2_ctx = fd2_context(ctx);
- struct fd_ringbuffer *ring = ctx->batch->draw;
- struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
- uint32_t reg, colr = 0;
-
- if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
- colr = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f);
-
- /* emit generic state now: */
- fd2_emit_state(ctx, ctx->dirty &
- (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT |
- FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+ uint32_t reg;
fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
{ .prsc = fd2_ctx->solid_vertexbuf, .size = 36 },
@@ -234,96 +224,28 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0);
- if (!is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000028f);
- }
-
fd2_program_emit(ctx, ring, &ctx->solid_prog);
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
- if (is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 5);
- OUT_RING(ring, 0x00000480);
- OUT_RING(ring, color->ui[0]);
- OUT_RING(ring, color->ui[1]);
- OUT_RING(ring, color->ui[2]);
- OUT_RING(ring, color->ui[3]);
- } else {
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
- OUT_RING(ring, colr);
- }
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
- OUT_RING(ring, 0x00000084);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
- reg = 0;
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- reg |= A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE;
- switch (fd_pipe2depth(fb->zsbuf->format)) {
- case DEPTHX_24_8:
- if (buffers & PIPE_CLEAR_DEPTH)
- reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xe);
- if (buffers & PIPE_CLEAR_STENCIL)
- reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0x1);
- break;
- case DEPTHX_16:
- if (buffers & PIPE_CLEAR_DEPTH)
- reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf);
- break;
- default:
- debug_assert(0);
- break;
- }
- }
- OUT_RING(ring, reg);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
- reg = 0;
- if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- switch (fd_pipe2depth(fb->zsbuf->format)) {
- case DEPTHX_24_8:
- reg = (((uint32_t)(0xffffff * depth)) << 8) |
- (stencil & 0xff);
- break;
- case DEPTHX_16:
- reg = (uint32_t)(0xffffffff * depth);
- break;
- default:
- debug_assert(0);
- break;
- }
- }
- OUT_RING(ring, reg);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
- reg = 0;
- if (buffers & PIPE_CLEAR_DEPTH) {
- reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
+ reg = 0;
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
A2XX_RB_DEPTHCONTROL_Z_ENABLE |
A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
+ }
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
+ A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
+ A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
+ }
+ OUT_RING(ring, reg);
}
- if (buffers & PIPE_CLEAR_STENCIL) {
- reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
- A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
- A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
- }
- OUT_RING(ring, reg);
-
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
- OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
- OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
@@ -338,18 +260,19 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
- A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
+ A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+ (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
+
+ if (fast_clear) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
+ OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
+ }
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, 0x0000ffff);
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
- OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
- OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */
- fb->height));
-
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
if (buffers & PIPE_CLEAR_COLOR) {
@@ -361,30 +284,326 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 0x0);
}
- if (!is_a20x(ctx->screen)) {
- OUT_PKT3(ring, CP_SET_CONSTANT, 3);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
- OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
- OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
- }
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, 0);
- fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
- DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+ if (is_a20x(batch->ctx->screen))
+ return;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+ OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
+ OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
- OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000084);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000028f);
+}
+
+static void
+clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{ /* Emit the register writes that follow a clear draw.
+   * On a20x nothing is emitted.  On other parts RB_COPY_CONTROL and
+   * A220_RB_LRZ_VSC_CONTROL are written back to 0 and
+   * VGT_VERTEX_REUSE_BLOCK_CNTL is set to 0x3b.
+   */
+ if (is_a20x(ctx->screen))
+ return;
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
OUT_RING(ring, 0x00000000);
- if (!is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x0000003b);
+}
+
+/*
+ * Emit one fast clear draw into `ring`.
+ *
+ * batch:       batch whose gmem_patches list records the patch point
+ * ring:        ringbuffer the packets are written to
+ * color_clear: packed 32-bit fill value for the color path
+ * depth_clear: packed 32-bit fill value for the depth/stencil path
+ *              (upper 24 bits depth, low 8 bits stencil on the a20x path)
+ * patch_type:  one of the GMEM_PATCH_FASTCLEAR_* values
+ *
+ * The first 8 dwords are two CP_SET_CONSTANT packets whose values are only
+ * known once the gmem layout has been computed.  OUT_RINGP records the
+ * address of the scissor value, and fd2_emit_tile_init() later patches the
+ * packets relative to that dword:
+ *
+ *   cs[0]  PA_SC_SCREEN_SCISSOR_BR value
+ *   cs[1]  CP_SET_CONSTANT header of the second packet
+ *   cs[2]  CP_REG(RB_SURFACE_INFO)
+ *   cs[3]  RB_SURFACE_INFO value
+ *   cs[4]  RB_COLOR_INFO value
+ *   cs[5]  RB_DEPTH_INFO value
+ *
+ * The dword count of the first packet must therefore stay at exactly
+ * header + register + one value; any extra dword shifts cs[1..5] and makes
+ * the patches land on the wrong registers.
+ */
+static void
+clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
+ uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
+{
+ BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
+
+ /* zero values are patched in */
+ /* packet 1 (3 dwords): header, register, patched scissor value */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
+ OUT_RINGP(ring, patch_type, &batch->gmem_patches);
+
+ /* packet 2 (5 dwords): header, register, surface/color/depth info */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RING(ring, 0x8000 | 32);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* set fill values */
+ if (!is_a20x(batch->ctx->screen)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
- OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
- OUT_RING(ring, 0x0000003b);
+ OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
+ OUT_RING(ring, color_clear);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
+ A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
+ OUT_RING(ring, depth_clear);
+ } else {
+ /* a20x: the color is passed as four floats, one per byte of
+ * color_clear scaled to [0, 1]
+ */
+ const float sc = 1.0f / 255.0f;
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000480);
+ OUT_RING(ring, fui((float) (color_clear >> 0 & 0xff) * sc));
+ OUT_RING(ring, fui((float) (color_clear >> 8 & 0xff) * sc));
+ OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc));
+ OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc));
+
+ // XXX if using float the rounding error breaks it..
+ float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff);
+ /* the float must convert back to the exact 24-bit value */
+ assert((unsigned) (((double) depth * (double) 0xffffff)) ==
+ (depth_clear >> 8));
+
+ /* depth is written as the dword following a zero Z scale */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(depth));
+
+ /* stencil value is the low byte of depth_clear */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
+ A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ }
+
+ fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+}
+
+static bool
+fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
+ const union pipe_color_union *color, double depth, unsigned stencil)
+{
+ /* Try to clear the requested buffers with the fast clear path.
+ * Returns true when the clear was emitted, false when the caller has
+ * to fall back to the regular clear (not a20x, or a 24_8 buffer where
+ * only one of depth/stencil is being cleared).
+ *
+ * using 4x MSAA allows clearing ~2x faster
+ * then we can use higher bpp clearing to clear lower bpp
+ * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
+ * note: it's possible to clear with 32_32_32_32 format but it's not faster
+ * note: fast clear doesn't work with sysmem rendering
+ * (sysmem rendering is disabled when clear is used)
+ *
+ * we only have 16-bit / 32-bit color formats
+ * and 16-bit / 32-bit depth formats
+ * so there are only a few possible combinations
+ *
+ * if the bpp of the color/depth doesn't match
+ * we clear with depth/color individually
+ */
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
+ struct fd_batch *batch = ctx->batch;
+ struct fd_ringbuffer *ring = batch->draw;
+ struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+ uint32_t color_clear = 0, depth_clear = 0;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
+ int color_size = -1; /* same encoding as depth_size */
+
+ /* TODO: need to test performance on a22x */
+ if (!is_a20x(ctx->screen))
+ return false;
+
+ if (buffers & PIPE_CLEAR_COLOR)
+ color_size = util_format_get_blocksizebits(format) == 32;
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
+ depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
+
+ assert(color_size >= 0 || depth_size >= 0);
+
+ /* when clearing 24_8, depth and stencil must both be cleared
+ * TODO: if buffer isn't attached we can clear it anyway
+ */
+ if (depth_size == 1 && !(buffers & PIPE_CLEAR_STENCIL) != !(buffers & PIPE_CLEAR_DEPTH))
+ return false;
+
+ if (color_size == 0) {
+ color_clear = pack_rgba(format, color->f);
+ color_clear = (color_clear << 16) | (color_clear & 0xffff); /* replicate the 16-bit value into both halves */
+ } else if (color_size == 1) {
+ color_clear = pack_rgba(format, color->f);
+ }
+
+ if (depth_size == 0) {
+ depth_clear = (uint32_t)(0xffff * depth);
+ depth_clear |= depth_clear << 16; /* replicate the 16-bit value into both halves */
+ } else if (depth_size == 1) {
+ depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
+ depth_clear |= (stencil & 0xff);
+ }
+
+ /* disable "window" scissor.. */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(0, 0));
+ OUT_RING(ring, xy2d(0x7fff, 0x7fff));
+
+ /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+ OUT_RING(ring, fui(4096.0));
+
+ clear_state(batch, ring, ~0u, true);
+
+ if (color_size >= 0 && depth_size != color_size) /* color on its own */
+ clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR);
+
+ if (depth_size >= 0 && depth_size != color_size) /* depth/stencil on its own */
+ clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH);
+
+ if (depth_size == color_size) /* same size: one combined draw */
+ clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
+
+ clear_state_restore(ctx, ring);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
+ OUT_RING(ring, 0);
+
+ /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
+ * Instead tile_renderprep writes the value with MEM_WRITE, and it is
+ * loaded back here with CP_LOAD_CONSTANT_CONTEXT;
+ * the value is read from byte offset 60 in the given bo
+ */
+ OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
+ OUT_RING(ring, 1);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
+ OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches); /* the three values are patched in once the gmem layout is known */
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ return true;
+}
+
+static bool
+fd2_clear(struct fd_context *ctx, unsigned buffers,
+ const union pipe_color_union *color, double depth, unsigned stencil)
+{
+ struct fd_ringbuffer *ring = ctx->batch->draw;
+ struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
+
+ if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
+ goto dirty;
+
+ /* set clear value */
+ if (is_a20x(ctx->screen)) {
+ if (buffers & PIPE_CLEAR_COLOR) {
+ /* C0 used by fragment shader */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000480);
+ OUT_RING(ring, color->ui[0]);
+ OUT_RING(ring, color->ui[1]);
+ OUT_RING(ring, color->ui[2]);
+ OUT_RING(ring, color->ui[3]);
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ /* use viewport to set depth value */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(depth));
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
+ A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+ OUT_RING(ring, 0xff000000 |
+ A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+ A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+ }
+ } else {
+ if (buffers & PIPE_CLEAR_COLOR) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
+ OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
+ }
+
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ uint32_t clear_mask, depth_clear;
+ if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ switch (fd_pipe2depth(fb->zsbuf->format)) {
+ case DEPTHX_24_8:
+ clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |
+ ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0);
+ depth_clear = (((uint32_t)(0xffffff * depth)) << 8) |
+ (stencil & 0xff);
+ break;
+ case DEPTHX_16:
+ clear_mask = 0xf;
+ depth_clear = (uint32_t)(0xffffffff * depth);
+ break;
+ default:
+ debug_assert(0);
+ break;
+ }
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
+ A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
+ OUT_RING(ring, depth_clear);
+ }
}
+ /* scissor state */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
+ OUT_RING(ring, xy2d(0, 0));
+ OUT_RING(ring, xy2d(fb->width, fb->height));
+
+ /* viewport state */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
+ OUT_RING(ring, fui((float) fb->width / 2.0));
+ OUT_RING(ring, fui((float) fb->width / 2.0));
+ OUT_RING(ring, fui((float) fb->height / 2.0));
+ OUT_RING(ring, fui((float) fb->height / 2.0));
+
+ /* common state */
+ clear_state(ctx->batch, ring, buffers, false);
+
+ fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+ DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
+
+ clear_state_restore(ctx, ring);
+
+dirty:
ctx->dirty |= FD_DIRTY_ZSA |
FD_DIRTY_VIEWPORT |
FD_DIRTY_RASTERIZER |
@@ -392,7 +611,8 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
FD_DIRTY_PROG |
FD_DIRTY_CONST |
FD_DIRTY_BLEND |
- FD_DIRTY_FRAMEBUFFER;
+ FD_DIRTY_FRAMEBUFFER |
+ FD_DIRTY_SCISSOR;
ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.h b/src/gallium/drivers/freedreno/a2xx/fd2_draw.h
index 1dd67e0401c..c7964756a53 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.h
@@ -33,4 +33,11 @@
void fd2_draw_init(struct pipe_context *pctx);
+enum { /* patch types recorded in batch->gmem_patches by the fast clear path */
+ GMEM_PATCH_FASTCLEAR_COLOR, /* fast clear draw for color on its own */
+ GMEM_PATCH_FASTCLEAR_DEPTH, /* fast clear draw for depth/stencil on its own */
+ GMEM_PATCH_FASTCLEAR_COLOR_DEPTH, /* single fast clear draw for color and depth/stencil */
+ GMEM_PATCH_RESTORE_INFO, /* RB_SURFACE_INFO packet emitted after the fast clear */
+};
+
#endif /* FD2_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index 18d69444d12..805a4cf032a 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -360,7 +360,7 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
- OUT_RING(ring, blend ? zsa->rb_colorcontrol | blend->rb_colorcontrol : 0);
+ OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
@@ -370,13 +370,13 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha |
+ OUT_RING(ring, blend->rb_blendcontrol_alpha |
COND(has_alpha, blend->rb_blendcontrol_rgb) |
- COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0);
+ COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
- OUT_RING(ring, blend ? blend->rb_colormask : 0xf);
+ OUT_RING(ring, blend->rb_colormask);
}
if (dirty & FD_DIRTY_BLEND_COLOR) {
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index 6a066a63730..17d6d6ef25a 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -39,6 +39,7 @@
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
+#include "fd2_draw.h"
#include "instr-a2xx.h"
static uint32_t fmt2swap(enum pipe_format format)
@@ -473,6 +474,58 @@ fd2_emit_tile_init(struct fd_batch *batch)
reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
OUT_RING(ring, reg); /* RB_DEPTH_INFO */
+ /* fast clear patches */
+ int depth_size = -1;
+ int color_size = -1;
+
+ if (pfb->cbufs[0])
+ color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
+
+ if (pfb->zsbuf)
+ depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
+
+ for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
+ uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
+ uint32_t size, lines;
+
+ /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
+ switch (patch->val) {
+ case GMEM_PATCH_FASTCLEAR_COLOR:
+ size = align(gmem->bin_w * gmem->bin_h * color_size, 0x4000);
+ lines = size / 1024;
+ depth_base = size / 2;
+ break;
+ case GMEM_PATCH_FASTCLEAR_DEPTH:
+ size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x4000);
+ lines = size / 1024;
+ color_base = depth_base;
+ depth_base = depth_base + size / 2;
+ break;
+ case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
+ lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x4000) / 1024;
+ break;
+ case GMEM_PATCH_RESTORE_INFO:
+ patch->cs[0] = gmem->bin_w;
+ patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
+ A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
+ patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
+ if (pfb->zsbuf)
+ patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+ continue;
+ default:
+ continue;
+ }
+
+ patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
+ patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
+ A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
+ patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
+ A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
+ }
+ util_dynarray_resize(&batch->gmem_patches, 0);
+
/* set to zero, for some reason hardware doesn't like certain values */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
@@ -607,6 +660,7 @@ static void
fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
struct fd_context *ctx = batch->ctx;
+ struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = batch->gmem;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
@@ -624,6 +678,12 @@ fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
+ /* write SCISSOR_BR to memory so fast clear path can restore from it */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
+ OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
+ A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
+
/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
if (is_a20x(batch->ctx->screen)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index a852494a8fc..a1578506c2a 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -90,8 +90,10 @@ batch_init(struct fd_batch *batch)
util_dynarray_init(&batch->draw_patches, NULL);
- if (is_a2xx(ctx->screen))
+ if (is_a2xx(ctx->screen)) {
util_dynarray_init(&batch->shader_patches, NULL);
+ util_dynarray_init(&batch->gmem_patches, NULL);
+ }
if (is_a3xx(ctx->screen))
util_dynarray_init(&batch->rbrc_patches, NULL);
@@ -167,8 +169,10 @@ batch_fini(struct fd_batch *batch)
util_dynarray_fini(&batch->draw_patches);
- if (is_a2xx(batch->ctx->screen))
+ if (is_a2xx(batch->ctx->screen)) {
util_dynarray_fini(&batch->shader_patches);
+ util_dynarray_fini(&batch->gmem_patches);
+ }
if (is_a3xx(batch->ctx->screen))
util_dynarray_fini(&batch->rbrc_patches);
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h
index 428a0279072..7b723db64af 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@@ -145,6 +145,11 @@ struct fd_batch {
*/
struct util_dynarray rbrc_patches;
+ /* Keep track of GMEM related values that need to be patched up once we
+ * know the gmem layout:
+ */
+ struct util_dynarray gmem_patches;
+
/* Keep track of pointer to start of MEM exports for a20x binning shaders
*
* this is so the end of the shader can be cut off at the right point
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index d0420b27d31..dd35dfa29fa 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -77,24 +77,25 @@ static uint32_t bin_width(struct fd_screen *screen)
static uint32_t
total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
- uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
+ uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align,
+ struct fd_gmem_stateobj *gmem)
{
uint32_t total = 0, i;
for (i = 0; i < MAX_RENDER_TARGETS; i++) {
if (cbuf_cpp[i]) {
- gmem->cbuf_base[i] = align(total, 0x4000);
+ gmem->cbuf_base[i] = align(total, gmem_align);
total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
}
}
if (zsbuf_cpp[0]) {
- gmem->zsbuf_base[0] = align(total, 0x4000);
+ gmem->zsbuf_base[0] = align(total, gmem_align);
total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
}
if (zsbuf_cpp[1]) {
- gmem->zsbuf_base[1] = align(total, 0x4000);
+ gmem->zsbuf_base[1] = align(total, gmem_align);
total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
}
@@ -116,6 +117,7 @@ calculate_tiles(struct fd_batch *batch)
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
+ uint32_t gmem_align = 0x4000;
uint32_t max_width = bin_width(screen);
uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
uint32_t i, j, t, xoff, yoff;
@@ -178,10 +180,18 @@ calculate_tiles(struct fd_batch *batch)
zsbuf_cpp[0], width, height);
}
+ if (is_a20x(screen) && batch->cleared) {
+ /* under normal circumstances the requirement would be 4K
+ * but the fast clear path requires an alignment of 32K
+ */
+ gmem_align = 0x8000;
+ }
+
/* then find a bin width/height that satisfies the memory
* constraints:
*/
- while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
+ while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) >
+ gmem_size) {
if (bin_w > bin_h) {
nbins_x++;
bin_w = align(width / nbins_x, gmem_alignw);