summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/a2xx
diff options
context:
space:
mode:
authorJonathan Marek <[email protected]>2018-12-18 23:33:54 -0500
committerRob Clark <[email protected]>2019-01-28 18:21:16 -0500
commitcb2322c7c0f95d6d1a2b90494cf5f6fd55f55638 (patch)
tree2456dacc3bab7f9d0be62965913b8922d78f7f46 /src/gallium/drivers/freedreno/a2xx
parent501c6e70d417b6e7347e41976a753c0680268854 (diff)
freedreno: a2xx: a20x hw binning
Signed-off-by: Jonathan Marek <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a2xx')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_draw.c32
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_emit.c52
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_emit.h3
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_gmem.c163
-rw-r--r--src/gallium/drivers/freedreno/a2xx/fd2_program.c11
5 files changed, 253 insertions, 8 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index f15d57cf0e0..c857c118d91 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -75,11 +75,12 @@ emit_vertexbufs(struct fd_context *ctx)
// CONST(20,0) (or CONST(26,0) in soliv_vp)
fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
+ fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
}
static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
- struct fd_ringbuffer *ring, unsigned index_offset)
+ struct fd_ringbuffer *ring, unsigned index_offset, bool binning)
{
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
@@ -119,8 +120,22 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
}
+ /* binning shader will take offset from C64 */
+ if (binning && is_a20x(ctx->screen)) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, 0x00000180);
+ OUT_RING(ring, fui(ctx->batch->num_vertices));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
+ if (binning || info->mode == PIPE_PRIM_POINTS)
+ vismode = IGNORE_VISIBILITY;
+
fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
- IGNORE_VISIBILITY, info, index_offset);
+ vismode, info, index_offset);
if (is_a20x(ctx->screen)) {
/* not sure why this is required, but it fixes some hangs */
@@ -145,6 +160,9 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
if (ctx->dirty & FD_DIRTY_VTXBUF)
emit_vertexbufs(ctx);
+ if (fd_binning_enabled)
+ fd2_emit_state_binning(ctx, ctx->dirty);
+
fd2_emit_state(ctx, ctx->dirty);
/* a2xx can draw only 65535 vertices at once
@@ -166,17 +184,23 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
struct pipe_draw_info info = *pinfo;
unsigned count = info.count;
unsigned step = step_tbl[info.mode];
+ unsigned num_vertices = ctx->batch->num_vertices;
if (!step)
return false;
for (; count + step > 32766; count -= step) {
info.count = MIN2(count, 32766);
- draw_impl(ctx, &info, ctx->batch->draw, index_offset);
+ draw_impl(ctx, &info, ctx->batch->draw, index_offset, false);
+ draw_impl(ctx, &info, ctx->batch->binning, index_offset, true);
info.start += step;
+ ctx->batch->num_vertices += step;
}
+ /* changing this value is a hack, restore it */
+ ctx->batch->num_vertices = num_vertices;
} else {
- draw_impl(ctx, pinfo, ctx->batch->draw, index_offset);
+ draw_impl(ctx, pinfo, ctx->batch->draw, index_offset, false);
+ draw_impl(ctx, pinfo, ctx->batch->binning, index_offset, true);
}
fd_context_all_clean(ctx);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index e98f86a8257..18d69444d12 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -186,6 +186,58 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
}
void
+fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
+{
+ struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
+ struct fd_ringbuffer *ring = ctx->batch->binning;
+
+ /* subset of fd2_emit_state needed for hw binning on a20x */
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE))
+ fd2_program_emit(ctx, ring, &ctx->prog);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) {
+ emit_constants(ring, VS_CONST_BASE * 4,
+ &ctx->constbuf[PIPE_SHADER_VERTEX],
+ (dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
+ }
+
+ if (dirty & FD_DIRTY_VIEWPORT) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 9);
+ OUT_RING(ring, 0x00000184);
+ OUT_RING(ring, fui(ctx->viewport.translate[0]));
+ OUT_RING(ring, fui(ctx->viewport.translate[1]));
+ OUT_RING(ring, fui(ctx->viewport.translate[2]));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(ctx->viewport.scale[0]));
+ OUT_RING(ring, fui(ctx->viewport.scale[1]));
+ OUT_RING(ring, fui(ctx->viewport.scale[2]));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ /* not sure why this is needed */
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
+ enum pipe_format format =
+ pipe_surface_format(ctx->batch->framebuffer.cbufs[0]);
+ bool has_alpha = util_format_has_alpha(format);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
+ OUT_RING(ring, blend->rb_blendcontrol_alpha |
+ COND(has_alpha, blend->rb_blendcontrol_rgb) |
+ COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb));
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
+ OUT_RING(ring, blend->rb_colormask);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
+ OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE);
+}
+
+void
fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
{
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.h b/src/gallium/drivers/freedreno/a2xx/fd2_emit.h
index 5e4bddd1faa..891ed91e5a8 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.h
@@ -40,7 +40,8 @@ struct fd2_vertex_buf {
void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n);
-void fd2_emit_state(struct fd_context *ctx, enum fd_dirty_3d_state dirty);
+void fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty);
+void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty);
void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd2_emit_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index 56db5608c28..6a066a63730 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -39,6 +39,7 @@
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
+#include "instr-a2xx.h"
static uint32_t fmt2swap(enum pipe_format format)
{
@@ -367,6 +368,41 @@ fd2_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile)
}
static void
+patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
+{
+ unsigned i;
+
+ if (!is_a20x(batch->ctx->screen)) {
+ /* identical to a3xx */
+ for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
+ struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
+ *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
+ }
+ util_dynarray_resize(&batch->draw_patches, 0);
+ return;
+ }
+
+ if (vismode == USE_VISIBILITY)
+ return;
+
+ for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) {
+ uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i);
+ unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
+
+ /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
+ * replace first two DWORDS with NOP and move the rest down
+ * (we don't want to have to move the idx buffer reloc)
+ */
+ ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
+ ptr[1] = 0x00000000;
+
+ ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
+ ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8);
+ ptr[3] = 0x00000000;
+ }
+}
+
+static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
struct fd_context *ctx = batch->ctx;
@@ -408,6 +444,10 @@ fd2_emit_sysmem_prep(struct fd_batch *batch)
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) |
A2XX_PA_SC_WINDOW_OFFSET_Y(0));
+
+ patch_draws(batch, IGNORE_VISIBILITY);
+ util_dynarray_resize(&batch->draw_patches, 0);
+ util_dynarray_resize(&batch->shader_patches, 0);
}
/* before first tile */
@@ -432,6 +472,112 @@ fd2_emit_tile_init(struct fd_batch *batch)
if (pfb->zsbuf)
reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
OUT_RING(ring, reg); /* RB_DEPTH_INFO */
+
+ /* set to zero, for some reason hardware doesn't like certain values */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
+ OUT_RING(ring, 0);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
+ OUT_RING(ring, 0);
+
+ if (is_a20x(ctx->screen) && fd_binning_enabled && gmem->num_vsc_pipes) {
+ /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
+ *
+ * in the shader compiler, we guarantee that the shader ends with
+ * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
+ *
+ * since the patches point only to dwords and CFs are 1.5 dwords,
+ * the patch is aligned and might point to an ALLOC CF
+ */
+ for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) {
+ instr_cf_t *cf =
+ *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i);
+ if (cf->opc == ALLOC)
+ cf++;
+ assert(cf->opc == EXEC);
+ assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END);
+ cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END;
+ }
+
+ patch_draws(batch, USE_VISIBILITY);
+
+ /* initialize shader constants for the binning memexport */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
+ OUT_RING(ring, 0x0000000C);
+
+ for (int i = 0; i < gmem->num_vsc_pipes; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
+
+ /* XXX we know how large this needs to be..
+ * should do some sort of realloc
+ * it should be ctx->batch->num_vertices bytes large
+ * with this size it will break with more than 256k vertices..
+ */
+ if (!pipe->bo) {
+ pipe->bo = fd_bo_new(ctx->dev, 0x40000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i);
+ }
+
+ /* memory export address (export32):
+ * .x: (base_address >> 2) | 0x40000000 (?)
+ * .y: index (float) - set by shader
+ * .z: 0x4B00D000 (?)
+ * .w: 0x4B000000 (?) | max_index (?)
+ */
+ OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x4B00D000);
+ OUT_RING(ring, 0x4B000000 | 0x40000);
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
+ OUT_RING(ring, 0x0000018C);
+
+ for (int i = 0; i < gmem->num_vsc_pipes; i++) {
+ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
+ float off_x, off_y, mul_x, mul_y;
+
+ /* const to transform from [-1,1] to bin coordinates for this pipe
+ * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
+ * 8 possible values on x/y axis,
+ * to clip at binning stage: only use center 6x6
+ * TODO: set the z parameters too so that hw binning
+ * can clip primitives in Z too
+ */
+
+ mul_x = 1.0f / (float) (gmem->bin_w * 8);
+ mul_y = 1.0f / (float) (gmem->bin_h * 8);
+ off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx;
+ off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny;
+
+ OUT_RING(ring, fui(off_x * (256.0f/255.0f)));
+ OUT_RING(ring, fui(off_y * (256.0f/255.0f)));
+ OUT_RING(ring, 0x3f000000);
+ OUT_RING(ring, fui(0.0f));
+
+ OUT_RING(ring, fui(mul_x * (256.0f/255.0f)));
+ OUT_RING(ring, fui(mul_y * (256.0f/255.0f)));
+ OUT_RING(ring, fui(0.0f));
+ OUT_RING(ring, fui(0.0f));
+ }
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0);
+
+ ctx->emit_ib(ring, batch->binning);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
+ OUT_RING(ring, 0x00000002);
+ } else {
+ patch_draws(batch, IGNORE_VISIBILITY);
+ }
+
+ util_dynarray_resize(&batch->draw_patches, 0);
+ util_dynarray_resize(&batch->shader_patches, 0);
}
/* before mem2gmem */
@@ -460,6 +606,7 @@ fd2_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
static void
fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
+ struct fd_context *ctx = batch->ctx;
struct fd_ringbuffer *ring = batch->gmem;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
@@ -486,6 +633,22 @@ fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
OUT_RING(ring, fui(0.0f));
OUT_RING(ring, fui(0.0f));
}
+
+ if (is_a20x(ctx->screen) && fd_binning_enabled) {
+ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p];
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
+ OUT_RING(ring, tile->n);
+
+ OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
+ OUT_RING(ring, tile->n);
+
+ /* TODO only emit this when tile->p changes */
+ OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
+ OUT_RELOC(ring, pipe->bo, 0, 0, 0);
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/src/gallium/drivers/freedreno/a2xx/fd2_program.c
index cbb18f84c4c..84b54cf56b7 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c
@@ -65,7 +65,7 @@ delete_shader(struct fd2_shader_stateobj *so)
static void
emit(struct fd_ringbuffer *ring, gl_shader_stage type,
- struct ir2_shader_info *info)
+ struct ir2_shader_info *info, struct util_dynarray *patches)
{
unsigned i;
@@ -74,6 +74,10 @@ emit(struct fd_ringbuffer *ring, gl_shader_stage type,
OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
OUT_RING(ring, info->sizedwords);
+
+ if (patches)
+ util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]);
+
for (i = 0; i < info->sizedwords; i++)
OUT_RING(ring, info->dwords[i]);
}
@@ -261,10 +265,11 @@ fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
}
- emit(ring, MESA_SHADER_VERTEX, vpi);
+ emit(ring, MESA_SHADER_VERTEX, vpi,
+ binning ? &ctx->batch->shader_patches : NULL);
if (fp) {
- emit(ring, MESA_SHADER_FRAGMENT, fpi);
+ emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
vs_export = MAX2(1, f->inputs_count) - 1;
}