summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/a3xx
diff options
context:
space:
mode:
author Rob Clark <[email protected]> 2014-10-14 12:20:54 -0400
committer Rob Clark <[email protected]> 2014-10-15 15:49:48 -0400
commit d5d80b37392c7f15c4fb39b6b1826230239930fd (patch)
tree a70ad7ceaea741ea2040996759c7d982fb875011 /src/gallium/drivers/freedreno/a3xx
parent 57de9bbb632ed7639d07d37965dcee5a1fe3bf30 (diff)
freedreno/a3xx: refactor vertex state emit
Get rid of fd3_vertex_buf and use fd_vertex_state directly for all draws. Removes a tiny bit of CPU overhead for munging around the vertex state every time it is emitted, but more importantly it cleans things up for later optimizations, so the emit paths don't have to special case internal draws (gmem<->mem, clears, etc) with regular draws. Instead of constructing an fd3_vertex_buf array each time for internal draws, pre-create solid_vbuf_state and blit_vbuf_state at context init time. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.c31
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.h12
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c35
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c20
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.h14
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_gmem.c22
6 files changed, 65 insertions, 69 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index f8f412e1a1b..4e3b5038bed 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -49,6 +49,9 @@ fd3_context_destroy(struct pipe_context *pctx)
fd_bo_del(fd3_ctx->fs_pvt_mem);
fd_bo_del(fd3_ctx->vsc_size_mem);
+ pctx->delete_vertex_elements_state(pctx, fd3_ctx->solid_vbuf_state.vtx);
+ pctx->delete_vertex_elements_state(pctx, fd3_ctx->blit_vbuf_state.vtx);
+
pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
@@ -135,6 +138,34 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+ /* setup solid_vbuf_state: */
+ fd3_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 1, (struct pipe_vertex_element[]){{
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ fd3_ctx->solid_vbuf_state.vertexbuf.count = 1;
+ fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+ fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->solid_vbuf;
+
+ /* setup blit_vbuf_state: */
+ fd3_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+ pctx, 2, (struct pipe_vertex_element[]){{
+ .vertex_buffer_index = 0,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32_FLOAT,
+ }, {
+ .vertex_buffer_index = 1,
+ .src_offset = 0,
+ .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }});
+ fd3_ctx->blit_vbuf_state.vertexbuf.count = 2;
+ fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+ fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->blit_texcoord_vbuf;
+ fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+ fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd3_ctx->solid_vbuf;
+
fd3_query_context_init(pctx);
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 2736470b93a..324edb2eb80 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -62,6 +62,18 @@ struct fd3_context {
*/
struct pipe_resource *blit_texcoord_vbuf;
+ /* vertex state for solid_vbuf:
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state solid_vbuf_state;
+
+ /* vertex state for blit_prog:
+ * - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+ * - solid_vbuf / 12 / R32G32B32_FLOAT
+ */
+ struct fd_vertex_state blit_vbuf_state;
+
+
/*
* Border color layout *appears* to be as arrays of 0x40 byte
* elements, with frag shader elements starting at (16 x 0x40).
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index bd395f602b0..e333a80879f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -47,26 +47,7 @@ static void
emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct ir3_shader_key key)
{
- struct fd_vertex_stateobj *vtx = ctx->vtx;
- struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
- struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
- unsigned i;
-
- if (!vtx->num_elements)
- return;
-
- for (i = 0; i < vtx->num_elements; i++) {
- struct pipe_vertex_element *elem = &vtx->pipe[i];
- struct pipe_vertex_buffer *vb =
- &vertexbuf->vb[elem->vertex_buffer_index];
- bufs[i].offset = vb->buffer_offset + elem->src_offset;
- bufs[i].stride = vb->stride;
- bufs[i].prsc = vb->buffer;
- bufs[i].format = elem->src_format;
- }
-
- fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
- bufs, vtx->num_elements);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key), &ctx->vtx);
}
static void
@@ -75,7 +56,7 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
{
fd3_emit_state(ctx, ring, info, &ctx->prog, key, dirty);
- if (dirty & FD_DIRTY_VTXBUF)
+ if (dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
emit_vertexbufs(ctx, ring, key);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
@@ -185,11 +166,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
fd3_emit_state(ctx, ring, NULL, &ctx->solid_prog, key, dirty);
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
- (struct fd3_vertex_buf[]) {{
- .prsc = fd3_ctx->solid_vbuf,
- .stride = 12,
- .format = PIPE_FORMAT_R32G32B32_FLOAT,
- }}, 1);
+ &fd3_ctx->solid_vbuf_state);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
@@ -320,11 +297,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
- (struct fd3_vertex_buf[]) {{
- .prsc = fd3_ctx->solid_vbuf,
- .stride = 12,
- .format = PIPE_FORMAT_R32G32B32_FLOAT,
- }}, 1);
+ &fd3_ctx->solid_vbuf_state);
fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index d92ebc2f0ad..e0cbebaeaf7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -331,13 +331,15 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct ir3_shader_variant *vp,
- struct fd3_vertex_buf *vbufs, uint32_t n)
+ struct ir3_shader_variant *vp, struct fd_vertex_state *vtx)
{
uint32_t i, j, last = 0;
uint32_t total_in = 0;
+ unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
- n = MIN2(n, vp->inputs_count);
+ /* hw doesn't like to be configured for zero vbo's, it seems: */
+ if (vtx->vtx->num_elements == 0)
+ return;
for (i = 0; i < n; i++)
if (vp->inputs[i].compmask)
@@ -345,9 +347,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
for (i = 0, j = 0; i <= last; i++) {
if (vp->inputs[i].compmask) {
- struct pipe_resource *prsc = vbufs[i].prsc;
- struct fd_resource *rsc = fd_resource(prsc);
- enum pipe_format pfmt = vbufs[i].format;
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ struct pipe_vertex_buffer *vb =
+ &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+ struct fd_resource *rsc = fd_resource(vb->buffer);
+ enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
bool switchnext = (i != last);
bool isint = util_format_is_pure_integer(pfmt);
@@ -357,11 +361,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+ OUT_RELOC(ring, rsc->bo, vb->buffer_offset + elem->src_offset, 0, 0);
OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 81ff06275bc..89e73cf2cc1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -46,21 +46,13 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface *psurf);
-/* NOTE: this just exists because we don't have proper vertex/vertexbuf
- * state objs for clear, and mem2gmem/gmem2mem operations..
- */
-struct fd3_vertex_buf {
- unsigned offset, stride;
- struct pipe_resource *prsc;
- enum pipe_format format;
-};
-
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct ir3_shader_variant *vp,
- struct fd3_vertex_buf *vbufs, uint32_t n);
+ struct ir3_shader_variant *vp, struct fd_vertex_state *vtx);
+
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_draw_info *info, struct fd_program_stateobj *prog,
struct ir3_shader_key key, uint32_t dirty);
+
void fd3_emit_restore(struct fd_context *ctx);
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index c43121993c0..172bd4c9d20 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -185,11 +185,7 @@ emit_binning_workaround(struct fd_context *ctx)
fd3_program_emit(ring, &ctx->solid_prog, key, false);
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
- (struct fd3_vertex_buf[]) {{
- .prsc = fd3_ctx->solid_vbuf,
- .stride = 12,
- .format = PIPE_FORMAT_R32G32B32_FLOAT,
- }}, 1);
+ &fd3_ctx->solid_vbuf_state);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
@@ -410,11 +406,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
fd3_program_emit(ring, &ctx->solid_prog, key, false);
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
- (struct fd3_vertex_buf[]) {{
- .prsc = fd3_ctx->solid_vbuf,
- .stride = 12,
- .format = PIPE_FORMAT_R32G32B32_FLOAT,
- }}, 1);
+ &fd3_ctx->solid_vbuf_state);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = depth_base(ctx);
@@ -554,15 +546,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
fd3_program_emit(ring, &ctx->blit_prog, key, false);
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
- (struct fd3_vertex_buf[]) {{
- .prsc = fd3_ctx->blit_texcoord_vbuf,
- .stride = 8,
- .format = PIPE_FORMAT_R32G32_FLOAT,
- }, {
- .prsc = fd3_ctx->solid_vbuf,
- .stride = 12,
- .format = PIPE_FORMAT_R32G32B32_FLOAT,
- }}, 2);
+ &fd3_ctx->blit_vbuf_state);
/* for gmem pitch/base calculations, we need to use the non-
* truncated tile sizes: