diff options
author | Kristian H. Kristensen <[email protected]> | 2020-05-15 15:11:55 -0700 |
---|---|---|
committer | Marge Bot <[email protected]> | 2020-05-21 00:16:55 +0000 |
commit | 1c21577246691589f0295081d208894082444a02 (patch) | |
tree | a88575ed1b30ead9d878c12130434397c9912133 /src | |
parent | 5f494636faf00ac5a2f6e88b0100c642fc04536a (diff) |
freedreno/a6xx: Emit VFD setup as array writes
We can use a single PKT4 for each of VFD_FETCH, VFD_DECODE and
VFD_DEST_CNTL, writing all of the elements of each register array at
once, if we split the per-element loop into three loops.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5064>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 32 |
1 file changed, 23 insertions, 9 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 6105fcd8cfd..52a44c67535 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -568,38 +568,48 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) } struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, - 4 * (2 + cnt * 10), FD_RINGBUFFER_STREAMING); + 4 * (5 + cnt * 7), FD_RINGBUFFER_STREAMING); OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1); OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(cnt) | A6XX_VFD_CONTROL_0_DECODE_CNT(cnt)); + OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * cnt); for (int32_t j = 0; j < cnt; j++) { int32_t i = map[j]; struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[elem->vertex_buffer_index]; struct fd_resource *rsc = fd_resource(vb->buffer.resource); - enum pipe_format pfmt = elem->src_format; - enum a6xx_format fmt = fd6_pipe2vtx(pfmt); - bool isint = util_format_is_pure_integer(pfmt); uint32_t off = vb->buffer_offset + elem->src_offset; uint32_t size = fd_bo_size(rsc->bo) - off; - debug_assert(fmt != ~0); #ifdef DEBUG /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10 */ - if (off > fd_bo_size(rsc->bo)) + if (off > fd_bo_size(rsc->bo)) { + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); continue; + } #endif - OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4); OUT_RELOC(ring, rsc->bo, off, 0, 0); OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + } + + OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * cnt); + for (int32_t j = 0; j < cnt; j++) { + int32_t i = map[j]; + struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; + enum pipe_format pfmt = elem->src_format; + enum a6xx_format fmt = fd6_pipe2vtx(pfmt); + bool isint = 
util_format_is_pure_integer(pfmt); + debug_assert(fmt != ~0); - OUT_PKT4(ring, REG_A6XX_VFD_DECODE(j), 2); OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(j) | A6XX_VFD_DECODE_INSTR_FORMAT(fmt) | COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) | @@ -607,8 +617,12 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) A6XX_VFD_DECODE_INSTR_UNK30 | COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT)); OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */ + } + + OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), cnt); + for (int32_t j = 0; j < cnt; j++) { + int32_t i = map[j]; - OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(j), 1); OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) | A6XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid)); } |