aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Kristian H. Kristensen <[email protected]> 2020-05-15 15:11:55 -0700
committer Marge Bot <[email protected]> 2020-05-21 00:16:55 +0000
commit 1c21577246691589f0295081d208894082444a02 (patch)
tree a88575ed1b30ead9d878c12130434397c9912133 /src
parent 5f494636faf00ac5a2f6e88b0100c642fc04536a (diff)
freedreno/a6xx: Emit VFD setup as array writes
By splitting the per-element loop into three loops, we can emit a single PKT4 for each of VFD_FETCH, VFD_DECODE and VFD_DEST_CNTL and write all of the elements of each register array in one packet.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5064>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/freedreno/a6xx/fd6_emit.c 32
1 files changed, 23 insertions, 9 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 6105fcd8cfd..52a44c67535 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -568,38 +568,48 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
}
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
- 4 * (2 + cnt * 10), FD_RINGBUFFER_STREAMING);
+ 4 * (5 + cnt * 7), FD_RINGBUFFER_STREAMING);
OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(cnt) |
A6XX_VFD_CONTROL_0_DECODE_CNT(cnt));
+ OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * cnt);
for (int32_t j = 0; j < cnt; j++) {
int32_t i = map[j];
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
&vtx->vertexbuf.vb[elem->vertex_buffer_index];
struct fd_resource *rsc = fd_resource(vb->buffer.resource);
- enum pipe_format pfmt = elem->src_format;
- enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
- bool isint = util_format_is_pure_integer(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
uint32_t size = fd_bo_size(rsc->bo) - off;
- debug_assert(fmt != ~0);
#ifdef DEBUG
/* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10
*/
- if (off > fd_bo_size(rsc->bo))
+ if (off > fd_bo_size(rsc->bo)) {
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
continue;
+ }
#endif
- OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4);
OUT_RELOC(ring, rsc->bo, off, 0, 0);
OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */
OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * cnt);
+ for (int32_t j = 0; j < cnt; j++) {
+ int32_t i = map[j];
+ struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+ enum pipe_format pfmt = elem->src_format;
+ enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
+ bool isint = util_format_is_pure_integer(pfmt);
+ debug_assert(fmt != ~0);
- OUT_PKT4(ring, REG_A6XX_VFD_DECODE(j), 2);
OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(j) |
A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) |
@@ -607,8 +617,12 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
A6XX_VFD_DECODE_INSTR_UNK30 |
COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
+ }
+
+ OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), cnt);
+ for (int32_t j = 0; j < cnt; j++) {
+ int32_t i = map[j];
- OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(j), 1);
OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
A6XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
}