diff options
author | Eric Anholt <[email protected]> | 2015-01-09 12:56:34 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-01-10 13:54:12 +1300 |
commit | 92a0b0bd7099b15320faaccfd70b3c8dc877810e (patch) | |
tree | fb403c04ec6144c77c47b3e0c266ef67a74d0506 | |
parent | 72cb6619cb75a92901d372d687505a747a384571 (diff) |
vc4: Pack VPM attr contents according to just the size of the attribute.
total instructions in shared programs: 40960 -> 39753 (-2.95%)
instructions in affected programs: 20871 -> 19664 (-5.78%)
-rw-r--r--  src/gallium/drivers/vc4/vc4_draw.c    |  7 +++++--
-rw-r--r--  src/gallium/drivers/vc4/vc4_program.c | 12 +++---------
-rw-r--r--  src/gallium/drivers/vc4/vc4_qir.h     |  1 +
3 files changed, 9 insertions, 11 deletions
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 0d915040c88..77e98211c6c 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -197,6 +197,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         uint32_t max_index = 0xffff;
+        uint32_t vpm_offset = 0;
         for (int i = 0; i < vtx->num_elements; i++) {
                 struct pipe_vertex_element *elem = &vtx->pipe[i];
                 struct pipe_vertex_buffer *vb =
@@ -210,8 +211,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
                 cl_u8(&vc4->shader_rec, elem_size - 1);
                 cl_u8(&vc4->shader_rec, vb->stride);
-                cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
-                cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */
+
+                vpm_offset += align(elem_size, 4) / 4;
 
                 if (vb->stride > 0) {
                         max_index = MIN2(max_index,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6bad1560b2f..e362dcdf6ca 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1076,17 +1076,11 @@ static void
 emit_vertex_input(struct vc4_compile *c, int attr)
 {
         enum pipe_format format = c->vs_key->attr_formats[attr];
+        uint32_t attr_size = util_format_get_blocksize(format);
         struct qreg vpm_reads[4];
 
-        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
-         * time, so we always read 4 32-bit VPM entries.
-         */
-        for (int i = 0; i < 4; i++) {
-                vpm_reads[i] = qir_get_temp(c);
-                qir_emit(c, qir_inst(QOP_VPM_READ,
-                                     vpm_reads[i],
-                                     c->undef,
-                                     c->undef));
+        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+                vpm_reads[i] = qir_VPM_READ(c);
                 c->num_inputs++;
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 6dac00fbbd8..d8f9babef4c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -495,6 +495,7 @@ QIR_ALU0(FRAG_W)
 QIR_ALU0(FRAG_REV_FLAG)
 QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
+QIR_ALU0(VPM_READ)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)