summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2015-01-10 14:30:25 +1300
committer Eric Anholt <[email protected]> 2015-01-10 15:24:56 +1300
commit b920ecf793bd419558a240014624add08774765d (patch)
tree b1ae0fb208547b3e14237f2fe202fb328500c937
parent c772c92153fdcd4ba4920b7ef1745ce83b09603b (diff)
vc4: Cook up the draw-time VPM setup info during shader compile.
This will give the compiler the chance to dead-code eliminate unused VPM reads. This is particularly a big deal in the CS where a bunch of vattrs are just not going to be used.
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.h 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c 12
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 19
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 2
4 files changed, 28 insertions, 11 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 7e18a75e5b6..90a68e5c28e 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -121,6 +121,12 @@ struct vc4_compiled_shader {
uint8_t num_inputs;
+ /* Byte offsets for the start of the vertex attributes 0-7, and the
+ * total size as "attribute" 8.
+ */
+ uint8_t vattr_offsets[9];
+ uint8_t vattrs_live;
+
/**
* Array of the meanings of the VPM inputs this shader needs.
*
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index cc3f2d42183..bb4b9a42217 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -185,14 +185,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
- cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* vs attribute array bitfield */
- cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* vs total attribute size */
+ cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
+ cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
- cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* cs attribute array bitfield */
- cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* cs total attribute size */
+ cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
+ cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
@@ -211,8 +211,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
cl_u8(&vc4->shader_rec, elem_size - 1);
cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */
+ cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
+ cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
vpm_offset += align(elem_size, 4);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 56cd5c27d7b..581b9400957 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1079,6 +1079,7 @@ emit_vertex_input(struct vc4_compile *c, int attr)
uint32_t attr_size = util_format_get_blocksize(format);
struct qreg vpm_reads[4];
+ c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
struct qreg vpm = { QFILE_VPM, attr * 4 + i };
vpm_reads[i] = qir_MOV(c, vpm);
@@ -1933,11 +1934,10 @@ emit_stub_vpm_read(struct vc4_compile *c)
if (c->num_inputs)
return;
- for (int i = 0; i < 4; i++) {
- struct qreg vpm = { QFILE_VPM, 0 };
- (void)qir_MOV(c, vpm);
- c->num_inputs++;
- }
+ c->vattr_sizes[0] = 4;
+ struct qreg vpm = { QFILE_VPM, 0 };
+ (void)qir_MOV(c, vpm);
+ c->num_inputs++;
}
static void
@@ -2275,6 +2275,15 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
} else {
shader->num_inputs = c->num_inputs;
+
+ shader->vattr_offsets[0] = 0;
+ for (int i = 0; i < 8; i++) {
+ shader->vattr_offsets[i + 1] =
+ shader->vattr_offsets[i] + c->vattr_sizes[i];
+
+ if (c->vattr_sizes[i])
+ shader->vattrs_live |= (1 << i);
+ }
}
copy_uniform_state_to_shader(shader, c);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index ebec7ccfbe3..d2f89ae9e69 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -312,6 +312,8 @@ struct vc4_compile {
struct qreg line_x, point_x, point_y;
struct qreg discard;
+ uint8_t vattr_sizes[8];
+
/**
* Array of the TGSI semantics of all FS QFILE_VARY reads.
*