summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c25
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c41
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c9
3 files changed, 57 insertions, 18 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 54b36265ddf..905af54e48d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -2214,7 +2214,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
{
struct fd3_compile_context ctx;
struct ir3_block *block;
- unsigned i, actual_in;
+ unsigned i, j, actual_in;
int ret = 0;
assert(!so->ir);
@@ -2232,6 +2232,29 @@ fd3_compile_shader(struct fd3_shader_variant *so,
block = ctx.block;
+ /* at this point, for binning pass, compact outputs[] in place (j trails i), dropping unneeded outputs: */
+ if (key.binning_pass) {
+ for (i = 0, j = 0; i < so->outputs_count; i++) {
+ unsigned name = sem2name(so->outputs[i].semantic);
+ unsigned idx = sem2idx(so->outputs[i].semantic);
+
+ /* keep only semantic index 0 of position/psize; idx must be the semantic index, not the name */
+ if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
+ (name == TGSI_SEMANTIC_PSIZE))) {
+ if (i != j) {
+ so->outputs[j] = so->outputs[i];
+ block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
+ block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
+ block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
+ block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
+ }
+ j++;
+ }
+ }
+ so->outputs_count = j;
+ block->noutputs = j * 4;
+ }
+
/* at this point, we want the kill's in the outputs array too,
* so that they get scheduled (since they have no dst).. we've
* already ensured that the array is big enough in push_block():
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 5bfd976170c..50271fa137c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -90,6 +90,7 @@ emit_constants(struct fd_ringbuffer *ring,
struct fd3_shader_variant *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
+ uint32_t first_immediate;
uint32_t base = 0;
unsigned i;
@@ -97,6 +98,13 @@ emit_constants(struct fd_ringbuffer *ring,
// they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
+ /* in particular, with the binning shader, where unneeded consts are no
+ * longer referenced, we could end up w/ constlen that is smaller
+ * than first_immediate. In that case truncate the user consts
+ * early to avoid HLSQ lockup caused by writing too many consts
+ */
+ first_immediate = MIN2(shader->first_immediate, shader->constlen);
+
/* emit user constants: */
while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1;
@@ -109,10 +117,14 @@ emit_constants(struct fd_ringbuffer *ring,
/* gallium could leave const buffers bound above what the
* current shader uses.. don't let that confuse us.
*/
- if (base >= (4 * shader->first_immediate))
+ if (base >= (4 * first_immediate))
break;
if (constbuf->dirty_mask & (1 << index)) {
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (4 * first_immediate) - base);
fd3_emit_constant(ring, sb, base,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
@@ -332,6 +344,15 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
j++;
}
}
+
+ OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+ OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
+ A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+ A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+ A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+ A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
}
void
@@ -429,11 +450,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd3_rasterizer_stateobj *rasterizer =
fd3_rasterizer_stateobj(ctx->rasterizer);
- uint32_t stride_in_vpc;
+ uint32_t stride_in_vpc = 0;
- stride_in_vpc = align(fp->total_in, 4) / 4;
- if (stride_in_vpc > 0)
- stride_in_vpc = MAX2(stride_in_vpc, 2);
+ if (!key.binning_pass) {
+ stride_in_vpc = align(fp->total_in, 4) / 4;
+ if (stride_in_vpc > 0)
+ stride_in_vpc = MAX2(stride_in_vpc, 2);
+ }
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
@@ -480,9 +503,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ if (!key.binning_pass) {
+ emit_constants(ring, SB_FRAG_SHADER,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ }
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 01502ce955e..6fc39a96380 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -532,15 +532,6 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
}
-
- OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
- OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
- A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
- A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
- A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
- OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
- A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
- A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
}
/* hack.. until we figure out how to deal w/ vpsrepl properly.. */