diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 9 |
3 files changed, 57 insertions, 18 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 54b36265ddf..905af54e48d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -2214,7 +2214,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
 {
 	struct fd3_compile_context ctx;
 	struct ir3_block *block;
-	unsigned i, actual_in;
+	unsigned i, j, actual_in;
 	int ret = 0;
 
 	assert(!so->ir);
@@ -2232,6 +2232,29 @@ fd3_compile_shader(struct fd3_shader_variant *so,
 
 	block = ctx.block;
 
+	/* at this point, for binning pass, throw away unneeded outputs: */
+	if (key.binning_pass) {
+		for (i = 0, j = 0; i < so->outputs_count; i++) {
+			unsigned name = sem2name(so->outputs[i].semantic);
+			unsigned idx = sem2idx(so->outputs[i].semantic);
+
+			/* throw away everything but first position/psize */
+			if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
+					(name == TGSI_SEMANTIC_PSIZE))) {
+				if (i != j) {
+					so->outputs[j] = so->outputs[i];
+					block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
+					block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
+					block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
+					block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
+				}
+				j++;
+			}
+		}
+		so->outputs_count = j;
+		block->noutputs = j * 4;
+	}
+
 	/* at this point, we want the kill's in the outputs array too,
 	 * so that they get scheduled (since they have no dst).. we've
 	 * already ensured that the array is big enough in push_block():
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 5bfd976170c..50271fa137c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -90,6 +90,7 @@ emit_constants(struct fd_ringbuffer *ring,
 		struct fd3_shader_variant *shader)
 {
 	uint32_t enabled_mask = constbuf->enabled_mask;
+	uint32_t first_immediate;
 	uint32_t base = 0;
 	unsigned i;
 
@@ -97,6 +98,13 @@ emit_constants(struct fd_ringbuffer *ring,
 	// they are clobbered by a clear, gmem2mem, or mem2gmem..
 	constbuf->dirty_mask = enabled_mask;
 
+	/* in particular, with binning shader and unneeded consts no
+	 * longer referenced, we could end up w/ constlen that is smaller
+	 * than first_immediate. In that case truncate the user consts
+	 * early to avoid HLSQ lockup caused by writing too many consts
+	 */
+	first_immediate = MIN2(shader->first_immediate, shader->constlen);
+
 	/* emit user constants: */
 	while (enabled_mask) {
 		unsigned index = ffs(enabled_mask) - 1;
@@ -109,10 +117,14 @@ emit_constants(struct fd_ringbuffer *ring,
 		/* gallium could leave const buffers bound above what the
 		 * current shader uses.. don't let that confuse us.
 		 */
-		if (base >= (4 * shader->first_immediate))
+		if (base >= (4 * first_immediate))
 			break;
 
 		if (constbuf->dirty_mask & (1 << index)) {
+			/* and even if the start of the const buffer is before
+			 * first_immediate, the end may not be:
+			 */
+			size = MIN2(size, (4 * first_immediate) - base);
 			fd3_emit_constant(ring, sb, base,
 					cb->buffer_offset, size,
 					cb->user_buffer, cb->buffer);
@@ -332,6 +344,15 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
 			j++;
 		}
 	}
+
+	OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+	OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
+			A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+			A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+			A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+	OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+			A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+			A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
 }
 
 void
@@ -429,11 +450,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
 		struct fd3_rasterizer_stateobj *rasterizer =
 				fd3_rasterizer_stateobj(ctx->rasterizer);
-		uint32_t stride_in_vpc;
+		uint32_t stride_in_vpc = 0;
 
-		stride_in_vpc = align(fp->total_in, 4) / 4;
-		if (stride_in_vpc > 0)
-			stride_in_vpc = MAX2(stride_in_vpc, 2);
+		if (!key.binning_pass) {
+			stride_in_vpc = align(fp->total_in, 4) / 4;
+			if (stride_in_vpc > 0)
+				stride_in_vpc = MAX2(stride_in_vpc, 2);
+		}
 
 		OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
 		OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
@@ -480,9 +503,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		emit_constants(ring, SB_VERT_SHADER,
 				&ctx->constbuf[PIPE_SHADER_VERTEX],
 				(prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
-		emit_constants(ring, SB_FRAG_SHADER,
-				&ctx->constbuf[PIPE_SHADER_FRAGMENT],
-				(prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+		if (!key.binning_pass) {
+			emit_constants(ring, SB_FRAG_SHADER,
+					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
+					(prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+		}
 	}
 
 	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 01502ce955e..6fc39a96380 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -532,15 +532,6 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 		OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
 		OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
 	}
-
-	OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
-	OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
-			A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
-			A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
-			A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
-	OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
-			A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
-			A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
 }
 
 /* hack.. until we figure out how to deal w/ vpsrepl properly.. */