diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 32 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 41 |
4 files changed, 70 insertions, 15 deletions
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index fc4a53f8651..93f6a267fa9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -359,7 +359,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, if (tex->num_samplers > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4); + fd_ringbuffer_new_flags(ctx->pipe, tex->num_samplers * 4 * 4, + FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); for (unsigned i = 0; i < tex->num_samplers; i++) { static const struct fd6_sampler_stateobj dummy_sampler = {}; const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ? @@ -389,7 +390,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, if (tex->num_textures > 0) { struct fd_ringbuffer *state = - fd_ringbuffer_new_object(ctx->pipe, tex->num_textures * 16); + fd_ringbuffer_new_flags(ctx->pipe, tex->num_textures * 16 * 4, + FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); for (unsigned i = 0; i < tex->num_textures; i++) { static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? @@ -791,9 +793,29 @@ fd6_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr)); } - ir3_emit_vs_consts(vp, ring, ctx, emit->info); - if (!emit->key.binning_pass) - ir3_emit_fs_consts(fp, ring, ctx); +#define DIRTY_CONST (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST | \ + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE) + + if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) { + struct fd_ringbuffer *vsconstobj = + fd_ringbuffer_new_flags(ctx->pipe, 0x1000, + FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + + ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info); + fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7); + fd_ringbuffer_del(vsconstobj); + } + + if ((ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) && + !emit->key.binning_pass) { + struct fd_ringbuffer *fsconstobj = + fd_ringbuffer_new_flags(ctx->pipe, 0x1000, + FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING); + + ir3_emit_fs_consts(fp, fsconstobj, ctx); + fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x7); + fd_ringbuffer_del(fsconstobj); + } struct pipe_stream_output_info *info = &vp->shader->stream_output; if (info->num_outputs) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index a2117a1b244..4e27597a70b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -43,7 +43,8 @@ struct fd_ringbuffer; * need to be emit'd. */ enum fd6_state_id { - FD6_GROUP_CONST, + FD6_GROUP_VS_CONST, + FD6_GROUP_FS_CONST, }; struct fd6_state_group { @@ -116,7 +117,7 @@ fd6_emit_add_group(struct fd6_emit *emit, struct fd_ringbuffer *stateobj, if (fd_ringbuffer_size(stateobj) == 0) return; struct fd6_state_group *g = &emit->groups[emit->num_groups++]; - g->stateobj = stateobj; + g->stateobj = fd_ringbuffer_ref(stateobj); g->group_id = group_id; g->enable_mask = enable_mask; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 0c96250f974..11673992959 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -751,6 +751,13 @@ fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); } + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 5532a7f3467..ee063f84d73 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -552,6 +552,18 @@ ir3_shader_outputs(const struct ir3_shader *so) #include "freedreno_resource.h" +static inline void +ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + /* when we emit const state via ring (IB2) we need a WFI, but when + * it is emit'd via stateobj, we don't + */ + if (ring->flags & FD_RINGBUFFER_OBJECT) + return; + + fd_wfi(batch, ring); +} + static void emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) @@ -579,7 +591,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, size = MIN2(size, 4 * max_const); if (size > 0) { - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const(ring, v->type, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); @@ -611,7 +623,7 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, } } - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const_bo(ring, v->type, false, offset * 4, params, prscs, offsets); } } @@ -631,7 +643,7 @@ emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, sizes[off] = sb->sb[index].buffer_size; } - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const(ring, v->type, offset * 4, 0, ARRAY_SIZE(sizes), sizes, NULL); } @@ -673,7 +685,7 @@ emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, } } - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const(ring, v->type, offset * 4, 0, ARRAY_SIZE(dims), dims, NULL); } @@ -696,7 +708,7 @@ emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, size *= 4; if (size > 0) { - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const(ring, v->type, base, 0, size, v->immediates[0].val, NULL); } @@ -729,7 +741,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, } } - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); ctx->emit_const_bo(ring, v->type, true, offset * 4, params, prscs, offsets); } } @@ -787,6 +799,19 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin { enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; + /* When we use CP_SET_DRAW_STATE objects to emit constant state, + * if we emit any of it we need to emit all. This is because + * we are using the same state-group-id each time for uniform + * state, and if previous update is never evaluated (due to no + * visible primitives in the current tile) then the new stateobj + * completely replaces the old one. + * + * Possibly if we split up different parts of the const state to + * different state-objects we could avoid this. + */ + if (dirty && (ring->flags & FD_RINGBUFFER_OBJECT)) + dirty = ~0; + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { struct fd_constbuf_stateobj *constbuf; bool shader_dirty; @@ -846,7 +871,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin vertex_params_size = ARRAY_SIZE(vertex_params); } - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); bool needs_vtxid_base = ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); @@ -918,7 +943,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin /* emit compute-shader driver-params: */ uint32_t offset = v->constbase.driver_param; if (v->constlen > offset) { - fd_wfi(ctx->batch, ring); + ring_wfi(ctx->batch, ring); if (info->indirect) { struct pipe_resource *indirect = NULL; |