diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 156 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 156 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_context.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_util.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 148 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 |
12 files changed, 263 insertions, 229 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 8441898382b..dc33783e398 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv) fd3_gmem_init(pctx); fd3_texture_init(pctx); fd3_prog_init(pctx); + fd3_emit_init(pctx); pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv); if (!pctx) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 070ed43a279..fc30d4842ba 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -345,7 +345,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, fd3_emit_vertex_bufs(ring, &emit); - fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 07cc2266d08..9032366b748 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -43,19 +43,26 @@ #include "fd3_format.h" #include "fd3_zsa.h" +static const enum adreno_state_block sb[] = { + [SHADER_VERTEX] = SB_VERT_SHADER, + [SHADER_FRAGMENT] = SB_FRAG_SHADER, +}; + /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void -fd3_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + if (prsc) { sz = 0; src = SS_INDIRECT; @@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; @@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring, } static void -emit_constants(struct fd_ringbuffer *ring, - enum adreno_state_block sb, - struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader, - bool emit_immediates) +fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) { - uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t max_const; - int i; - - // XXX TODO only emit dirty consts.. but we need to keep track if - // they are clobbered by a clear, gmem2mem, or mem2gmem.. - constbuf->dirty_mask = enabled_mask; - - /* in particular, with binning shader we may end up with unused - * consts, ie. we could end up w/ constlen that is smaller - * than first_immediate. In that case truncate the user consts - * early to avoid HLSQ lockup caused by writing too many consts - */ - max_const = MIN2(shader->first_driver_param, shader->constlen); - - /* emit user constants: */ - if (enabled_mask & 1) { - const unsigned index = 0; - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size && constbuf->dirty_mask & (1 << index)) { - fd3_emit_constant(ring, sb, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - constbuf->dirty_mask &= ~(1 << index); - } - - enabled_mask &= ~(1 << index); - } - - if (shader->constlen > shader->first_driver_param) { - uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); - /* emit ubos: */ - OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(params * 2)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); - - for (i = 1; i <= params * 4; i++) { - struct pipe_constant_buffer *cb = &constbuf->cb[i]; - assert(!cb->user_buffer); - if ((enabled_mask & (1 << i)) && cb->buffer) - OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); - else - OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); - } - } + uint32_t i; - /* emit shader immediates: */ - if (shader && emit_immediates) { - int size = shader->immediates_count; - uint32_t base = shader->first_immediate; + debug_assert((regid % 4) == 0); + debug_assert((num % 4) == 0); - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, shader->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - fd3_emit_constant(ring, sb, base, - 0, size, shader->immediates[0].val, NULL); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | + CP_LOAD_STATE_0_NUM_UNIT(num/2)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 0; i < num; i++) { + if (bos[i]) { + if (write) { + OUT_RELOCW(ring, bos[i], offsets[i], 0, 0); + } else { + OUT_RELOC(ring, bos[i], offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); } } } @@ -669,33 +618,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, HLSQ_FLUSH); - if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && - /* evil hack to deal sanely with clear path: */ - (emit->prog == &ctx->prog)) { - fd_wfi(ctx, ring); - emit_constants(ring, SB_VERT_SHADER, - &ctx->constbuf[PIPE_SHADER_VERTEX], - vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); - if (!emit->key.binning_pass) { - emit_constants(ring, SB_FRAG_SHADER, - &ctx->constbuf[PIPE_SHADER_FRAGMENT], - fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); - } - } - - /* emit driver params every time */ - if (emit->info && emit->prog == &ctx->prog) { - uint32_t vertex_params[4] = { - emit->info->indexed ? emit->info->index_bias : emit->info->start, - 0, - 0, - 0 - }; - if (vp->constlen >= vp->first_driver_param + 4) { - fd3_emit_constant(ring, SB_VERT_SHADER, - (vp->first_driver_param + 4) * 4, - 0, 4, vertex_params, NULL); - } + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_consts(vp, ring, emit->info, dirty); + if (!emit->key.binning_pass) + ir3_emit_consts(fp, ring, emit->info, dirty); + /* mark clean after emitting consts: */ + ctx->prog.dirty = 0; } if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) { @@ -930,3 +858,11 @@ fd3_emit_restore(struct fd_context *ctx) ctx->needs_rb_fbd = true; } + +void +fd3_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd3_emit_const; + ctx->emit_const_bo = fd3_emit_const_bo; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index 8f21919c9a7..795654706a7 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -37,10 +37,8 @@ #include "ir3_shader.h" struct fd_ringbuffer; -enum adreno_state_block; -void fd3_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); @@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd3_emit_restore(struct fd_context *ctx); +void fd3_emit_init(struct pipe_context *pctx); + #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index 6e109b6205a..e172d350517 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv) fd4_gmem_init(pctx); fd4_texture_init(pctx); fd4_prog_init(pctx); + fd4_emit_init(pctx); pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv); if (!pctx) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index d070f5fd6b7..ff1dfdc392f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -295,7 +295,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */ /* until fastclear works: */ - fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index f3e1ccebccc..4462a82777f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -43,19 +43,26 @@ #include "fd4_format.h" #include "fd4_zsa.h" +static const enum adreno_state_block sb[] = { + [SHADER_VERTEX] = SB_VERT_SHADER, + [SHADER_FRAGMENT] = SB_FRAG_SHADER, +}; + /* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void -fd4_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; + debug_assert((regid % 4) == 0); + debug_assert((sizedwords % 4) == 0); + if (prsc) { sz = 0; src = 0x2; // TODO ?? @@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring, OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | CP_LOAD_STATE_0_STATE_SRC(src) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; @@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring, } static void -emit_constants(struct fd_ringbuffer *ring, - enum adreno_state_block sb, - struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader, - bool emit_immediates) +fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) { - uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t max_const; - int i; - - // XXX TODO only emit dirty consts.. but we need to keep track if - // they are clobbered by a clear, gmem2mem, or mem2gmem.. - constbuf->dirty_mask = enabled_mask; - - /* in particular, with binning shader we may end up with unused - * consts, ie. we could end up w/ constlen that is smaller - * than first_immediate. In that case truncate the user consts - * early to avoid HLSQ lockup caused by writing too many consts - */ - max_const = MIN2(shader->first_driver_param, shader->constlen); - - /* emit user constants: */ - if (enabled_mask & 1) { - const unsigned index = 0; - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size && (constbuf->dirty_mask & (1 << index))) { - fd4_emit_constant(ring, sb, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - constbuf->dirty_mask &= ~(1 << index); - } + uint32_t i; - enabled_mask &= ~(1 << index); - } - - /* emit ubos: */ - if (shader->constlen > shader->first_driver_param) { - uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | - CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | - CP_LOAD_STATE_0_STATE_BLOCK(sb) | - CP_LOAD_STATE_0_NUM_UNIT(params)); - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); - - for (i = 1; i <= params * 4; i++) { - struct pipe_constant_buffer *cb = &constbuf->cb[i]; - assert(!cb->user_buffer); - if ((enabled_mask & (1 << i)) && cb->buffer) - OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); - else - OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); - } - } + debug_assert((regid % 4) == 0); + debug_assert((num % 4) == 0); - /* emit shader immediates: */ - if (shader && emit_immediates) { - int size = shader->immediates_count; - uint32_t base = shader->first_immediate; - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, shader->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - fd4_emit_constant(ring, sb, base, - 0, size, shader->immediates[0].val, NULL); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | + CP_LOAD_STATE_0_NUM_UNIT(num/4)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 0; i < num; i++) { + if (bos[i]) { + if (write) { + OUT_RELOCW(ring, bos[i], offsets[i], 0, 0); + } else { + OUT_RELOC(ring, bos[i], offsets[i], 0, 0); + } + } else { + OUT_RING(ring, 0xbad00000 | (i << 16)); } } } @@ -520,33 +469,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_PROG) fd4_program_emit(ring, emit); - if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && - /* evil hack to deal sanely with clear path: */ - (emit->prog == &ctx->prog)) { - fd_wfi(ctx, ring); - emit_constants(ring, SB_VERT_SHADER, - &ctx->constbuf[PIPE_SHADER_VERTEX], - vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); - if (!emit->key.binning_pass) { - emit_constants(ring, SB_FRAG_SHADER, - &ctx->constbuf[PIPE_SHADER_FRAGMENT], - fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); - } - } - - /* emit driver params every time */ - if (emit->info && emit->prog == &ctx->prog) { - uint32_t vertex_params[4] = { - emit->info->indexed ? emit->info->index_bias : emit->info->start, - 0, - 0, - 0 - }; - if (vp->constlen >= vp->first_driver_param + 4) { - fd4_emit_constant(ring, SB_VERT_SHADER, - (vp->first_driver_param + 4) * 4, - 0, 4, vertex_params, NULL); - } + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_consts(vp, ring, emit->info, dirty); + if (!emit->key.binning_pass) + ir3_emit_consts(fp, ring, emit->info, dirty); + /* mark clean after emitting consts: */ + ctx->prog.dirty = 0; } if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { @@ -767,3 +695,11 @@ fd4_emit_restore(struct fd_context *ctx) ctx->needs_rb_fbd = true; } + +void +fd4_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd4_emit_const; + ctx->emit_const_bo = fd4_emit_const_bo; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 7d059f8e532..7debee59471 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -37,10 +37,8 @@ #include "ir3_shader.h" struct fd_ringbuffer; -enum adreno_state_block; -void fd4_emit_constant(struct fd_ringbuffer *ring, - enum adreno_state_block sb, +void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); @@ -96,4 +94,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd4_emit_restore(struct fd_context *ctx); +void fd4_emit_init(struct pipe_context *pctx); + #endif /* FD4_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index c2d98345349..bc5267aa96e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -351,9 +351,16 @@ struct fd_context { void (*emit_sysmem_prep)(struct fd_context *ctx); /* draw: */ - void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info); + void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info); void (*clear)(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil); + + /* constant emit: (note currently not used/needed for a2xx) */ + void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc); + void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets); }; static inline struct fd_context * diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 1b78763c58e..6aec2585ceb 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -40,6 +40,7 @@ #include "util/u_dynarray.h" #include "util/u_pack_color.h" +#include "disasm.h" #include "adreno_common.xml.h" #include "adreno_pm4.xml.h" diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index d4027729a22..75425e91378 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -412,3 +412,151 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) debug_printf("\n"); } + +/* This has to reach into the fd_context a bit more than the rest of + * ir3, but it needs to be aligned with the compiler, so both agree + * on which const regs hold what. And the logic is identical between + * a3xx/a4xx, the only difference is small details in the actual + * CP_LOAD_STATE packets (which is handled inside the generation + * specific ctx->emit_const(_bo)() fxns) + */ + +#include "freedreno_resource.h" + +static void +emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_constbuf_stateobj *constbuf) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + const unsigned index = 0; /* user consts are index 0 */ + /* TODO save/restore dirty_mask for binning pass instead: */ + uint32_t dirty_mask = constbuf->enabled_mask; + + if (dirty_mask & (1 << index)) { + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ + + /* in particular, with binning shader we may end up with + * unused consts, ie. we could end up w/ constlen that is + * smaller than first_driver_param. In that case truncate + * the user consts early to avoid HLSQ lockup caused by + * writing too many consts + */ + uint32_t max_const = MIN2(v->first_driver_param, v->constlen); + + // I expect that size should be a multiple of vec4's: + assert(size == align(size, 4)); + + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: + */ + size = MIN2(size, 4 * max_const); + + if (size > 0) { + fd_wfi(ctx, ring); + ctx->emit_const(ring, v->type, 0, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); + constbuf->dirty_mask &= ~(1 << index); + } + } +} + +static void +emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_constbuf_stateobj *constbuf) +{ + if (v->constlen > v->first_driver_param) { + struct fd_context *ctx = fd_context(v->shader->pctx); + uint32_t offset = v->first_driver_param; /* UBOs after user consts */ + uint32_t params = MIN2(4, v->constlen - v->first_driver_param) * 4; + uint32_t offsets[params]; + struct fd_bo *bos[params]; + + for (uint32_t i = 0; i < params; i++) { + const uint32_t index = i + 1; /* UBOs start at index 1 */ + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + assert(!cb->user_buffer); + + if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) { + offsets[i] = cb->buffer_offset; + bos[i] = fd_resource(cb->buffer)->bo; + } else { + offsets[i] = 0; + bos[i] = NULL; + } + } + + fd_wfi(ctx, ring); + ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets); + } +} + +static void +emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + int size = v->immediates_count; + uint32_t base = v->first_immediate; + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, v->constlen) - base; + + /* convert out of vec4: */ + base *= 4; + size *= 4; + + if (size > 0) { + fd_wfi(ctx, ring); + ctx->emit_const(ring, v->type, base, + 0, size, v->immediates[0].val, NULL); + } +} + +void +ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + const struct pipe_draw_info *info, uint32_t dirty) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { + struct fd_constbuf_stateobj *constbuf; + bool shader_dirty; + + if (v->type == SHADER_VERTEX) { + constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX]; + shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP); + } else if (v->type == SHADER_FRAGMENT) { + constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT]; + shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP); + } else { + unreachable("bad shader type"); + return; + } + + emit_user_consts(v, ring, constbuf); + emit_ubos(v, ring, constbuf); + if (shader_dirty) + emit_immediates(v, ring); + } + + /* emit driver params every time: */ + /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ + if (info && (v->type == SHADER_VERTEX)) { + uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */ + if (v->constlen >= offset) { + uint32_t vertex_params[4] = { + info->indexed ? info->index_bias : info->start, + 0, + 0, + 0 + }; + + fd_wfi(ctx, ring); + ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, + ARRAY_SIZE(vertex_params), vertex_params, NULL); + } + } +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 5365d5687f1..ef16d7b2f6e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -224,6 +224,10 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key); void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin); +struct fd_ringbuffer; +void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + const struct pipe_draw_info *info, uint32_t dirty); + static inline const char * ir3_shader_stage(struct ir3_shader *shader) { |