diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 75 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_screen.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 |
3 files changed, 56 insertions, 24 deletions
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index bf51847706e..c315a47bb16 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -87,11 +87,12 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader) + struct ir3_shader_variant *shader, + bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t first_immediate; - uint32_t base = 0; + uint32_t max_const; + int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. @@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring, * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ - first_immediate = MIN2(shader->first_immediate, shader->constlen); + max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ - while (enabled_mask) { - unsigned index = ffs(enabled_mask) - 1; + if (enabled_mask & 1) { + const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); - /* gallium could leave const buffers bound above what the - * current shader uses.. don't let that confuse us. + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: */ - if (base >= (4 * first_immediate)) - break; - - if (constbuf->dirty_mask & (1 << index)) { - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, (4 * first_immediate) - base); - fd4_emit_constant(ring, sb, base, + size = MIN2(size, 4 * max_const); + + if (size && (constbuf->dirty_mask & (1 << index))) { + fd4_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } - base += size; enabled_mask &= ~(1 << index); } + /* emit ubos: */ + if (shader->constlen > shader->first_driver_param) { + uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(params)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 1; i <= params * 4; i++) { + struct pipe_constant_buffer *cb = &constbuf->cb[i]; + assert(!cb->user_buffer); + if ((enabled_mask & (1 << i)) && cb->buffer) + OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); + else + OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); + } + } + /* emit shader immediates: */ - if (shader) { + if (shader && emit_immediates) { int size = shader->immediates_count; - base = shader->first_immediate; + uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -499,11 +515,26 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], - (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL); + vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], - (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL); + fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); + } + } + + /* emit driver params every time */ + if (emit->info && emit->prog == &ctx->prog) { + uint32_t vertex_params[4] = { + emit->info->indexed ? emit->info->index_bias : emit->info->start, + 0, + 0, + 0 + }; + if (vp->constlen >= vp->first_driver_param + 4) { + fd4_emit_constant(ring, SB_VERT_SHADER, + (vp->first_driver_param + 4) * 4, + 0, 4, vertex_params, NULL); } } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1b89387678d..fda60eda6fd 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -363,7 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, */ return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return is_a3xx(screen) ? 16 : 1; + return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* nothing uses this */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 9e1f45dabaf..b899c66b37e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -71,7 +71,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, return 0; /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { + if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || + is_mem(consumer)) { return 6; } else if ((consumer->category == 3) && (is_mad(consumer->opc) || is_madsh(consumer->opc)) && |