summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_emit.c 75
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_depth.c 3
3 files changed, 56 insertions, 24 deletions
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index bf51847706e..c315a47bb16 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -87,11 +87,12 @@ static void
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader)
+ struct ir3_shader_variant *shader,
+ bool emit_immediates)
{
uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t first_immediate;
- uint32_t base = 0;
+ uint32_t max_const;
+ int i;
// XXX TODO only emit dirty consts.. but we need to keep track if
// they are clobbered by a clear, gmem2mem, or mem2gmem..
@@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring,
* than first_immediate. In that case truncate the user consts
* early to avoid HLSQ lockup caused by writing too many consts
*/
- first_immediate = MIN2(shader->first_immediate, shader->constlen);
+ max_const = MIN2(shader->first_driver_param, shader->constlen);
/* emit user constants: */
- while (enabled_mask) {
- unsigned index = ffs(enabled_mask) - 1;
+ if (enabled_mask & 1) {
+ const unsigned index = 0;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
- /* gallium could leave const buffers bound above what the
- * current shader uses.. don't let that confuse us.
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
*/
- if (base >= (4 * first_immediate))
- break;
-
- if (constbuf->dirty_mask & (1 << index)) {
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, (4 * first_immediate) - base);
- fd4_emit_constant(ring, sb, base,
+ size = MIN2(size, 4 * max_const);
+
+ if (size && (constbuf->dirty_mask & (1 << index))) {
+ fd4_emit_constant(ring, sb, 0,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
constbuf->dirty_mask &= ~(1 << index);
}
- base += size;
enabled_mask &= ~(1 << index);
}
+ /* emit ubos: */
+ if (shader->constlen > shader->first_driver_param) {
+ uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(params));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 1; i <= params * 4; i++) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[i];
+ assert(!cb->user_buffer);
+ if ((enabled_mask & (1 << i)) && cb->buffer)
+ OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
+ else
+ OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
+ }
+ }
+
/* emit shader immediates: */
- if (shader) {
+ if (shader && emit_immediates) {
int size = shader->immediates_count;
- base = shader->first_immediate;
+ uint32_t base = shader->first_immediate;
/* truncate size to avoid writing constants that shader
* does not use:
@@ -499,11 +515,26 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
- (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+ vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
if (!emit->key.binning_pass) {
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
+ }
+ }
+
+ /* emit driver params every time */
+ if (emit->info && emit->prog == &ctx->prog) {
+ uint32_t vertex_params[4] = {
+ emit->info->indexed ? emit->info->index_bias : emit->info->start,
+ 0,
+ 0,
+ 0
+ };
+ if (vp->constlen >= vp->first_driver_param + 4) {
+ fd4_emit_constant(ring, SB_VERT_SHADER,
+ (vp->first_driver_param + 4) * 4,
+ 0, 4, vertex_params, NULL);
}
}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 1b89387678d..fda60eda6fd 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -363,7 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
*/
return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return is_a3xx(screen) ? 16 : 1;
+ return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* nothing uses this */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 9e1f45dabaf..b899c66b37e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -71,7 +71,8 @@ int ir3_delayslots(struct ir3_instruction *assigner,
return 0;
/* assigner must be alu: */
- if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) {
+ if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
+ is_mem(consumer)) {
return 6;
} else if ((consumer->category == 3) &&
(is_mad(consumer->opc) || is_madsh(consumer->opc)) &&