summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_context.c1
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c156
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.h6
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.c1
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_draw.c2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c156
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.h6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h9
-rw-r--r--src/gallium/drivers/freedreno/freedreno_util.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c148
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h4
12 files changed, 263 insertions, 229 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 8441898382b..dc33783e398 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
+ fd3_emit_init(pctx);
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 070ed43a279..fc30d4842ba 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -345,7 +345,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
fd3_emit_vertex_bufs(ring, &emit);
- fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 07cc2266d08..9032366b748 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -43,19 +43,26 @@
#include "fd3_format.h"
#include "fd3_zsa.h"
+static const enum adreno_state_block sb[] = { /* shader_t stage -> CP_LOAD_STATE state block; indexed as sb[type] by the const emit helpers */
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = SS_INDIRECT;
@@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
}
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, /* a3xx: load 'num' bo relocs into the const file of stage 'type' */
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) /* regid: base const register (dwords); bos[i]/offsets[i]: buffer and offset for slot i, bos[i] may be NULL */
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
-
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
-
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
-
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && constbuf->dirty_mask & (1 << index)) {
- fd3_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
-
- enabled_mask &= ~(1 << index);
- }
-
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- /* emit ubos: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params * 2));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
+ uint32_t i;
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
+ debug_assert((regid % 4) == 0);
+ debug_assert((num % 4) == 0);
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0) {
- fd3_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); /* 2 header dwords followed by one payload entry per slot */
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | /* NOTE(review): a3xx appears to encode offset/count in units of 2 dwords (the /2 here and below) - confirm against the packet definition */
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) { /* 'write' selects OUT_RELOCW rather than OUT_RELOC for the slot */
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16)); /* no bo for this slot: emit a recognizable placeholder tagged with the slot index */
}
}
}
@@ -669,33 +618,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
- }
-
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd3_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +858,11 @@ fd3_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd3_emit_init(struct pipe_context *pctx) /* install the a3xx const-emit callbacks on the context */
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->emit_const = fd3_emit_const; /* called through ctx->emit_const by the shared ir3 const-emit code */
+ ctx->emit_const_bo = fd3_emit_const_bo; /* called through ctx->emit_const_bo for UBO slots */
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 8f21919c9a7..795654706a7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -37,10 +37,8 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
@@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd3_emit_restore(struct fd_context *ctx);
+void fd3_emit_init(struct pipe_context *pctx);
+
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index 6e109b6205a..e172d350517 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
+ fd4_emit_init(pctx);
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index d070f5fd6b7..ff1dfdc392f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -295,7 +295,7 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */
/* until fastclear works: */
- fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index f3e1ccebccc..4462a82777f 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -43,19 +43,26 @@
#include "fd4_format.h"
#include "fd4_zsa.h"
+static const enum adreno_state_block sb[] = { /* shader_t stage -> CP_LOAD_STATE state block; indexed as sb[type] by the const emit helpers */
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = 0x2; // TODO ??
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
}
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, /* a4xx: load 'num' bo relocs into the const file of stage 'type' */
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) /* regid: base const register (dwords); bos[i]/offsets[i]: buffer and offset for slot i, bos[i] may be NULL */
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
-
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
-
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
-
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && (constbuf->dirty_mask & (1 << index))) {
- fd4_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
+ uint32_t i;
- enabled_mask &= ~(1 << index);
- }
-
- /* emit ubos: */
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
+ debug_assert((regid % 4) == 0);
+ debug_assert((num % 4) == 0);
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0) {
- fd4_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); /* 2 header dwords followed by one payload entry per slot */
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | /* NOTE(review): a4xx appears to encode offset/count in units of 4 dwords (the /4 here and below) - confirm against the packet definition */
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/4));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) { /* 'write' selects OUT_RELOCW rather than OUT_RELOC for the slot */
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16)); /* no bo for this slot: emit a recognizable placeholder tagged with the slot index */
}
}
}
@@ -520,33 +469,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_PROG)
fd4_program_emit(ring, emit);
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
- }
-
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd4_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
@@ -767,3 +695,11 @@ fd4_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd4_emit_init(struct pipe_context *pctx) /* install the a4xx const-emit callbacks on the context */
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->emit_const = fd4_emit_const; /* called through ctx->emit_const by the shared ir3 const-emit code */
+ ctx->emit_const_bo = fd4_emit_const_bo; /* called through ctx->emit_const_bo for UBO slots */
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index 7d059f8e532..7debee59471 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -37,10 +37,8 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
@@ -96,4 +94,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd4_emit_restore(struct fd_context *ctx);
+void fd4_emit_init(struct pipe_context *pctx);
+
#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index c2d98345349..bc5267aa96e 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -351,9 +351,16 @@ struct fd_context {
void (*emit_sysmem_prep)(struct fd_context *ctx);
/* draw: */
- void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+ void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
+
+ /* constant emit: (note currently not used/needed for a2xx) */
+ void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc);
+ void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
};
static inline struct fd_context *
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 1b78763c58e..6aec2585ceb 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -40,6 +40,7 @@
#include "util/u_dynarray.h"
#include "util/u_pack_color.h"
+#include "disasm.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index d4027729a22..75425e91378 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -412,3 +412,151 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
debug_printf("\n");
}
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what. And the logic is identical between
+ * a3xx/a4xx, the only difference is small details in the actual
+ * CP_LOAD_STATE packets (which is handled inside the generation
+ * specific ctx->emit_const(_bo)() functions)
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf) /* upload constbuf->cb[0] (user consts) at const reg 0, clamped to the range the variant uses */
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ const unsigned index = 0; /* user consts are index 0 */
+ /* TODO save/restore dirty_mask for binning pass instead: */
+ uint32_t dirty_mask = constbuf->enabled_mask; /* every enabled buffer is treated as dirty, so cb[0] is re-emitted whenever it is enabled */
+
+ if (dirty_mask & (1 << index)) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+ /* in particular, with binning shader we may end up with
+ * unused consts, ie. we could end up w/ constlen that is
+ * smaller than first_driver_param. In that case truncate
+ * the user consts early to avoid HLSQ lockup caused by
+ * writing too many consts
+ */
+ uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+ // I expect that size should be a multiple of vec4's:
+ assert(size == align(size, 4));
+
+ /* and even if the start of the const buffer is before
+ * first_driver_param, the end may not be:
+ */
+ size = MIN2(size, 4 * max_const); /* max_const is in vec4 units, size is in dwords */
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, 0,
+ cb->buffer_offset, size,
+ cb->user_buffer, cb->buffer);
+ constbuf->dirty_mask &= ~(1 << index); /* NOTE(review): clears the stateobj's dirty bit, while the check above uses the local copy taken from enabled_mask */
+ }
+ }
+}
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf) /* emit the bo/offset of constant buffers 1..N as consts starting at first_driver_param */
+{
+ if (v->constlen > v->first_driver_param) { /* nothing to emit unless the variant's const range extends past the user consts */
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ uint32_t offset = v->first_driver_param; /* UBOs after user consts */
+ uint32_t params = MIN2(4, v->constlen - v->first_driver_param) * 4; /* slot count: at most 4 vec4s of const space, 4 slots per vec4 */
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ const uint32_t index = i + 1; /* UBOs start at index 1 */
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ assert(!cb->user_buffer);
+
+ if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+ offsets[i] = cb->buffer_offset;
+ bos[i] = fd_resource(cb->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL; /* unbound slot; left for ctx->emit_const_bo() to handle */
+ }
+ }
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets); /* offset is in vec4 units; the callback takes a dword regid */
+ }
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring) /* upload the variant's immediates at first_immediate, truncated to constlen */
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ int size = v->immediates_count;
+ uint32_t base = v->first_immediate;
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base; /* NOTE(review): when constlen < base this presumably ends up non-positive so the emit below is skipped - depends on the field types, confirm */
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, base,
+ 0, size, v->immediates[0].val, NULL); /* passes a pointer to the first immediate's values; assumes the immediates are laid out contiguously - TODO confirm */
+ }
+}
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty) /* emit all const state for one shader variant; info may be NULL (no draw), dirty is the FD_DIRTY_* mask */
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { /* user consts, UBOs and immediates are only re-emitted on program or constbuf change */
+ struct fd_constbuf_stateobj *constbuf;
+ bool shader_dirty;
+
+ if (v->type == SHADER_VERTEX) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+ } else if (v->type == SHADER_FRAGMENT) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+ } else {
+ unreachable("bad shader type");
+ return;
+ }
+
+ emit_user_consts(v, ring, constbuf);
+ emit_ubos(v, ring, constbuf);
+ if (shader_dirty) /* immediates belong to the shader, so only re-emit them when this stage's shader changed */
+ emit_immediates(v, ring);
+ }
+
+ /* emit driver params every time: */
+ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+ if (info && (v->type == SHADER_VERTEX)) {
+ uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
+ if (v->constlen > offset) { /* strictly greater: the vec4 at index 'offset' is only inside the const range when constlen exceeds it */
+ uint32_t vertex_params[4] = {
+ info->indexed ? info->index_bias : info->start,
+ 0,
+ 0,
+ 0
+ };
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+ ARRAY_SIZE(vertex_params), vertex_params, NULL);
+ }
+ }
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 5365d5687f1..ef16d7b2f6e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -224,6 +224,10 @@ struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty);
+
static inline const char *
ir3_shader_stage(struct ir3_shader *shader)
{