summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Ilia Mirkin <[email protected]> 2015-03-31 11:51:00 -0400
committer: Rob Clark <[email protected]> 2015-04-05 16:36:35 -0400
commit: 1de72dfc8a2014069edd1b3d3d46dad478d0680a (patch)
tree: 15dec969001bb8b9ff70b98ab47dc1b8edb06516 /src
parent: c7811f56c205b113dd820034a99ff3aaa20af636 (diff)
freedreno/a3xx: add UBO support
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 67
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c | 4
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 93
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_legalize.c | 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.c | 2
6 files changed, 132 insertions, 38 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 5fd31f50daf..f961fc07585 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -87,11 +87,12 @@ static void
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader)
+ struct ir3_shader_variant *shader,
+ bool emit_immediates)
{
uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t first_immediate;
- uint32_t base = 0;
+ uint32_t max_const;
+ int i;
// XXX TODO only emit dirty consts.. but we need to keep track if
// they are clobbered by a clear, gmem2mem, or mem2gmem..
@@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring,
* than first_immediate. In that case truncate the user consts
* early to avoid HLSQ lockup caused by writing too many consts
*/
- first_immediate = MIN2(shader->first_immediate, shader->constlen);
+ max_const = MIN2(shader->first_driver_param, shader->constlen);
/* emit user constants: */
- while (enabled_mask) {
- unsigned index = ffs(enabled_mask) - 1;
+ if (enabled_mask & 1) {
+ const unsigned index = 0;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
- /* gallium could leave const buffers bound above what the
- * current shader uses.. don't let that confuse us.
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
*/
- if (base >= (4 * first_immediate))
- break;
+ size = MIN2(size, 4 * max_const);
- if (constbuf->dirty_mask & (1 << index)) {
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, (4 * first_immediate) - base);
- fd3_emit_constant(ring, sb, base,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
+ if (size && constbuf->dirty_mask & (1 << index)) {
+ fd3_emit_constant(ring, sb, 0,
+ cb->buffer_offset, size,
+ cb->user_buffer, cb->buffer);
constbuf->dirty_mask &= ~(1 << index);
}
- base += size;
enabled_mask &= ~(1 << index);
}
+ if (shader->constlen > shader->first_driver_param) {
+ uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
+ /* emit ubos: */
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_NUM_UNIT(params * 2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+ for (i = 1; i <= params * 4; i++) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[i];
+ assert(!cb->user_buffer);
+ if ((enabled_mask & (1 << i)) && cb->buffer)
+ OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
+ else
+ OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
+ }
+ }
+
/* emit shader immediates: */
- if (shader) {
+ if (shader && emit_immediates) {
int size = shader->immediates_count;
- base = shader->first_immediate;
+ uint32_t base = shader->first_immediate;
/* truncate size to avoid writing constants that shader
* does not use:
@@ -619,11 +635,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
- (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+ vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
if (!emit->key.binning_pass) {
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
}
}
@@ -635,8 +651,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
0,
0
};
- if (vp->constlen > vp->first_driver_param) {
- fd3_emit_constant(ring, SB_VERT_SHADER, vp->first_driver_param * 4,
+ if (vp->constlen >= vp->first_driver_param + 4) {
+ fd3_emit_constant(ring, SB_VERT_SHADER,
+ (vp->first_driver_param + 4) * 4,
0, 4, vertex_params, NULL);
}
}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index fe724442c07..66fe0e571cf 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -356,9 +356,9 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
* split between VS and FS. Use lower limit of 256 to
* avoid getting into impossible situations:
*/
- return ((is_a3xx(screen) || is_a4xx(screen)) ? 256 : 64) * sizeof(float[4]);
+ return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return 1;
+ return is_a3xx(screen) ? 16 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* nothing uses this */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 42a45776211..284c6559eb1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -487,7 +487,7 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
iassert(instr->regs_count >= 2);
- if (instr->cat6.offset) {
+ if (instr->cat6.offset || instr->opc == OPC_LDG) {
instr_cat6a_t *cat6a = ptr;
cat6->has_off = true;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 38df8b5fdf4..43f4c955ac0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -151,6 +151,7 @@ static void vectorize(struct ir3_compile_context *ctx,
static void create_mov(struct ir3_compile_context *ctx,
struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static type_t get_ftype(struct ir3_compile_context *ctx);
+static type_t get_utype(struct ir3_compile_context *ctx);
static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
{
@@ -252,7 +253,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
* the assembler what the max addr reg value can be:
*/
if (info->indirect_files & FM(CONSTANT))
- so->constlen = ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1);
i = 0;
i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
@@ -261,12 +262,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
/* any others? we don't track arrays for const..*/
/* Immediates go after constants: */
- if (so->type == SHADER_VERTEX) {
- so->first_driver_param = info->file_max[TGSI_FILE_CONSTANT] + 1;
- so->first_immediate = so->first_driver_param + 1;
- } else {
- so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
- }
+ so->first_immediate = so->first_driver_param =
+ info->const_file_max[0] + 1;
+ /* 1 unit for the vertex id base */
+ if (so->type == SHADER_VERTEX)
+ so->first_immediate++;
+ /* 4 (vec4) units for ubo base addresses */
+ so->first_immediate += 4;
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
@@ -717,6 +719,80 @@ ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
reg->offset = regid(off, chan);
instr = array_fanin(ctx, aid, src->File);
+ } else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) {
+ const struct tgsi_full_src_register *fsrc = (const void *)src;
+ struct ir3_instruction *temp = NULL;
+ int ubo_regid = regid(ctx->so->first_driver_param, 0) +
+ fsrc->Dimension.Index - 1;
+ int offset = 0;
+
+ /* We don't handle indirect UBO array accesses... yet. */
+ compile_assert(ctx, !fsrc->Dimension.Indirect);
+ /* UBOs start at index 1. */
+ compile_assert(ctx, fsrc->Dimension.Index > 0);
+
+ if (src->Indirect) {
+ /* In case of an indirect index, it will have been loaded into an
+ * address register. There will be a sequence of
+ *
+ * shl.b x, val, 2
+ * mova a0, x
+ *
+ * We rely on this sequence to get the original val out and shift
+ * it by 4, since we're dealing in vec4 units.
+ */
+ compile_assert(ctx, ctx->block->address);
+ compile_assert(ctx, ctx->block->address->regs[1]->instr->opc ==
+ OPC_SHL_B);
+
+ temp = instr = instr_create(ctx, 2, OPC_SHL_B);
+ ir3_reg_create(instr, 0, 0);
+ ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr =
+ ctx->block->address->regs[1]->instr->regs[1]->instr;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
+ } else if (src->Index >= 64) {
+ /* Otherwise it's a plain index (in vec4 units). Move it into a
+ * register.
+ */
+ temp = instr = instr_create(ctx, 1, 0);
+ instr->cat1.src_type = get_utype(ctx);
+ instr->cat1.dst_type = get_utype(ctx);
+ ir3_reg_create(instr, 0, 0);
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16;
+ } else {
+ /* The offset is small enough to fit into the ldg instruction
+ * directly.
+ */
+ offset = src->Index * 16;
+ }
+
+ if (temp) {
+ /* If there was an offset (most common), add it to the buffer
+ * address.
+ */
+ instr = instr_create(ctx, 2, OPC_ADD_S);
+ ir3_reg_create(instr, 0, 0);
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
+ ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
+ } else {
+ /* Otherwise just load the buffer address directly */
+ instr = instr_create(ctx, 1, 0);
+ instr->cat1.src_type = get_utype(ctx);
+ instr->cat1.dst_type = get_utype(ctx);
+ ir3_reg_create(instr, 0, 0);
+ ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
+ }
+
+ temp = instr;
+
+ instr = instr_create(ctx, 6, OPC_LDG);
+ instr->cat6.type = TYPE_U32;
+ instr->cat6.offset = offset + chan * 4;
+ ir3_reg_create(instr, 0, 0);
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+
+ reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST);
} else {
/* normal case (not relative addressed GPR) */
instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan));
@@ -3183,7 +3259,8 @@ decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
instr->cat1.src_type = get_stype(ctx);
instr->cat1.dst_type = get_stype(ctx);
ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, regid(so->first_driver_param, 0), IR3_REG_CONST);
+ ir3_reg_create(instr, regid(so->first_driver_param + 4, 0),
+ IR3_REG_CONST);
break;
case TGSI_SEMANTIC_INSTANCEID:
ctx->instance_id = instr = create_input(ctx->block, NULL, r);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index db501e7a51c..2455f7e4efc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -175,7 +175,7 @@ static void legalize(struct ir3_legalize_ctx *ctx)
/* both tex/sfu appear to not always immediately consume
* their src register(s):
*/
- if (is_tex(n) || is_sfu(n)) {
+ if (is_tex(n) || is_sfu(n) || is_mem(n)) {
foreach_src(reg, n) {
if (reg_gpr(reg))
regmask_set(&needs_ss_war, reg);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index b1dff381813..0cf357e17d8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -116,7 +116,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
* the compiler (to worst-case value) since we don't know in
* the assembler what the max addr reg value can be:
*/
- v->constlen = MAX2(v->constlen, v->info.max_const + 1);
+ v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
fixup_regfootprint(v);