summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 22
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler.c 99
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cp.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_dump.c 16
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_ra.c 147
5 files changed, 244 insertions, 42 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 30932854884..430bcf22d6f 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -205,6 +205,9 @@ struct ir3_instruction {
int off; /* component/offset */
} fo;
struct {
+ int aid;
+ } fi;
+ struct {
struct ir3_block *if_block, *else_block;
} flow;
struct {
@@ -264,6 +267,19 @@ struct ir3_instruction {
*/
struct ir3_instruction *address;
+ /* in case of an instruction with a relative dst, we need to
+ * capture the dependency on the fanin for the previous values of
+ * the array elements. Since we don't know at compile time actually
+ * which array elements are written, this serves to preserve the
+ * unconditional write to array elements prior to the conditional
+ * write.
+ *
+ * TODO only cat1 can do indirect write.. we could maybe move this
+ * into instr->cat1.fanin (but would require the frontend to insert
+ * the extra mov)
+ */
+ struct ir3_instruction *fanin;
+
struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
@@ -373,6 +389,8 @@ static inline int ir3_instr_regno(struct ir3_instruction *instr,
}
+#define MAX_ARRAYS 16
+
/* comp:
* 0 - x
* 1 - y
@@ -498,6 +516,8 @@ static inline bool reg_gpr(struct ir3_register *r)
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
+ if (instr->fanin)
+ return instr->regs_count + 2;
if (instr->address)
return instr->regs_count + 1;
return instr->regs_count;
@@ -505,6 +525,8 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
+ if (n == (instr->regs_count + 1))
+ return instr->fanin;
if (n == (instr->regs_count + 0))
return instr->address;
return ssa(instr->regs[n]);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index d755babf31b..df428ebed7e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -64,7 +64,7 @@ struct ir3_compile_context {
*/
struct {
struct ir3_instruction *instr, **instrp;
- } output_updates[16];
+ } output_updates[64];
unsigned num_output_updates;
/* are we in a sequence of "atomic" instructions?
@@ -97,7 +97,7 @@ struct ir3_compile_context {
struct {
unsigned first, last;
struct ir3_instruction *fanin;
- } array[16];
+ } array[MAX_ARRAYS];
uint32_t array_dirty;
/* offset into array[], per file, of first array info */
uint8_t array_offsets[TGSI_FILE_COUNT];
@@ -247,10 +247,6 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
#define FM(x) (1 << TGSI_FILE_##x)
- /* optimize can't deal with relative addressing: */
- if (info->indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
- return TGSI_PARSE_ERROR;
-
/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in
* the assembler what the max addr reg value can be:
@@ -595,6 +591,16 @@ ssa_instr_get(struct ir3_compile_context *ctx, unsigned file, unsigned n)
return instr;
}
+static int dst_array_id(struct ir3_compile_context *ctx,
+ const struct tgsi_dst_register *dst)
+{
+ // XXX hack: reinterpret dst as its enclosing tgsi_full_dst_register
+ // to reach Indirect.ArrayID.  Assumes dst sits at the very start of
+ // one (expected for any indirect dst) -- TODO confirm against callers
+ const struct tgsi_full_dst_register *fdst = (const void *)dst;
+ return fdst->Indirect.ArrayID + ctx->array_offsets[dst->File];
+}
+
static int src_array_id(struct ir3_compile_context *ctx,
const struct tgsi_src_register *src)
{
@@ -639,7 +645,56 @@ static void
ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
{
- ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr);
+ if (dst->Indirect) {
+ struct ir3_register *reg = instr->regs[0];
+ unsigned i, aid = dst_array_id(ctx, dst);
+ unsigned first = ctx->array[aid].first;
+ unsigned last = ctx->array[aid].last;
+ unsigned off = dst->Index - first; /* vec4 offset */
+
+ reg->size = 4 * (1 + last - first);
+ reg->offset = regid(off, chan);
+
+ instr->fanin = array_fanin(ctx, aid, dst->File);
+
+ /* annotate with the array-id, to help out the register-
+ * assignment stage. At least for the case of indirect
+ * writes, we should capture enough dependencies to
+ * preserve the order of reads/writes of the array, so
+ * the multiple "names" for the array should end up all
+ * assigned to the same registers.
+ */
+ instr->fanin->fi.aid = aid;
+
+ /* Since we are scalarizing vec4 tgsi instructions/regs, we
+ * run into a slight complication here. To do the naive thing
+ * and setup a fanout for each scalar array element would end
+ * up with the result that the instructions generated for each
+ * component of the vec4 would end up clobbering each other.
+ * So we take advantage here of knowing that the array index
+ * (after the shl.b) will be a multiple of four, and only set
+ * every fourth scalar component in the array. See also
+ * fixup_ssa_dst_array()
+ */
+ for (i = first; i <= last; i++) {
+ struct ir3_instruction *split;
+ unsigned n = regid(i, chan);
+ int off = (4 * (i - first)) + chan;
+
+ if (is_meta(instr) && (instr->opc == OPC_META_FO))
+ off -= instr->fo.off;
+
+ split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
+ split->fo.off = off;
+ ir3_reg_create(split, 0, 0);
+ ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
+
+ ssa_instr_set(ctx, dst->File, n, split);
+ }
+ } else {
+ /* normal case (not relative addressed GPR) */
+ ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr);
+ }
}
static void
@@ -705,12 +760,22 @@ add_dst_reg_wrmask(struct ir3_compile_context *ctx,
break;
}
- if (dst->Indirect)
+ if (dst->Indirect) {
flags |= IR3_REG_RELATIV;
- reg = ir3_reg_create(instr, regid(num, chan), flags);
+ /* shouldn't happen, and we can't cope with it below: */
+ compile_assert(ctx, wrmask == 0x1);
+
+ compile_assert(ctx, ctx->block->address);
+ if (instr->address)
+ compile_assert(ctx, ctx->block->address == instr->address);
+
+ instr->address = ctx->block->address;
+ }
+ reg = ir3_reg_create(instr, regid(num, chan), flags);
reg->wrmask = wrmask;
+
if (wrmask == 0x1) {
/* normal case */
ssa_dst(ctx, instr, dst, chan);
@@ -720,6 +785,8 @@ add_dst_reg_wrmask(struct ir3_compile_context *ctx,
struct ir3_instruction *prev = NULL;
unsigned i;
+ compile_assert(ctx, !dst->Indirect);
+
/* if instruction writes multiple, we need to create
* some place-holder collect the registers:
*/
@@ -2539,10 +2606,16 @@ instr_cat1(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src = &inst->Src[0].Register;
+
+ /* NOTE: atomic start/end, rather than in create_mov() since
+ * create_mov() is used already w/in atomic sequences (and
+ * we aren't clever enough to deal with the nesting)
+ */
+ instr_atomic_start(ctx);
create_mov(ctx, dst, src);
- put_dst(ctx, inst, dst);
+ instr_atomic_end(ctx);
}
static void
@@ -3322,6 +3395,10 @@ ir3_compile_shader(struct ir3_shader_variant *so,
goto out;
}
+ /* for now, until the edge cases are worked out: */
+ if (ctx.info.indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
+ cp = false;
+
compile_instructions(&ctx);
block = ctx.block;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 898ed70abeb..8ad1894a324 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -49,7 +49,7 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
- if (dst->flags & IR3_REG_ADDR)
+ if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
return false;
/* TODO: propagate abs/neg modifiers if possible */
if (src->flags & (IR3_REG_ABS | IR3_REG_NEGATE | IR3_REG_RELATIV))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_dump.c b/src/gallium/drivers/freedreno/ir3/ir3_dump.c
index a846777b879..dc251653fa5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_dump.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_dump.c
@@ -414,8 +414,20 @@ ir3_dump_instr_single(struct ir3_instruction *instr)
fprintf(ctx.f, "]");
}
- if (is_meta(instr) && (instr->opc == OPC_META_FO))
- printf(", off=%d", instr->fo.off);
+ if (instr->fanin) {
+ fprintf(ctx.f, ", fanin=_");
+ fprintf(ctx.f, "[");
+ dump_instr_name(&ctx, instr->fanin);
+ fprintf(ctx.f, "]");
+ }
+
+ if (is_meta(instr)) {
+ if (instr->opc == OPC_META_FO) {
+ printf(", off=%d", instr->fo.off);
+ } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
+ printf(", aid=%d", instr->fi.aid);
+ }
+ }
printf("\n");
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 0f6d40f5a7c..a4235a77a15 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -47,6 +47,12 @@
* I'm not really sure a sane way for the CP stage to realize when it
* cannot remove a mov due to multi-register constraints..
*
+ * NOTE: http://scopesconf.org/scopes-01/paper/session1_2.ps.gz has
+ * some ideas to handle array allocation with a more conventional
+ * graph coloring algorithm for register assignment, which might be
+ * a good alternative to the current algo. However afaict it cannot
+ * handle overlapping arrays, which is a scenario that we have to
+ * deal with
*/
struct ir3_ra_ctx {
@@ -56,6 +62,10 @@ struct ir3_ra_ctx {
bool frag_face;
int cnt;
bool error;
+ struct {
+ unsigned base;
+ unsigned size;
+ } arrays[MAX_ARRAYS];
};
#ifdef DEBUG
@@ -141,6 +151,26 @@ static bool instr_is_output(struct ir3_instruction *instr)
return false;
}
+static void mark_sources(struct ir3_instruction *instr,
+ struct ir3_instruction *n, regmask_t *liveregs, regmask_t *written)
+{
+ unsigned i;
+
+ for (i = 1; i < n->regs_count; i++) {
+ struct ir3_register *r = reg_check(n, i);
+ if (r)
+ regmask_set_if_not(liveregs, r, written);
+
+ /* if this src of n points back to the instruction(s) in
+ * the block of neighbors that we are assigning (instr), then
+ * also mark every written (clobbered) register as live:
+ */
+ if (instr_used_by(instr, n->regs[i]))
+ regmask_or(liveregs, liveregs, written);
+ }
+
+}
+
/* live means read before written */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, regmask_t *liveregs)
@@ -159,18 +189,13 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx,
continue;
/* check first src's read: */
- for (i = 1; i < n->regs_count; i++) {
- r = reg_check(n, i);
- if (r)
- regmask_set_if_not(liveregs, r, &written);
-
- /* if any src points back to the instruction(s) in
- * the block of neighbors that we are assigning then
- * mark any written (clobbered) registers as live:
- */
- if (instr_used_by(instr, n->regs[i]))
- regmask_or(liveregs, liveregs, &written);
- }
+ mark_sources(instr, n, liveregs, &written);
+
+ /* for instructions that write to an array, we need to
+ * capture the dependency on the array elements:
+ */
+ if (n->fanin)
+ mark_sources(instr, n->fanin, liveregs, &written);
/* meta-instructions don't actually get scheduled,
* so don't let it's write confuse us.. what we
@@ -383,14 +408,32 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
}
}
-static void instr_assign(struct ir3_ra_ctx *ctx,
+static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned name)
{
struct ir3_instruction *n, *src;
+
+ for (n = instr->next; n && !ctx->error; n = n->next) {
+ foreach_ssa_src_n(src, i, n) {
+ unsigned r = i + 1;
+
+ /* skip address / etc (non real sources): */
+ if (r >= n->regs_count)
+ continue;
+
+ if (src == instr)
+ instr_assign_src(ctx, n, r, name);
+ }
+ }
+}
+
+static void instr_assign(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr, unsigned name)
+{
struct ir3_register *reg = instr->regs[0];
- if ((reg->flags & IR3_REG_RELATIV))
- name += reg->offset;
+ if (reg->flags & IR3_REG_RELATIV)
+ return;
/* check if already assigned: */
if (!(reg->flags & IR3_REG_SSA)) {
@@ -403,18 +446,7 @@ static void instr_assign(struct ir3_ra_ctx *ctx,
reg_assign(instr, 0, name);
/* and rename any subsequent use of result of this instr: */
- for (n = instr->next; n && !ctx->error; n = n->next) {
- foreach_ssa_src_n(src, i, n) {
- unsigned r = i + 1;
-
- /* skip address / etc (non real sources): */
- if (r >= n->regs_count)
- continue;
-
- if (src == instr)
- instr_assign_src(ctx, n, r, name);
- }
- }
+ instr_assign_srcs(ctx, instr, name);
/* To simplify the neighbor logic, and to "avoid" dealing with
* instructions which write more than one output, we actually
@@ -423,6 +455,8 @@ static void instr_assign(struct ir3_ra_ctx *ctx,
* to the actual instruction:
*/
if (is_meta(instr) && (instr->opc == OPC_META_FO)) {
+ struct ir3_instruction *src;
+
debug_assert(name >= instr->fo.off);
foreach_ssa_src(src, instr)
@@ -444,7 +478,8 @@ static int check_partial_assignment(struct ir3_ra_ctx *ctx,
for (n = instr; n; n = n->cp.right) {
struct ir3_register *dst = n->regs[0];
- if (!(dst->flags & IR3_REG_SSA)) {
+ if ((n->depth != DEPTH_UNUSED) &&
+ !(dst->flags & IR3_REG_SSA)) {
int name = dst->num - off;
debug_assert(name >= 0);
return name;
@@ -472,6 +507,23 @@ static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
dst = instr->regs[0];
+ /* For indirect dst, take the register assignment from the
+ * fanin and propagate it forward.
+ */
+ if (dst->flags & IR3_REG_RELATIV) {
+ /* NOTE can be grouped, if for example outputs:
+ * for now disable cp if indirect writes
+ */
+ instr_alloc_and_assign(ctx, instr->fanin);
+
+ dst->num += instr->fanin->regs[0]->num;
+ dst->flags &= ~IR3_REG_SSA;
+
+ instr_assign_srcs(ctx, instr, instr->fanin->regs[0]->num);
+
+ return;
+ }
+
/* for instructions w/ fanouts, do the actual register assignment
* on the group of fanout neighbor nodes and propagate the reg
* name back up to the texture instruction.
@@ -510,6 +562,33 @@ static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
}
}
+static void instr_assign_array(struct ir3_ra_ctx *ctx,
+ struct ir3_instruction *instr)
+{
+ struct ir3_instruction *src;
+ int name, aid = instr->fi.aid;
+
+ if (ctx->arrays[aid].base == ~0) {
+ int size = instr->regs_count - 1;
+ ctx->arrays[aid].base = alloc_block(ctx, instr, size);
+ ctx->arrays[aid].size = size;
+ }
+
+ name = ctx->arrays[aid].base;
+
+ foreach_ssa_src_n(src, i, instr) {
+ unsigned r = i + 1;
+
+ /* stop once past the real src registers (address / etc): */
+ if (r >= instr->regs_count)
+ break;
+
+ instr_assign(ctx, src, name);
+ name++;
+ }
+
+}
+
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *n;
@@ -531,6 +610,16 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
ra_dump_list("-------\n", block->head);
+ /* first pass, assign arrays: */
+ for (n = block->head; n && !ctx->error; n = n->next) {
+ if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
+ debug_assert(!n->cp.left); /* don't think this should happen */
+ ra_dump_instr("ASSIGN ARRAY: ", n);
+ instr_assign_array(ctx, n);
+ ra_dump_list("-------\n", block->head);
+ }
+ }
+
for (n = block->head; n && !ctx->error; n = n->next) {
ra_dump_instr("ASSIGN: ", n);
instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
@@ -552,6 +641,8 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
};
int ret;
+ memset(&ctx.arrays, ~0, sizeof(ctx.arrays));
+
/* mark dst registers w/ SSA flag so we can see which
* have been assigned so far:
* NOTE: we really should set SSA flag consistently on