summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2014-12-30 20:02:36 -0500
committerRob Clark <[email protected]>2015-01-07 19:37:28 -0500
commit1e5c207dba4dbd07919bff2efe57ad361a44ac84 (patch)
tree5c138ae76d07d4de3648ae8bdf78fbc72c72e1ec /src
parent63e5b72da8b1df4bbb0fcf46524d106f51264605 (diff)
freedreno/ir3: start on indirect gpr reads
Handle TEMP[ADDR[]] src registers by generating a fanin to group array elements, similarly to how texture fetch instructions work. NOTE: For all the scalar instructions generated for a single tgsi vector operation which uses an array src (or possibly even uses the same array as multiple srcs), re-use the same fanin node. Since a vector operation operates on all components at the same time, it should never see more than one version of the same array. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h5
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler.c131
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_ra.c18
3 files changed, 146 insertions, 8 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index b1fb08fcec5..a3bbba941ce 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -209,6 +209,9 @@ struct ir3_instruction {
struct {
struct ir3_block *block;
} inout;
+ struct {
+ int off; /* offset relative to addr reg */
+ } deref;
/* XXX keep this as big as all other union members! */
uint32_t info[3];
@@ -465,7 +468,7 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
static inline bool reg_gpr(struct ir3_register *r)
{
- if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_ADDR))
+ if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 99bad377d53..8c88bf7db47 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -88,6 +88,17 @@ struct ir3_compile_context {
struct tgsi_shader_info info;
+ /* hmm, would be nice if tgsi_scan_shader figured this out
+ * for us:
+ */
+ struct {
+ unsigned first, last;
+ struct ir3_instruction *fanin;
+ } array[16];
+ uint32_t array_dirty;
+ /* offset into array[], per file, of first array info */
+ uint8_t array_offsets[TGSI_FILE_COUNT];
+
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
@@ -130,11 +141,21 @@ static void create_mov(struct ir3_compile_context *ctx,
struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static type_t get_ftype(struct ir3_compile_context *ctx);
+/**
+ * Reserve the ctx->array[] slots belonging to one register file.
+ *
+ * Slot 'i' becomes the file's ArrayID 0 entry (the legacy array spanning
+ * the whole file, 0 .. file_max[file]) and is recorded in
+ * ctx->array_offsets[file], so that (ArrayID + array_offsets[file])
+ * indexes ctx->array[].
+ *
+ * @param ctx   compile context whose array[] / array_offsets[] are filled in
+ * @param file  TGSI_FILE_x register file being set up
+ * @param i     first free slot in ctx->array[]
+ * @return number of slots consumed by this file (array_max[file] + 1).
+ *
+ * NOTE: this is a slot count, not the next free index.  compile_init()
+ * accumulates the result with "i += setup_arrays(ctx, file, i)", so
+ * returning the already advanced index would count every earlier file
+ * twice and push the later files' offsets past their real position.
+ */
+static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
+{
+	unsigned count = ctx->info.array_max[file] + 1;
+
+	/* all of this file's slots must fit in the fixed-size table: */
+	debug_assert((i + count) <= ARRAY_SIZE(ctx->array));
+
+	/* ArrayID 0 for a given file is the legacy array spanning the entire file: */
+	ctx->array[i].first = 0;
+	ctx->array[i].last = ctx->info.file_max[file];
+	ctx->array_offsets[file] = i;
+
+	return count;
+}
+
static unsigned
compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
{
- unsigned ret;
+ unsigned ret, i;
struct tgsi_shader_info *info = &ctx->info;
struct tgsi_lowering_config lconfig = {
.color_two_side = so->key.color_two_side,
@@ -190,6 +211,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
}
ctx->ir = so->ir;
ctx->so = so;
+ ctx->array_dirty = 0;
ctx->next_inloc = 8;
ctx->num_internal_temps = 0;
ctx->branch_count = 0;
@@ -204,10 +226,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
ctx->using_tmp_dst = false;
memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
+ memset(ctx->array, 0, sizeof(ctx->array));
+ memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
#define FM(x) (1 << TGSI_FILE_##x)
/* optimize can't deal with relative addressing: */
- if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
+ if (info->indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
return TGSI_PARSE_ERROR;
/* NOTE: if relative addressing is used, we set constlen in
@@ -217,6 +241,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
if (info->indirect_files & FM(CONSTANT))
so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);
+ i = 0;
+ i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
+ i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
+ i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
+ /* any others? we don't track arrays for const..*/
+
/* Immediates go after constants: */
so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
@@ -275,6 +305,12 @@ instr_finish(struct ir3_compile_context *ctx)
*(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
ctx->num_output_updates = 0;
+
+ while (ctx->array_dirty) {
+ unsigned aid = ffs(ctx->array_dirty) - 1;
+ ctx->array[aid].fanin = NULL;
+ ctx->array_dirty &= ~(1 << aid);
+ }
}
/* For "atomic" groups of instructions, for example the four scalar
@@ -515,6 +551,8 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n)
* NOTE: *don't* use instr_create() here!
*/
instr = create_immed(ctx, 0.0);
+ /* no need to recreate the immed for every access: */
+ block->temporaries[n] = instr;
}
break;
}
@@ -522,17 +560,68 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n)
return instr;
}
+/* Translate an indirectly addressed src register into its index in
+ * ctx->array[]: the TGSI ArrayID biased by the per-file base offset
+ * stored in ctx->array_offsets[].  Must not be used for the CONSTANT
+ * file (asserted below).
+ */
+static int array_id(struct ir3_compile_context *ctx,
+		const struct tgsi_src_register *src)
+{
+	/* XXX complete hack: reinterpret the pointer as the
+	 * tgsi_full_src_register it is presumably embedded in.  Nothing
+	 * that isn't wrapped in a tgsi_full_src_register should be
+	 * indirect.
+	 */
+	const struct tgsi_full_src_register *full = (const void *)src;
+	int base, id;
+
+	debug_assert(src->File != TGSI_FILE_CONSTANT);
+
+	base = ctx->array_offsets[src->File];
+	id = full->Indirect.ArrayID;
+
+	return id + base;
+}
+
+/* Hook up the SSA source instruction for TGSI register 'src' to the ir3
+ * register 'reg'.
+ *
+ * - Indirect src in any file other than CONSTANT: the source is a
+ *   meta:fanin (OPC_META_FI) collecting all four components of every
+ *   element in the array's first..last range.  The fanin is cached in
+ *   ctx->array[aid].fanin and flagged in ctx->array_dirty, so later
+ *   calls for the same array reuse it while the cache entry is set.
+ *   'chan' is not used on this path.
+ * - Otherwise: the source is ssa_instr() for the single component
+ *   regid(src->Index, chan).
+ *
+ * When a source instruction was found, IR3_REG_SSA is set on 'reg' and
+ * reg->instr points at it.  When none was found but the caller had
+ * already set IR3_REG_SSA, an immediate 0.0 is used instead.
+ */
static void
ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
const struct tgsi_src_register *src, unsigned chan)
{
struct ir3_instruction *instr;
- instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+ if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+ /* for relative addressing of gpr's (due to register assignment)
+ * we must generate a fanin instruction to collect all possible
+ * array elements that the instruction could address together:
+ */
+ unsigned i, j, aid = array_id(ctx, src);
+
+ if (ctx->array[aid].fanin) {
+ /* fanin already built for this array, share it: */
+ instr = ctx->array[aid].fanin;
+ } else {
+ unsigned first, last;
+
+ first = ctx->array[aid].first;
+ last = ctx->array[aid].last;
+
+ /* one dst register plus four srcs per array element: */
+ instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
+ 1 + (4 * (last + 1 - first)));
+ ir3_reg_create(instr, 0, 0);
+ for (i = first; i <= last; i++) {
+ for (j = 0; j < 4; j++) {
+ unsigned n = (i * 4) + j;
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr =
+ ssa_instr(ctx, src->File, n);
+ }
+ }
+ /* cache the fanin and mark the slot in the dirty mask: */
+ ctx->array[aid].fanin = instr;
+ ctx->array_dirty |= (1 << aid);
+ }
+ } else {
+ /* normal case (not relative addressed GPR) */
+ instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+ }
if (instr) {
reg->flags |= IR3_REG_SSA;
reg->instr = instr;
+ } else if (reg->flags & IR3_REG_SSA) {
+ /* special hack for trans_samp() which calls ssa_src() directly
+ * to build up the collect (fanin) for const src.. (so SSA flag
+ * set but no src instr... it basically gets lucky because we
+ * default to 0.0 for "undefined" src instructions, which is
+ * what it wants. We probably need to give it a better way to
+ * do this, but for now this hack:
+ */
+ reg->instr = create_immed(ctx, 0.0);
}
}
@@ -689,11 +778,23 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
instr = ir3_instr_create(ctx->block, -1, OPC_META_DEREF);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->block->address;
+
+ if (src->File != TGSI_FILE_CONSTANT) {
+ unsigned aid = array_id(ctx, src);
+ unsigned off = src->Index - ctx->array[aid].first; /* vec4 offset */
+ instr->deref.off = regid(off, chan);
+ }
}
reg = ir3_reg_create(instr, regid(num, chan), flags);
- reg->wrmask = wrmask;
+ if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+ unsigned aid = array_id(ctx, src);
+ reg->size = 4 * (1 + ctx->array[aid].last - ctx->array[aid].first);
+ } else {
+ reg->wrmask = wrmask;
+ }
+
if (wrmask == 0x1) {
/* normal case */
ssa_src(ctx, reg, src, chan);
@@ -729,8 +830,11 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
}
if (src->Indirect) {
+ unsigned size = reg->size;
+
reg = ir3_reg_create(orig, 0, flags | IR3_REG_SSA);
reg->instr = instr;
+ reg->size = size;
}
return reg;
}
@@ -2990,11 +3094,26 @@ compile_instructions(struct ir3_compile_context *ctx)
case TGSI_TOKEN_TYPE_DECLARATION: {
struct tgsi_full_declaration *decl =
&ctx->parser.FullToken.FullDeclaration;
- if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+ unsigned file = decl->Declaration.File;
+ if (file == TGSI_FILE_OUTPUT) {
decl_out(ctx, decl);
- } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ } else if (file == TGSI_FILE_INPUT) {
decl_in(ctx, decl);
}
+
+ if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
+ int aid = decl->Array.ArrayID + ctx->array_offsets[file];
+
+ compile_assert(ctx, aid < ARRAY_SIZE(ctx->array));
+
+ /* legacy ArrayID==0 stuff probably isn't going to work
+ * well (and is at least untested).. let's just scream:
+ */
+ compile_assert(ctx, aid != 0);
+
+ ctx->array[aid].first = decl->Range.First;
+ ctx->array[aid].last = decl->Range.Last;
+ }
break;
}
case TGSI_TOKEN_TYPE_IMMEDIATE: {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 08540466bb0..eaeba0a6465 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -199,6 +199,14 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx,
if (r)
regmask_set_if_not(liveregs, r, &written);
}
+
+ /* if instruction is output, we need a reg that isn't written
+ * before the end.. equiv to the instr_used_by() check above
+ * in the loop body
+ * TODO maybe should follow fanin/fanout?
+ */
+ if (instr_is_output(instr))
+ regmask_or(liveregs, liveregs, &written);
}
static int find_available(regmask_t *liveregs, int size, bool half)
@@ -364,6 +372,14 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
case OPC_META_FI:
instr_assign(ctx, instr, name - (r - 1));
return;
+ case OPC_META_DEREF:
+ /* first arg of meta:deref is the addr reg (do not
+ * propagate), 2nd is actual src (fanin) which does
+ * get propagated)
+ */
+ if (r == 2)
+ instr_assign(ctx, instr, name + instr->deref.off);
+ break;
default:
break;
}
@@ -467,7 +483,7 @@ static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
/* already partially assigned, just finish the job */
} else if (is_addr(instr)) {
debug_assert(!instr->cp.right);
- name = instr->regs[2]->num;
+ name = instr->regs[2]->num + instr->deref.off;
} else if (reg_gpr(dst)) {
int size;
/* number of consecutive registers to assign: */