summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/freedreno_screen.c 9
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.c 35
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 91
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 333
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cp.c 29
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_depth.c 4
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_print.c 34
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_ra.c 190
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_sched.c 3
9 files changed, 365 insertions, 363 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index a75b04b327a..6562924dae1 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -400,9 +400,16 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ /* Technically this should be the same as for TEMP/CONST, since
+ * everything is just normal registers. This is just a temporary
+ * hack until load_input/store_output handle arrays in a similar
+ * way to load_var/store_var.
+ */
+ return 0;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
- return 1;
+ /* a2xx compiler doesn't handle indirect: */
+ return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index b24825cff85..be415d8e5fe 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -81,6 +81,7 @@ struct ir3 * ir3_create(struct ir3_compiler *compiler,
shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
list_inithead(&shader->block_list);
+ list_inithead(&shader->array_list);
return shader;
}
@@ -121,18 +122,19 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
val.iim_val = reg->iim_val;
} else {
unsigned components;
+ int16_t max;
if (reg->flags & IR3_REG_RELATIV) {
components = reg->size;
- val.dummy10 = reg->offset;
+ val.dummy10 = reg->array.offset;
+ max = (reg->array.offset + repeat + components - 1) >> 2;
} else {
components = util_last_bit(reg->wrmask);
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
+ max = (reg->num + repeat + components - 1) >> 2;
}
- int16_t max = (reg->num + repeat + components - 1) >> 2;
-
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
} else if (val.num == 63) {
@@ -233,7 +235,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
if (src1->flags & IR3_REG_RELATIV) {
- iassert(src1->num < (1 << 10));
+ iassert(src1->array.offset < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@@ -260,7 +262,7 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
!((src1->flags ^ src2->flags) & IR3_REG_HALF));
if (src2->flags & IR3_REG_RELATIV) {
- iassert(src2->num < (1 << 10));
+ iassert(src2->array.offset < (1 << 10));
cat2->rel2.src2 = reg(src2, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@@ -333,7 +335,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
if (src1->flags & IR3_REG_RELATIV) {
- iassert(src1->num < (1 << 10));
+ iassert(src1->array.offset < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@@ -361,7 +363,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
if (src3->flags & IR3_REG_RELATIV) {
- iassert(src3->num < (1 << 10));
+ iassert(src3->array.offset < (1 << 10));
cat3->rel2.src3 = reg(src3, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
@@ -404,7 +406,7 @@ static int emit_cat4(struct ir3_instruction *instr, void *ptr,
iassert(instr->regs_count == 2);
if (src->flags & IR3_REG_RELATIV) {
- iassert(src->num < (1 << 10));
+ iassert(src->array.offset < (1 << 10));
cat4->rel.src = reg(src, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
@@ -737,6 +739,14 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
return reg;
}
+/* Make a shallow copy of 'reg'. A new register is obtained from
+ * reg_create(shader, 0, 0) and the whole struct is then assigned over
+ * it, so flags, the value union and the instr pointer all come from
+ * 'reg'. This function does not itself store the copy into any
+ * instruction's regs[]; the caller does that.
+ */
+struct ir3_register * ir3_reg_clone(struct ir3 *shader,
+ struct ir3_register *reg)
+{
+ struct ir3_register *new_reg = reg_create(shader, 0, 0);
+ *new_reg = *reg;
+ return new_reg;
+}
+
void
ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr)
@@ -777,3 +787,12 @@ ir3_count_instructions(struct ir3 *ir)
}
return cnt;
}
+
+/* Walk ir->array_list and return the array whose id equals 'id', or
+ * NULL if no array on the list has that id.
+ */
+struct ir3_array *
+ir3_lookup_array(struct ir3 *ir, unsigned id)
+{
+ list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
+ if (arr->id == id)
+ return arr;
+ return NULL;
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 62d14a0ae37..1e5a1e9ee8b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -83,7 +83,8 @@ struct ir3_register {
* before register assignment is done:
*/
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
- IR3_REG_PHI_SRC= 0x4000, /* phi src, regs[0]->instr points to phi */
+ IR3_REG_ARRAY = 0x4000,
+ IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */
} flags;
union {
@@ -97,11 +98,18 @@ struct ir3_register {
uint32_t uim_val;
float fim_val;
/* relative: */
- int offset;
+ struct {
+ uint16_t id;
+ uint16_t offset;
+ } array;
};
- /* for IR3_REG_SSA, src registers contain ptr back to
- * assigning instruction.
+ /* For IR3_REG_SSA, src registers contain ptr back to assigning
+ * instruction.
+ *
+ * For IR3_REG_ARRAY, the pointer is back to the last dependent
+ * array access (although the net effect is the same, it points
+ * back to a previous instruction that we depend on).
*/
struct ir3_instruction *instr;
@@ -222,9 +230,6 @@ struct ir3_instruction {
int off; /* component/offset */
} fo;
struct {
- int aid;
- } fi;
- struct {
/* used to temporarily hold reference to nir_phi_instr
* until we resolve the phi srcs
*/
@@ -293,19 +298,6 @@ struct ir3_instruction {
*/
struct ir3_instruction *address;
- /* in case of a instruction with relative dst instruction, we need to
- * capture the dependency on the fanin for the previous values of
- * the array elements. Since we don't know at compile time actually
- * which array elements are written, this serves to preserve the
- * unconditional write to array elements prior to the conditional
- * write.
- *
- * TODO only cat1 can do indirect write.. we could maybe move this
- * into instr->cat1.fanin (but would require the frontend to insert
- * the extra mov)
- */
- struct ir3_instruction *fanin;
-
/* Entry in ir3_block's instruction list: */
struct list_head node;
@@ -379,10 +371,39 @@ struct ir3 {
/* List of blocks: */
struct list_head block_list;
+ /* List of ir3_array's: */
+ struct list_head array_list;
+
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
+typedef struct nir_variable nir_variable;
+
+/* Per-array state, kept on ir3::array_list. Registers flagged
+ * IR3_REG_ARRAY refer to an array through ir3_register::array.id.
+ */
+struct ir3_array {
+ struct list_head node; /* entry in ir3::array_list */
+ unsigned length; /* element count; array offsets are checked against this */
+ unsigned id; /* value matched by ir3_lookup_array() */
+
+ nir_variable *var; /* the NIR variable this array was declared for */
+
+ /* We track the last write and last access (read or write) to
+ * setup dependencies on instructions that read or write the
+ * array. Reads can be re-ordered wrt. other reads, but should
+ * not be re-ordered wrt. writes. Writes cannot be reordered
+ * wrt. any other access to the array.
+ *
+ * So array reads depend on last write, and array writes depend
+ * on the last access.
+ */
+ struct ir3_instruction *last_write, *last_access;
+
+ /* extra stuff used in RA pass: */
+ unsigned base;
+};
+
+struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
+
typedef struct nir_block nir_block;
struct ir3_block {
@@ -430,6 +451,8 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+struct ir3_register * ir3_reg_clone(struct ir3 *shader,
+ struct ir3_register *reg);
void ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr);
@@ -510,6 +533,9 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
if (dst->num == regid(REG_A0, 0))
return false;
+ if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
+ return false;
+
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type))
return true;
@@ -623,8 +649,10 @@ static inline bool writes_pred(struct ir3_instruction *instr)
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
- if (reg->flags & IR3_REG_SSA)
+ if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
+ debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
return reg->instr;
+ }
return NULL;
}
@@ -813,8 +841,6 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
- if (instr->fanin)
- return instr->regs_count + 2;
if (instr->address)
return instr->regs_count + 1;
return instr->regs_count;
@@ -822,8 +848,6 @@ static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
- if (n == (instr->regs_count + 1))
- return instr->fanin;
if (n == (instr->regs_count + 0))
return instr->address;
return ssa(instr->regs[n]);
@@ -834,8 +858,8 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr
/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
if ((__instr)->regs_count) \
- for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
- if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
+ for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
+ if ((__srcinst = __ssa_src_n(__instr, __n)))
/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
@@ -878,7 +902,15 @@ ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
struct ir3_instruction *instr =
ir3_instr_create(block, 1, 0);
ir3_reg_create(instr, 0, 0); /* dst */
- ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+ if (src->regs[0]->flags & IR3_REG_ARRAY) {
+ struct ir3_register *src_reg =
+ ir3_reg_create(instr, 0, IR3_REG_ARRAY);
+ src_reg->array = src->regs[0]->array;
+ src_reg->instr = src;
+ } else {
+ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+ }
+ debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
instr->cat1.src_type = type;
instr->cat1.dst_type = type;
return instr;
@@ -894,6 +926,7 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = src_type;
instr->cat1.dst_type = dst_type;
+ debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
return instr;
}
@@ -1083,7 +1116,7 @@ typedef uint8_t regmask_t[2 * MAX_REG / 8];
static inline unsigned regmask_idx(struct ir3_register *reg)
{
- unsigned num = reg->num;
+ unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
debug_assert(num < MAX_REG);
if (reg->flags & IR3_REG_HALF)
num += MAX_REG;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index e5d39097267..bd0ee89a1ec 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -74,8 +74,6 @@ struct ir3_compile {
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
- /* mapping from nir_variable to ir3_array: */
- struct hash_table *var_ht;
unsigned num_arrays;
/* a common pattern for indirect addressing is to request the
@@ -142,8 +140,6 @@ compile_init(struct ir3_compiler *compiler,
ctx->so = so;
ctx->def_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
- ctx->var_ht = _mesa_hash_table_create(ctx,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->block_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
@@ -220,206 +216,26 @@ compile_free(struct ir3_compile *ctx)
ralloc_free(ctx);
}
-/* global per-array information: */
-struct ir3_array {
- unsigned length, aid;
-};
-
-/* per-block array state: */
-struct ir3_array_value {
- /* TODO drop length/aid, and just have ptr back to ir3_array */
- unsigned length, aid;
- /* initial array element values are phi's, other than for the
- * entry block. The phi src's get added later in a resolve step
- * after we have visited all the blocks, to account for back
- * edges in the cfg.
- */
- struct ir3_instruction **phis;
- /* current array element values (as block is processed). When
- * the array phi's are resolved, it will contain the array state
- * at exit of block, so successor blocks can use it to add their
- * phi srcs.
- */
- struct ir3_instruction *arr[];
-};
-
-/* track array assignments per basic block. When an array is read
- * outside of the same basic block, we can use NIR's dominance-frontier
- * information to figure out where phi nodes are needed.
- */
-struct ir3_nir_block_data {
- unsigned foo;
- /* indexed by array-id (aid): */
- struct ir3_array_value *arrs[];
-};
-
-static struct ir3_nir_block_data *
-get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
-{
- if (!block->data) {
- struct ir3_nir_block_data *bd = ralloc_size(ctx, sizeof(*bd) +
- ((ctx->num_arrays + 1) * sizeof(bd->arrs[0])));
- block->data = bd;
- }
- return block->data;
-}
-
+/* Create the ir3_array for 'var' and append it to the shader's
+ * array_list. The id is the pre-incremented ctx->num_arrays and the
+ * length is the GLSL length of the variable times four.
+ */
static void
declare_var(struct ir3_compile *ctx, nir_variable *var)
{
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
- struct ir3_array *arr = ralloc(ctx, struct ir3_array);
+ /* rzalloc rather than ralloc: last_write/last_access are read by
+ * create_var_load()/create_var_store() before anything assigns
+ * them, so they must start out NULL rather than uninitialized:
+ */
+ struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
+ arr->id = ++ctx->num_arrays;
arr->length = length;
- arr->aid = ++ctx->num_arrays;
- _mesa_hash_table_insert(ctx->var_ht, var, arr);
+ arr->var = var;
+ list_addtail(&arr->node, &ctx->ir->array_list);
}
-static nir_block *
-nir_block_pred(nir_block *block)
-{
- assert(block->predecessors->entries < 2);
- if (block->predecessors->entries == 0)
- return NULL;
- return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
-}
-
-static struct ir3_array_value *
+static struct ir3_array *
get_var(struct ir3_compile *ctx, nir_variable *var)
{
- struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
- struct ir3_block *block = ctx->block;
- struct ir3_nir_block_data *bd = get_block_data(ctx, block);
- struct ir3_array *arr = entry->data;
-
- if (!bd->arrs[arr->aid]) {
- struct ir3_array_value *av = ralloc_size(bd, sizeof(*av) +
- (arr->length * sizeof(av->arr[0])));
- struct ir3_array_value *defn = NULL;
- nir_block *pred_block;
-
- av->length = arr->length;
- av->aid = arr->aid;
-
- /* For loops, we have to consider that we have not visited some
- * of the blocks who should feed into the phi (ie. back-edges in
- * the cfg).. for example:
- *
- * loop {
- * block { load_var; ... }
- * if then block {} else block {}
- * block { store_var; ... }
- * if then block {} else block {}
- * block {...}
- * }
- *
- * We can skip the phi if we can chase the block predecessors
- * until finding the block previously defining the array without
- * crossing a block that has more than one predecessor.
- *
- * Otherwise create phi's and resolve them as a post-pass after
- * all the blocks have been visited (to handle back-edges).
- */
-
- for (pred_block = block->nblock;
- pred_block && (pred_block->predecessors->entries < 2) && !defn;
- pred_block = nir_block_pred(pred_block)) {
- struct ir3_block *pblock = get_block(ctx, pred_block);
- struct ir3_nir_block_data *pbd = pblock->data;
- if (!pbd)
- continue;
- defn = pbd->arrs[arr->aid];
- }
-
- if (defn) {
- /* only one possible definer: */
- for (unsigned i = 0; i < arr->length; i++)
- av->arr[i] = defn->arr[i];
- } else if (pred_block) {
- /* not the first block, and multiple potential definers: */
- av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
-
- for (unsigned i = 0; i < arr->length; i++) {
- struct ir3_instruction *phi;
-
- phi = ir3_instr_create2(block, -1, OPC_META_PHI,
- 1 + ctx->impl->num_blocks);
- ir3_reg_create(phi, 0, 0); /* dst */
-
- /* phi's should go at head of block: */
- list_delinit(&phi->node);
- list_add(&phi->node, &block->instr_list);
-
- av->phis[i] = av->arr[i] = phi;
- }
- } else {
- /* Some shaders end up reading array elements without
- * first writing.. so initialize things to prevent null
- * instr ptrs later:
- */
- for (unsigned i = 0; i < arr->length; i++)
- av->arr[i] = create_immed(block, 0);
- }
-
- bd->arrs[arr->aid] = av;
- }
-
- return bd->arrs[arr->aid];
-}
-
-static void
-add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
- struct ir3_array_value *av, BITSET_WORD *visited)
-{
- struct ir3_block *block;
- struct ir3_nir_block_data *bd;
-
- if (BITSET_TEST(visited, nblock->index))
- return;
-
- BITSET_SET(visited, nblock->index);
-
- block = get_block(ctx, nblock);
- bd = block->data;
-
- if (bd && bd->arrs[av->aid]) {
- struct ir3_array_value *dav = bd->arrs[av->aid];
- for (unsigned i = 0; i < av->length; i++) {
- ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
- dav->arr[i];
- }
- } else {
- /* didn't find defn, recurse predecessors: */
- struct set_entry *entry;
- set_foreach(nblock->predecessors, entry) {
- add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
- }
- }
-}
-
-static void
-resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
-{
- struct ir3_nir_block_data *bd = block->data;
- unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
-
- if (!bd)
- return;
-
- /* TODO use nir dom_frontier to help us with this? */
-
- for (unsigned i = 1; i <= ctx->num_arrays; i++) {
- struct ir3_array_value *av = bd->arrs[i];
- BITSET_WORD visited[bitset_words];
- struct set_entry *entry;
-
- if (!(av && av->phis))
- continue;
-
- memset(visited, 0, sizeof(visited));
- set_foreach(block->nblock->predecessors, entry) {
- add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
- }
+ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+ if (arr->var == var)
+ return arr;
}
+ compile_error(ctx, "bogus var: %s\n", var->name);
+ return NULL;
}
/* allocate a n element value array (to be populated by caller) and
@@ -437,6 +253,7 @@ __get_dst(struct ir3_compile *ctx, void *key, unsigned n)
static struct ir3_instruction **
get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
{
+ compile_assert(ctx, dst->is_ssa);
if (dst->is_ssa) {
return __get_dst(ctx, &dst->ssa, n);
} else {
@@ -454,6 +271,7 @@ static struct ir3_instruction **
get_src(struct ir3_compile *ctx, nir_src *src)
{
struct hash_entry *entry;
+ compile_assert(ctx, src->is_ssa);
if (src->is_ssa) {
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
} else {
@@ -568,7 +386,7 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
- ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
+ ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
ir3_instr_set_address(mov, address);
@@ -607,17 +425,45 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
src->instr = collect;
src->size = arrsz;
- src->offset = n;
+ src->array.offset = n;
ir3_instr_set_address(mov, address);
return mov;
}
+/* Emit a mov (cat1, u32 -> u32) in the current block that reads
+ * element 'n' of 'arr'.
+ *
+ * relative (indirect) if address!=NULL
+ *
+ * The src register is flagged IR3_REG_ARRAY and its instr pointer is
+ * set to arr->last_write, so the read depends on the most recent
+ * write to the array. The new mov is recorded as arr->last_access
+ * and returned.
+ */
+static struct ir3_instruction *
+create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+ struct ir3_instruction *address)
+{
+ struct ir3_block *block = ctx->block;
+ struct ir3_instruction *mov;
+ struct ir3_register *src;
+
+ mov = ir3_instr_create(block, 1, 0);
+ mov->cat1.src_type = TYPE_U32;
+ mov->cat1.dst_type = TYPE_U32;
+ ir3_reg_create(mov, 0, 0);
+ src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+ COND(address, IR3_REG_RELATIV));
+ src->instr = arr->last_write;
+ src->size = arr->length;
+ src->array.id = arr->id;
+ src->array.offset = n;
+
+ if (address)
+ ir3_instr_set_address(mov, address);
+
+ arr->last_access = mov;
+
+ return mov;
+}
+
+/* relative (indirect) if address!=NULL */
static struct ir3_instruction *
-create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
- struct ir3_instruction *src, struct ir3_instruction *address,
- struct ir3_instruction *collect)
+create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, unsigned n,
+ struct ir3_instruction *src, struct ir3_instruction *address)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
@@ -626,14 +472,18 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
- dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
- dst->size = arrsz;
- dst->offset = n;
+ dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
+ COND(address, IR3_REG_RELATIV));
+ dst->instr = arr->last_access;
+ dst->size = arr->length;
+ dst->array.id = arr->id;
+ dst->array.offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
- mov->fanin = collect;
ir3_instr_set_address(mov, address);
+ arr->last_write = arr->last_access = mov;
+
return mov;
}
@@ -1198,7 +1048,7 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
- struct ir3_array_value *arr = get_var(ctx, dvar->var);
+ struct ir3_array *arr = get_var(ctx, dvar->var);
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1209,19 +1059,17 @@ emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
- dst[i] = arr->arr[n];
+ dst[i] = create_var_load(ctx, arr, n, NULL);
}
break;
case nir_deref_array_type_indirect: {
/* for indirect, we need to collect all the array elements: */
- struct ir3_instruction *collect =
- create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
- dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
+ dst[i] = create_var_load(ctx, arr, n, addr);
}
break;
}
@@ -1238,8 +1086,9 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
- struct ir3_array_value *arr = get_var(ctx, dvar->var);
- struct ir3_instruction **src;
+ struct ir3_array *arr = get_var(ctx, dvar->var);
+ struct ir3_instruction *addr, **src;
+ unsigned wrmask = intr->const_index[0];
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1248,66 +1097,24 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
switch (darr->deref_array_type) {
case nir_deref_array_type_direct:
- /* direct access does not require anything special: */
- for (int i = 0; i < intr->num_components; i++) {
- /* ttn doesn't generate partial writemasks */
- assert(intr->const_index[0] ==
- (1 << intr->num_components) - 1);
-
- unsigned n = darr->base_offset * 4 + i;
- compile_assert(ctx, n < arr->length);
- arr->arr[n] = src[i];
- }
+ addr = NULL;
break;
- case nir_deref_array_type_indirect: {
- /* for indirect, create indirect-store and fan that out: */
- struct ir3_instruction *collect =
- create_collect(ctx->block, arr->arr, arr->length);
- struct ir3_instruction *addr =
- get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
- for (int i = 0; i < intr->num_components; i++) {
- /* ttn doesn't generate partial writemasks */
- assert(intr->const_index[0] ==
- (1 << intr->num_components) - 1);
-
- struct ir3_instruction *store;
- unsigned n = darr->base_offset * 4 + i;
- compile_assert(ctx, n < arr->length);
-
- store = create_indirect_store(ctx, arr->length,
- n, src[i], addr, collect);
-
- store->fanin->fi.aid = arr->aid;
-
- /* TODO: probably split this out to be used for
- * store_output_indirect? or move this into
- * create_indirect_store()?
- */
- for (int j = i; j < arr->length; j += intr->num_components) {
- struct ir3_instruction *split;
-
- split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
- split->fo.off = j;
- ir3_reg_create(split, 0, 0);
- ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
-
- arr->arr[j] = split;
- }
- }
- /* fixup fanout/split neighbors: */
- for (int i = 0; i < arr->length; i++) {
- arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
- arr->arr[i+1] : NULL;
- arr->arr[i]->cp.left = (i > 0) ?
- arr->arr[i-1] : NULL;
- }
+ case nir_deref_array_type_indirect:
+ addr = get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
break;
- }
default:
compile_error(ctx, "Unhandled store deref type: %u\n",
darr->deref_array_type);
break;
}
+
+ for (int i = 0; i < intr->num_components; i++) {
+ if (!(wrmask & (1 << i)))
+ continue;
+ unsigned n = darr->base_offset * 4 + i;
+ compile_assert(ctx, n < arr->length);
+ create_var_store(ctx, arr, n, src[i], addr);
+ }
}
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
@@ -1835,8 +1642,6 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
}
}
-
- resolve_array_phis(ctx, block);
}
static void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index a6e69d2416f..0d88e7bc3ab 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -202,6 +202,7 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
*dstflags |= srcflags & IR3_REG_CONST;
*dstflags |= srcflags & IR3_REG_IMMED;
*dstflags |= srcflags & IR3_REG_RELATIV;
+ *dstflags |= srcflags & IR3_REG_ARRAY;
}
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
@@ -233,6 +234,10 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
combine_flags(&new_flags, src_reg->flags);
if (valid_flags(instr, n, new_flags)) {
+ if (new_flags & IR3_REG_ARRAY) {
+ debug_assert(!(reg->flags & IR3_REG_ARRAY));
+ reg->array = src_reg->array;
+ }
reg->flags = new_flags;
reg->instr = ssa(src_reg);
}
@@ -283,6 +288,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
conflicts(instr->address, reg->instr->address))
return;
+ src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
@@ -294,6 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
if ((src_reg->flags & IR3_REG_RELATIV) &&
!conflicts(instr->address, reg->instr->address)) {
+ src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
ir3_instr_set_address(instr, reg->instr->address);
@@ -329,6 +336,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
/* other than category 1 (mov) we can only encode up to 10 bits: */
if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
+ src_reg = ir3_reg_clone(instr->block->shader, src_reg);
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
@@ -349,9 +357,11 @@ eliminate_output_mov(struct ir3_instruction *instr)
{
if (is_eligible_mov(instr, false)) {
struct ir3_register *reg = instr->regs[1];
- struct ir3_instruction *src_instr = ssa(reg);
- debug_assert(src_instr);
- return src_instr;
+ if (!(reg->flags & IR3_REG_ARRAY)) {
+ struct ir3_instruction *src_instr = ssa(reg);
+ debug_assert(src_instr);
+ return src_instr;
+ }
}
return instr;
}
@@ -379,9 +389,22 @@ instr_cp(struct ir3_instruction *instr)
continue;
instr_cp(src);
+
+ /* TODO non-indirect access we could figure out which register
+ * we actually want and allow cp..
+ */
+ if (reg->flags & IR3_REG_ARRAY)
+ continue;
+
reg_cp(instr, reg, n);
}
+ if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+ struct ir3_instruction *src = ssa(instr->regs[0]);
+ if (src)
+ instr_cp(src);
+ }
+
if (instr->address) {
instr_cp(instr->address);
ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 4bbc0458790..3354cbd23fa 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -118,6 +118,10 @@ ir3_instr_depth(struct ir3_instruction *instr)
/* visit child to compute it's depth: */
ir3_instr_depth(src);
+ /* for array writes, no need to delay on previous write: */
+ if (i == 0)
+ continue;
+
sd = ir3_delayslots(src, instr, i) + src->depth;
instr->depth = MAX2(instr->depth, sd);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c
index a84e7989cf8..ec832f5d72a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_print.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c
@@ -94,7 +94,7 @@ static void print_instr_name(struct ir3_instruction *instr)
}
}
-static void print_reg_name(struct ir3_register *reg, bool followssa)
+static void print_reg_name(struct ir3_register *reg)
{
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
@@ -106,20 +106,29 @@ static void print_reg_name(struct ir3_register *reg, bool followssa)
if (reg->flags & IR3_REG_IMMED) {
printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
- } else if (reg->flags & IR3_REG_SSA) {
- printf("_");
- if (followssa) {
- printf("[");
+ } else if (reg->flags & IR3_REG_ARRAY) {
+ printf("arr[id=%u, offset=%u, size=%u", reg->array.id,
+ reg->array.offset, reg->size);
+ /* for ARRAY we could have a NULL src, for example on the first
+ * write instruction..
+ */
+ if (reg->instr) {
+ printf(", _[");
print_instr_name(reg->instr);
printf("]");
}
+ printf("]");
+ } else if (reg->flags & IR3_REG_SSA) {
+ printf("_[");
+ print_instr_name(reg->instr);
+ printf("]");
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_HALF)
printf("h");
if (reg->flags & IR3_REG_CONST)
- printf("c<a0.x + %u>", reg->num);
+ printf("c<a0.x + %u>", reg->array.offset);
else
- printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
+ printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->array.offset, reg->size);
} else {
if (reg->flags & IR3_REG_HALF)
printf("h");
@@ -158,7 +167,7 @@ print_instr(struct ir3_instruction *instr, int lvl)
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
printf(i ? ", " : " ");
- print_reg_name(reg, !!i);
+ print_reg_name(reg);
}
if (instr->address) {
@@ -168,13 +177,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
printf("]");
}
- if (instr->fanin) {
- printf(", fanin=_");
- printf("[");
- print_instr_name(instr->fanin);
- printf("]");
- }
-
if (instr->cp.left) {
printf(", left=_");
printf("[");
@@ -192,8 +194,6 @@ print_instr(struct ir3_instruction *instr, int lvl)
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
- } else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
- printf(", aid=%d", instr->fi.aid);
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 88ca95acbbf..3c42f8e1592 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -68,25 +68,24 @@
* LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
* register assignment. But for us that is horrible from a scheduling
* standpoint. Instead what we do is use idea of 'definer' instruction.
- * Ie. the first instruction (lowest ip) to write to the array is the
+ * Ie. the first instruction (lowest ip) to write to the variable is the
* one we consider from use/def perspective when building interference
- * graph. (Other instructions which write other array elements just
- * define the variable some more.)
+ * graph. (Other instructions which write other variable components
+ * just define the variable some more.)
+ *
+ * Arrays of arbitrary size are handled via pre-coloring a consecutive
+ * sequence of registers. Additional scalar (single component) reg
+ * names are allocated starting at ctx->class_base[total_class_count]
+ * (see arr->base), which are pre-colored. In the use/def graph direct
+ * access is treated as a single element use/def, and indirect access
+ * is treated as use or def of all array elements. (Only the first
+ * def is tracked, in case of multiple indirect writes, etc.)
*/
static const unsigned class_sizes[] = {
1, 2, 3, 4,
4 + 4, /* txd + 1d/2d */
4 + 6, /* txd + 3d */
- /* temporary: until we can assign arrays, create classes so we
- * can round up array to fit. NOTE with tgsi arrays should
- * really all be multiples of four:
- */
- 4 * 4,
- 4 * 8,
- 4 * 16,
- 4 * 32,
-
};
#define class_count ARRAY_SIZE(class_sizes)
@@ -265,8 +264,9 @@ struct ir3_ra_ctx {
struct ir3_ra_reg_set *set;
struct ra_graph *g;
unsigned alloc_count;
- unsigned class_alloc_count[total_class_count];
- unsigned class_base[total_class_count];
+ /* one per class, plus one slot for arrays: */
+ unsigned class_alloc_count[total_class_count + 1];
+ unsigned class_base[total_class_count + 1];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
struct ir3_ra_instr_data *instrd;
@@ -329,9 +329,6 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
- if (instr->fanin)
- return get_definer(ctx, instr->fanin, sz, off);
-
if (id->defn) {
*sz = id->sz;
*off = id->off;
@@ -485,10 +482,13 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* couple special cases: */
if (writes_addr(instr) || writes_pred(instr)) {
id->cls = -1;
- continue;
+ } else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
+ id->cls = total_class_count;
+ id->defn = instr;
+ } else {
+ id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+ id->cls = size_to_class(id->sz, is_half(id->defn));
}
- id->defn = get_definer(ctx, instr, &id->sz, &id->off);
- id->cls = size_to_class(id->sz, is_half(id->defn));
}
}
@@ -518,8 +518,6 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
- *
- * TODO but we still need to allocate names for them, don't we??
*/
if (id->cls >= 0) {
instr->name = ctx->class_alloc_count[id->cls]++;
@@ -531,7 +529,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
- unsigned n;
+ unsigned n, base;
ir3_clear_mark(ctx->ir);
n = ir3_count_instructions(ctx->ir);
@@ -550,11 +548,20 @@ ra_init(struct ir3_ra_ctx *ctx)
* actual ra name is class_base[cls] + instr->name;
*/
ctx->class_base[0] = 0;
- for (unsigned i = 1; i < total_class_count; i++) {
+ for (unsigned i = 1; i <= total_class_count; i++) {
ctx->class_base[i] = ctx->class_base[i-1] +
ctx->class_alloc_count[i-1];
}
+ /* and vreg names for array elements: */
+ base = ctx->class_base[total_class_count];
+ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+ arr->base = base;
+ ctx->class_alloc_count[total_class_count] += arr->length;
+ base += arr->length;
+ }
+ ctx->alloc_count += ctx->class_alloc_count[total_class_count];
+
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -566,6 +573,7 @@ __ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
{
unsigned name;
debug_assert(cls >= 0);
+ debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. */
name = ctx->class_base[cls] + defn->name;
debug_assert(name < ctx->alloc_count);
return name;
@@ -590,6 +598,22 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_ra_block_data *bd;
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+ void def(unsigned name, struct ir3_instruction *instr)
+ {
+ /* defined on first write: */
+ if (!ctx->def[name])
+ ctx->def[name] = instr->ip;
+ ctx->use[name] = instr->ip;
+ BITSET_SET(bd->def, name);
+ }
+
+ void use(unsigned name, struct ir3_instruction *instr)
+ {
+ ctx->use[name] = MAX2(ctx->use[name], instr->ip);
+ if (!BITSET_TEST(bd->def, name))
+ BITSET_SET(bd->use, name);
+ }
+
bd = rzalloc(ctx->g, struct ir3_ra_block_data);
bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words);
@@ -601,6 +625,7 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_instruction *src;
+ struct ir3_register *reg;
if (instr->regs_count == 0)
continue;
@@ -632,17 +657,45 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (writes_gpr(instr)) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ struct ir3_register *dst = instr->regs[0];
- if (id->defn == instr) {
- unsigned name = ra_name(ctx, id);
+ if (dst->flags & IR3_REG_ARRAY) {
+ struct ir3_array *arr =
+ ir3_lookup_array(ctx->ir, dst->array.id);
+ unsigned i;
+
+ debug_assert(!(dst->flags & IR3_REG_PHI_SRC));
+
+ /* set the node class now.. in case we don't encounter
+ * this array dst again. From register_alloc algo's
+ * perspective, these are all single/scalar regs:
+ */
+ for (i = 0; i < arr->length; i++) {
+ unsigned name = arr->base + i;
+ ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
+ }
+
+ /* indirect write is treated like a write to all array
+ * elements, since we don't know which one is actually
+ * written:
+ */
+ if (dst->flags & IR3_REG_RELATIV) {
+ for (i = 0; i < arr->length; i++) {
+ unsigned name = arr->base + i;
+ def(name, instr);
+ }
+ } else {
+ unsigned name = arr->base + dst->array.offset;
+ def(name, instr);
+ }
- ctx->def[name] = id->defn->ip;
- ctx->use[name] = id->defn->ip;
+ } else if (id->defn == instr) {
+ unsigned name = ra_name(ctx, id);
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
- BITSET_SET(bd->def, name);
+ def(name, id->defn);
if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
@@ -672,12 +725,28 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
}
- foreach_ssa_src(src, instr) {
- if (writes_gpr(src)) {
+ foreach_src(reg, instr) {
+ if (reg->flags & IR3_REG_ARRAY) {
+ struct ir3_array *arr =
+ ir3_lookup_array(ctx->ir, reg->array.id);
+ /* indirect read is treated like a read from all array
+ * elements, since we don't know which one is actually
+ * read:
+ */
+ if (reg->flags & IR3_REG_RELATIV) {
+ unsigned i;
+ for (i = 0; i < arr->length; i++) {
+ unsigned name = arr->base + i;
+ use(name, instr);
+ }
+ } else {
+ unsigned name = arr->base + reg->array.offset;
+ use(name, instr);
+ debug_assert(reg->array.offset < arr->length);
+ }
+ } else if ((src = ssa(reg)) && writes_gpr(src)) {
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
- ctx->use[name] = MAX2(ctx->use[name], instr->ip);
- if (!BITSET_TEST(bd->def, name))
- BITSET_SET(bd->use, name);
+ use(name, instr);
}
}
}
@@ -830,18 +899,36 @@ static void fixup_half_instr_src(struct ir3_instruction *instr)
}
}
+/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
+ * array access(es) which do not have any previous access to depend
+ * on from scheduling point of view
+ */
static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
- struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- if (id->defn) {
+ struct ir3_ra_instr_data *id;
+
+ if (reg->flags & IR3_REG_ARRAY) {
+ struct ir3_array *arr =
+ ir3_lookup_array(ctx->ir, reg->array.id);
+ unsigned name = arr->base + reg->array.offset;
+ unsigned r = ra_get_node_reg(ctx->g, name);
+ unsigned num = ctx->set->ra_reg_to_gpr[r];
+
+ if (reg->flags & IR3_REG_RELATIV) {
+ reg->array.offset = num;
+ } else {
+ reg->num = num;
+ }
+
+ reg->flags &= ~IR3_REG_ARRAY;
+ } else if ((id = &ctx->instrd[instr->ip]) && id->defn) {
unsigned name = ra_name(ctx, id);
unsigned r = ra_get_node_reg(ctx->g, name);
unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
- if (reg->flags & IR3_REG_RELATIV)
- num += reg->offset;
+ debug_assert(!(reg->flags & IR3_REG_RELATIV));
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
@@ -868,9 +955,9 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_src_n(reg, n, instr) {
struct ir3_instruction *src = reg->instr;
- if (!src)
+ /* Note: reg->instr could be null for IR3_REG_ARRAY */
+ if (!(src || (reg->flags & IR3_REG_ARRAY)))
continue;
-
reg_assign(ctx, instr->regs[n+1], src);
if (instr->regs[n+1]->flags & IR3_REG_HALF)
fixup_half_instr_src(instr);
@@ -881,6 +968,8 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static int
ra_alloc(struct ir3_ra_ctx *ctx)
{
+ unsigned n = 0;
+
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
@@ -898,7 +987,8 @@ ra_alloc(struct ir3_ra_ctx *ctx)
i += 4;
}
- for (j = 0; i < ir->ninputs; i++) {
+ j = 0;
+ for (; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
@@ -914,6 +1004,24 @@ ra_alloc(struct ir3_ra_ctx *ctx)
}
}
}
+ n = j;
+ }
+
+ /* pre-assign array elements:
+ * TODO we could be a bit more clever if we knew which arrays didn't
+ * fully (partially?) conflict with each other..
+ */
+ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+ unsigned i;
+ for (i = 0; i < arr->length; i++) {
+ unsigned name, reg;
+
+ name = arr->base + i;
+ reg = ctx->set->gpr_to_ra_reg[0][n++];
+
+ ra_set_node_reg(ctx->g, name, reg);
+
+ }
}
if (!ra_allocate(ctx->g))
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index 6aaa16edbfe..8f640febc5d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -187,6 +187,9 @@ delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
foreach_ssa_src_n(src, i, instr) {
unsigned d;
+ /* for array writes, no need to delay on previous write: */
+ if (i == 0)
+ continue;
if (src->block != instr->block)
continue;
d = delay_calc_srcn(ctx, src, instr, i);