summary refs log tree commit diff stats
path: root/src/gallium/drivers/freedreno/a3xx/ir3.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx/ir3.h')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3.h 110
1 files changed, 109 insertions, 1 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index 896bec114fa..ccd3b0b54b4 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir3.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -65,6 +65,11 @@ struct ir3_register {
* that the shader needs no more input:
*/
IR3_REG_EI = 0x200,
+ /* meta-flags, for intermediate stages of IR, ie.
+ * before register assignment is done:
+ */
+ IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */
+ IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */
} flags;
union {
/* normal registers:
@@ -77,6 +82,10 @@ struct ir3_register {
float fim_val;
/* relative: */
int offset;
+ /* for IR3_REG_SSA, src registers contain ptr back to
+ * assigning instruction.
+ */
+ struct ir3_instruction *instr;
};
/* used for cat5 instructions, but also for internal/IR level
@@ -139,6 +148,10 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
+ /* meta-flags, for intermediate stages of IR, ie.
+ * before register assignment is done:
+ */
+ IR3_INSTR_MARK = 0x1000,
} flags;
int repeat;
unsigned regs_count;
@@ -171,7 +184,33 @@ struct ir3_instruction {
int offset;
int iim_val;
} cat6;
+ /* for meta-instructions, just used to hold extra data
+ * before instruction scheduling, etc
+ */
+ struct {
+ int off; /* component/offset */
+ } fo;
+ struct {
+ struct ir3_block *if_block, *else_block;
+ } flow;
+ struct {
+ struct ir3_block *block;
+ } inout;
};
+
+ /* transient values used during various algorithms: */
+ union {
+ /* The instruction depth is the max dependency distance to output.
+ *
+ * You can also think of it as the "cost", if we did any sort of
+ * optimization for register footprint. Ie. a value that is just
+ * result of moving a const to a reg would have a low cost, so
+ * it could make sense to duplicate the instruction at various
+ * points where the result is needed to reduce register footprint.
+ */
+ unsigned depth;
+ };
+ struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
#endif
@@ -201,6 +240,7 @@ struct ir3_shader * ir3_shader_create(void);
void ir3_shader_destroy(struct ir3_shader *shader);
void * ir3_shader_assemble(struct ir3_shader *shader,
struct ir3_shader_info *info);
+void * ir3_alloc(struct ir3_shader *shader, int sz);
struct ir3_block * ir3_block_create(struct ir3_shader *shader,
unsigned ntmp, unsigned nin, unsigned nout);
@@ -208,11 +248,44 @@ struct ir3_block * ir3_block_create(struct ir3_shader *shader,
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc);
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
+{
+ if (instr->flags & IR3_INSTR_MARK)
+ return true; /* already visited */
+ instr->flags ^= IR3_INSTR_MARK; /* mark bit is known clear here, so the XOR sets it */
+ return false; /* first visit: instr is now marked */
+}
+
+static inline void ir3_shader_clear_mark(struct ir3_shader *shader)
+{
+ /* TODO would be nice to drop the instruction array.. for
+ * new compiler, _clear_mark() is all we use it for, and
+ * we could probably manage a linked list instead..
+ */
+ unsigned i;
+ for (i = 0; i < shader->instrs_count; i++) { /* every entry of shader->instrs[] */
+ struct ir3_instruction *instr = shader->instrs[i];
+ instr->flags &= ~IR3_INSTR_MARK; /* so ir3_instr_check_mark() reports "not visited" again */
+ }
+}
+
+static inline int ir3_instr_regno(struct ir3_instruction *instr,
+ struct ir3_register *reg)
+{
+ unsigned i;
+ for (i = 0; i < instr->regs_count; i++)
+ if (reg == instr->regs[i]) /* matched by pointer identity, not by contents */
+ return i; /* index of reg within instr->regs[] */
+ return -1; /* reg is not one of instr's registers */
+}
+
+
/* comp:
* 0 - x
* 1 - y
@@ -254,6 +327,15 @@ static inline bool is_input(struct ir3_instruction *instr)
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
}
+static inline bool is_meta(struct ir3_instruction *instr)
+{
+ /* TODO how should we count PHI (and maybe fan-in/out) which
+ * might actually contribute some instructions to the final
+ * result?
+ */
+ return (instr->category == -1); /* category -1 is what identifies a meta-instruction */
+}
+
static inline bool is_gpr(struct ir3_register *reg)
{
return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED));
@@ -262,13 +344,39 @@ static inline bool is_gpr(struct ir3_register *reg)
/* TODO combine is_gpr()/reg_gpr().. */
static inline bool reg_gpr(struct ir3_register *r)
{
- if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))
+ if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_SSA)) /* SSA: meta-flag used before register assignment */
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
return true;
}
+/* dump: */
+#include <stdio.h>
+void ir3_shader_dump(struct ir3_shader *shader, const char *name,
+ struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3_shader? */,
+ FILE *f);
+void ir3_dump_instr_single(struct ir3_instruction *instr);
+void ir3_dump_instr_list(struct ir3_instruction *instr);
+
+/* flatten if/else: */
+int ir3_block_flatten(struct ir3_block *block);
+
+/* depth calculation: */
+int ir3_delayslots(struct ir3_instruction *assigner,
+ struct ir3_instruction *consumer, unsigned n);
+void ir3_block_depth(struct ir3_block *block);
+
+/* copy-propagate: */
+void ir3_block_cp(struct ir3_block *block);
+
+/* scheduling: */
+void ir3_block_sched(struct ir3_block *block);
+
+/* register assignment: */
+int ir3_block_ra(struct ir3_block *block, enum shader_t type);
+
+
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif