summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/vc4/vc4_register_allocate.c | 73
1 files changed, 59 insertions, 14 deletions
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index b62669feb30..3001900c074 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
ra_set_finalize(vc4->regs, NULL);
}
+/**
+ * One entry of the table that is sorted to decide the order in which temps
+ * are handed to the register allocator.
+ */
+struct node_to_temp_map {
+        /* Index of the temp this entry describes. */
+        uint32_t temp;
+        /* Sort key; entries with smaller values sort first. */
+        uint32_t priority;
+};
+
+/**
+ * qsort() comparator ordering struct node_to_temp_map entries by ascending
+ * priority.
+ *
+ * The priorities are unsigned 32-bit values, so returning their difference
+ * converted to int would report the wrong sign whenever the two keys are
+ * more than INT_MAX apart.  Compare explicitly and return -1, 0 or 1.
+ */
+static int
+node_to_temp_priority(const void *in_a, const void *in_b)
+{
+        const struct node_to_temp_map *a = in_a;
+        const struct node_to_temp_map *b = in_b;
+
+        return (a->priority > b->priority) - (a->priority < b->priority);
+}
+
/**
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
*
@@ -148,6 +162,8 @@ struct qpu_reg *
vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
{
struct simple_node *node;
+ struct node_to_temp_map map[c->num_temps];
+ uint32_t temp_to_node[c->num_temps];
uint32_t def[c->num_temps];
uint32_t use[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
@@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
c->num_temps);
- for (uint32_t i = 0; i < c->num_temps; i++)
+ for (uint32_t i = 0; i < c->num_temps; i++) {
ra_set_node_class(g, i, vc4->reg_class_any);
+ }
- /* Compute the live ranges so we can figure out interference, and
- * figure out our register classes and preallocated registers.
+ /* Compute the live ranges so we can figure out interference.
*/
uint32_t ip = 0;
foreach(node, &c->instructions) {
@@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
switch (inst->op) {
case QOP_FRAG_Z:
+ case QOP_FRAG_W:
+ /* The payload registers have values implicitly loaded
+ * at the start of the program.
+ */
def[inst->dst.index] = 0;
- ra_set_node_reg(g, inst->dst.index,
+ break;
+ default:
+ break;
+ }
+
+ ip++;
+ }
+
+ for (uint32_t i = 0; i < c->num_temps; i++) {
+ map[i].temp = i;
+ map[i].priority = use[i] - def[i];
+ }
+ qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
+ for (uint32_t i = 0; i < c->num_temps; i++) {
+ temp_to_node[map[i].temp] = i;
+ }
+
+ /* Figure out our register classes and preallocated registers. */
+ foreach(node, &c->instructions) {
+ struct qinst *inst = (struct qinst *)node;
+
+ switch (inst->op) {
+ case QOP_FRAG_Z:
+ ra_set_node_reg(g, temp_to_node[inst->dst.index],
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1);
break;
case QOP_FRAG_W:
- def[inst->dst.index] = 0;
- ra_set_node_reg(g, inst->dst.index,
+ ra_set_node_reg(g, temp_to_node[inst->dst.index],
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
case QOP_TEX_RESULT:
case QOP_TLB_COLOR_READ:
assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4);
- ra_set_node_reg(g, inst->dst.index,
+ ra_set_node_reg(g, temp_to_node[inst->dst.index],
ACC_INDEX + 4);
break;
case QOP_PACK_SCALED:
/* The pack flags require an A-file dst register. */
- ra_set_node_class(g, inst->dst.index, vc4->reg_class_a);
+ ra_set_node_class(g, temp_to_node[inst->dst.index],
+ vc4->reg_class_a);
break;
case QOP_UNPACK_8A:
@@ -216,20 +259,22 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8C:
case QOP_UNPACK_8D:
/* The unpack flags require an A-file src register. */
- ra_set_node_class(g, inst->src[0].index, vc4->reg_class_a);
+ ra_set_node_class(g, temp_to_node[inst->src[0].index],
+ vc4->reg_class_a);
break;
default:
break;
}
-
- ip++;
}
for (uint32_t i = 0; i < c->num_temps; i++) {
for (uint32_t j = i + 1; j < c->num_temps; j++) {
- if (!(def[i] >= use[j] || def[j] >= use[i]))
- ra_add_node_interference(g, i, j);
+ if (!(def[i] >= use[j] || def[j] >= use[i])) {
+ ra_add_node_interference(g,
+ temp_to_node[i],
+ temp_to_node[j]);
+ }
}
}
@@ -237,7 +282,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
assert(ok);
for (uint32_t i = 0; i < c->num_temps; i++) {
- temp_registers[i] = vc4_regs[ra_get_node_reg(g, i)];
+ temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];
/* If the value's never used, just write to the NOP register
* for clarity in debug output.