diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_register_allocate.c | 73 |
1 files changed, 59 insertions, 14 deletions
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index b62669feb30..3001900c074 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4) ra_set_finalize(vc4->regs, NULL); } +struct node_to_temp_map { + uint32_t temp; + uint32_t priority; +}; + +static int +node_to_temp_priority(const void *in_a, const void *in_b) +{ + const struct node_to_temp_map *a = in_a; + const struct node_to_temp_map *b = in_b; + + return a->priority - b->priority; +} + /** * Returns a mapping from QFILE_TEMP indices to struct qpu_regs. * @@ -148,6 +162,8 @@ struct qpu_reg * vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) { struct simple_node *node; + struct node_to_temp_map map[c->num_temps]; + uint32_t temp_to_node[c->num_temps]; uint32_t def[c->num_temps]; uint32_t use[c->num_temps]; struct qpu_reg *temp_registers = calloc(c->num_temps, @@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) struct ra_graph *g = ra_alloc_interference_graph(vc4->regs, c->num_temps); - for (uint32_t i = 0; i < c->num_temps; i++) + for (uint32_t i = 0; i < c->num_temps; i++) { ra_set_node_class(g, i, vc4->reg_class_any); + } - /* Compute the live ranges so we can figure out interference, and - * figure out our register classes and preallocated registers. + /* Compute the live ranges so we can figure out interference. */ uint32_t ip = 0; foreach(node, &c->instructions) { @@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) switch (inst->op) { case QOP_FRAG_Z: + case QOP_FRAG_W: + /* The payload registers have values implicitly loaded + * at the start of the program. + */ def[inst->dst.index] = 0; - ra_set_node_reg(g, inst->dst.index, + break; + default: + break; + } + + ip++; + } + + for (uint32_t i = 0; i < c->num_temps; i++) { + map[i].temp = i; + map[i].priority = use[i] - def[i]; + } + qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority); + for (uint32_t i = 0; i < c->num_temps; i++) { + temp_to_node[map[i].temp] = i; + } + + /* Figure out our register classes and preallocated registers*/ + foreach(node, &c->instructions) { + struct qinst *inst = (struct qinst *)node; + + switch (inst->op) { + case QOP_FRAG_Z: + ra_set_node_reg(g, temp_to_node[inst->dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1); break; case QOP_FRAG_W: - def[inst->dst.index] = 0; - ra_set_node_reg(g, inst->dst.index, + ra_set_node_reg(g, temp_to_node[inst->dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; case QOP_TEX_RESULT: case QOP_TLB_COLOR_READ: assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4); - ra_set_node_reg(g, inst->dst.index, + ra_set_node_reg(g, temp_to_node[inst->dst.index], ACC_INDEX + 4); break; case QOP_PACK_SCALED: /* The pack flags require an A-file dst register. */ - ra_set_node_class(g, inst->dst.index, vc4->reg_class_a); + ra_set_node_class(g, temp_to_node[inst->dst.index], + vc4->reg_class_a); break; case QOP_UNPACK_8A: @@ -216,20 +259,22 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_UNPACK_8C: case QOP_UNPACK_8D: /* The unpack flags require an A-file src register. */ - ra_set_node_class(g, inst->src[0].index, vc4->reg_class_a); + ra_set_node_class(g, temp_to_node[inst->src[0].index], + vc4->reg_class_a); break; default: break; } - - ip++; } for (uint32_t i = 0; i < c->num_temps; i++) { for (uint32_t j = i + 1; j < c->num_temps; j++) { - if (!(def[i] >= use[j] || def[j] >= use[i])) - ra_add_node_interference(g, i, j); + if (!(def[i] >= use[j] || def[j] >= use[i])) { + ra_add_node_interference(g, + temp_to_node[i], + temp_to_node[j]); + } } } @@ -237,7 +282,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) assert(ok); for (uint32_t i = 0; i < c->num_temps; i++) { - temp_registers[i] = vc4_regs[ra_get_node_reg(g, i)]; + temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])]; /* If the value's never used, just write to the NOP register * for clarity in debug output. |