diff options
author | Eric Anholt <[email protected]> | 2015-08-21 10:57:24 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-08-21 13:29:26 -0700 |
commit | eb2776504ae32feaf41a5bad9f09f154045e96a3 (patch) | |
tree | 42aaf9f1fbb58807ccba1913f42bff009129765f | |
parent | 89b1b33f44bc6ce71109ac8668529c30b6d6d910 (diff) |
vc4: Actually allow math results to allocate into r4.
I switched us to tracking whether the results *could* go to r4, but then
didn't make a separate register class for the class bits that included r4.
Switch the "any" class to actually be "any", and name the "any but r4"
class more appropriately.
total instructions in shared programs: 96798 -> 94680 (-2.19%)
instructions in affected programs: 62736 -> 60618 (-3.38%)
-rw-r--r-- | src/gallium/drivers/vc4/vc4_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_register_allocate.c | 7 |
2 files changed, 7 insertions, 1 deletion
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 654c46f3c0d..3a63af8f2b0 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -270,6 +270,7 @@ struct vc4_context {
 
         struct ra_regs *regs;
         unsigned int reg_class_any;
+        unsigned int reg_class_a_or_b_or_acc;
         unsigned int reg_class_r4_or_a;
         unsigned int reg_class_a;
 
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 2ea88500227..3ced50f3a44 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
         vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
 
         vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
         for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
@@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
                  */
                 if (vc4_regs[i].mux == QPU_MUX_R4) {
                         ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+                        ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
                         continue;
                 }
 
                 ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+                ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
         }
 
         for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
@@ -304,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 
                 switch (class_bits[i]) {
                 case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
-                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
                         ra_set_node_class(g, node, vc4->reg_class_any);
                         break;
+                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+                        ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+                        break;
                 case CLASS_BIT_A | CLASS_BIT_R4:
                         ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
                         break;