ra: Trade off some space to get time efficiency in ra_set_finalize().

Our use of the register allocator in i965 is somewhat unusual. Whereas most architectures would have a smaller set of registers with fewer register classes and reuse that across compilation, we have 1, 2, and 4-register classes (usually) and a variable number up to 128 registers per compile depending on how many setup parameters and push constants are present. As a result, when compiling large numbers of programs (as with glean texCombine going through ff_fragment_shader), we spent much of our CPU time in computing the q[] array. By keeping a separate list of what the conflicts are for a particular reg, we reduce glean texCombine time 17.0% +/- 2.3% (n=5). We don't expect this optimization to be useful for 915, which will have a constant register set, but it would be useful if we were to switch to this register allocator for Mesa IR.
author: Eric Anholt <[email protected]> 2011-01-17 18:34:43 -0800
committer: Eric Anholt <[email protected]> 2011-01-18 10:17:34 -0800
commit: 754b9c5363aa7ae5f47c88c78790b3fe35c07403 (patch)
tree: f6a82ee02a2f2f7c9a130c394bd35b425ecd5e86
parent: 5b58b8c579f06edc2e5ead011e2544eadfbfb77a (diff)
1 files changed, 32 insertions, 6 deletions
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index ada6e356419..3d8ccb39162 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -40,6 +40,9 @@
 struct ra_reg {
    char *name;
    GLboolean *conflicts;
+   unsigned int *conflict_list;
+   unsigned int conflict_list_size;
+   unsigned int num_conflicts;
 };
 
 struct ra_regs {
@@ -100,16 +103,39 @@ ra_alloc_reg_set(unsigned int count)
    for (i = 0; i < count; i++) {
       regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count);
       regs->regs[i].conflicts[i] = GL_TRUE;
+
+      regs->regs[i].conflict_list = talloc_array(regs->regs, unsigned int, 4);
+      regs->regs[i].conflict_list_size = 4;
+      regs->regs[i].conflict_list[0] = i;
+      regs->regs[i].num_conflicts = 1;
    }
 
    return regs;
 }
 
+static void
+ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
+{
+   struct ra_reg *reg1 = &regs->regs[r1];
+
+   if (reg1->conflict_list_size == reg1->num_conflicts) {
+      reg1->conflict_list_size *= 2;
+      reg1->conflict_list = talloc_realloc(regs,
+					   reg1->conflict_list,
+					   unsigned int,
+					   reg1->conflict_list_size);
+   }
+   reg1->conflict_list[reg1->num_conflicts++] = r2;
+   reg1->conflicts[r2] = GL_TRUE;
+}
+
 void
 ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 {
-   regs->regs[r1].conflicts[r2] = GL_TRUE;
-   regs->regs[r2].conflicts[r1] = GL_TRUE;
+   if (!regs->regs[r1].conflicts[r2]) {
+      ra_add_conflict_list(regs, r1, r2);
+      ra_add_conflict_list(regs, r2, r1);
+   }
 }
 
 unsigned int
@@ -160,15 +186,15 @@ ra_set_finalize(struct ra_regs *regs)
 	 int max_conflicts = 0;
 
 	 for (rc = 0; rc < regs->count; rc++) {
-	    unsigned int rb;
 	    int conflicts = 0;
+	    int i;
 
 	    if (!regs->classes[c]->regs[rc])
 	       continue;
 
-	    for (rb = 0; rb < regs->count; rb++) {
-	       if (regs->classes[b]->regs[rb] &&
-		   regs->regs[rb].conflicts[rc])
+	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
+	       unsigned int rb = regs->regs[rc].conflict_list[i];
+	       if (regs->classes[b]->regs[rb])
 		  conflicts++;
 	    }
 	    max_conflicts = MAX2(max_conflicts, conflicts);
author	Eric Anholt <[email protected]>	2011-01-17 18:34:43 -0800
committer	Eric Anholt <[email protected]>	2011-01-18 10:17:34 -0800
commit	754b9c5363aa7ae5f47c88c78790b3fe35c07403 (patch)
tree	f6a82ee02a2f2f7c9a130c394bd35b425ecd5e86
parent	5b58b8c579f06edc2e5ead011e2544eadfbfb77a (diff)