diff options
author | Eric Anholt <[email protected]> | 2011-01-17 18:34:43 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2011-01-18 10:17:34 -0800 |
commit | 754b9c5363aa7ae5f47c88c78790b3fe35c07403 (patch) | |
tree | f6a82ee02a2f2f7c9a130c394bd35b425ecd5e86 | |
parent | 5b58b8c579f06edc2e5ead011e2544eadfbfb77a (diff) |
ra: Trade off some space to get time efficiency in ra_set_finalize().
Our use of the register allocator in i965 is somewhat unusual.
Whereas most architectures would have a smaller set of registers with
fewer register classes and reuse that across compilation, we have 1,
2, and 4-register classes (usually) and a variable number up to 128
registers per compile depending on how many setup parameters and push
constants are present. As a result, when compiling large numbers of
programs (as with glean texCombine going through ff_fragment_shader),
we spent much of our CPU time in computing the q[] array. By keeping
a separate list of what the conflicts are for a particular reg, we
reduce glean texCombine time 17.0% +/- 2.3% (n=5).
We don't expect this optimization to be useful for 915, which will
have a constant register set, but it would be useful if we were switch
to this register allocator for Mesa IR.
-rw-r--r-- | src/mesa/program/register_allocate.c | 38 |
1 files changed, 32 insertions, 6 deletions
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index ada6e356419..3d8ccb39162 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -40,6 +40,9 @@ struct ra_reg { char *name; GLboolean *conflicts; + unsigned int *conflict_list; + unsigned int conflict_list_size; + unsigned int num_conflicts; }; struct ra_regs { @@ -100,16 +103,39 @@ ra_alloc_reg_set(unsigned int count) for (i = 0; i < count; i++) { regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count); regs->regs[i].conflicts[i] = GL_TRUE; + + regs->regs[i].conflict_list = talloc_array(regs->regs, unsigned int, 4); + regs->regs[i].conflict_list_size = 4; + regs->regs[i].conflict_list[0] = i; + regs->regs[i].num_conflicts = 1; } return regs; } +static void +ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) +{ + struct ra_reg *reg1 = ®s->regs[r1]; + + if (reg1->conflict_list_size == reg1->num_conflicts) { + reg1->conflict_list_size *= 2; + reg1->conflict_list = talloc_realloc(regs, + reg1->conflict_list, + unsigned int, + reg1->conflict_list_size); + } + reg1->conflict_list[reg1->num_conflicts++] = r2; + reg1->conflicts[r2] = GL_TRUE; +} + void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) { - regs->regs[r1].conflicts[r2] = GL_TRUE; - regs->regs[r2].conflicts[r1] = GL_TRUE; + if (!regs->regs[r1].conflicts[r2]) { + ra_add_conflict_list(regs, r1, r2); + ra_add_conflict_list(regs, r2, r1); + } } unsigned int @@ -160,15 +186,15 @@ ra_set_finalize(struct ra_regs *regs) int max_conflicts = 0; for (rc = 0; rc < regs->count; rc++) { - unsigned int rb; int conflicts = 0; + int i; if (!regs->classes[c]->regs[rc]) continue; - for (rb = 0; rb < regs->count; rb++) { - if (regs->classes[b]->regs[rb] && - regs->regs[rb].conflicts[rc]) + for (i = 0; i < regs->regs[rc].num_conflicts; i++) { + unsigned int rb = regs->regs[rc].conflict_list[i]; + if (regs->classes[b]->regs[rb]) conflicts++; } max_conflicts = MAX2(max_conflicts, conflicts); |