diff options
author | Eric Anholt <[email protected]> | 2011-05-23 09:12:07 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2011-05-27 08:08:23 -0700 |
commit | 615117ce4efd041459f7d4b0c77aa8e248345e66 (patch) | |
tree | e71ffddde7290a934a0882a6e4a741789a0f234c /src/mesa | |
parent | 40540cc517480720e509a63f213ab33d66409bf8 (diff) |
i965/fs: Track fixed GRF regs separate from allocated GRF file in scheduling.
There's an assumption here that fixed GRFs will never intersect with
the allocated GRFs. That's true today, though it might change some
day if we decide to register-allocate the regs containing push
constants once they're dead.
This fixes a Lightsmark regression introduced in
0f7325b89038937bd428f7c89ed9859189a0ab0b, caused by the texture
instructions now containing explicit g0 references instead of leaving
them implied. Performance improves by 15.2% +/- 3.6% (n=3).
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34968
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp | 21 |
2 files changed, 22 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c02237850e2..1cee37cfdb2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1570,7 +1570,7 @@ fs_visitor::visit(ir_texture *ir) * use the null register. Otherwise, we want an implied move from g0. */ if (ir->offset != NULL || !inst->header_present) - inst->src[0] = fs_reg(brw_null_reg()); + inst->src[0] = reg_undef; else inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index ed88aa689d0..d8218c26edb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -263,6 +263,12 @@ instruction_scheduler::calculate_deps() schedule_node *last_grf_write[virtual_grf_count]; schedule_node *last_mrf_write[BRW_MAX_MRF]; schedule_node *last_conditional_mod = NULL; + /* Fixed HW registers are assumed to be separate from the virtual + * GRFs, so they can be tracked separately. We don't really write + * to fixed GRFs much, so don't bother tracking them on a more + * granular level. + */ + schedule_node *last_fixed_grf_write = NULL; /* The last instruction always needs to still be the last * instruction. 
Either it's flow control (IF, ELSE, ENDIF, DO, @@ -285,6 +291,10 @@ instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { add_dep(last_grf_write[inst->src[i].reg], n); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(last_fixed_grf_write, n); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -323,6 +333,9 @@ instruction_scheduler::calculate_deps() add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -344,6 +357,7 @@ instruction_scheduler::calculate_deps() memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); last_conditional_mod = NULL; + last_fixed_grf_write = NULL; exec_node *node; exec_node *prev; @@ -357,6 +371,10 @@ instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { add_dep(n, last_grf_write[inst->src[i].reg]); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(n, last_fixed_grf_write); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -395,6 +413,9 @@ instruction_scheduler::calculate_deps() last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } |