diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 64 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 1 |
2 files changed, 45 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2756b71d9fc..91b72f7f20b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -218,6 +218,13 @@ vec4_instruction::is_math() opcode == SHADER_OPCODE_INT_REMAINDER || opcode == SHADER_OPCODE_POW); } + +bool +vec4_instruction::is_send_from_grf() +{ + return false; +} + /** * Returns how many MRFs an opcode will write over. * @@ -878,27 +885,46 @@ vec4_visitor::opt_register_coalesce() * * We initially create large virtual GRFs for temporary structures, arrays, * and matrices, so that the dereference visitor functions can add reg_offsets - * to work their way down to the actual member being accessed. + * to work their way down to the actual member being accessed. But when it + * comes to optimization, we'd like to treat each register as individual + * storage if possible. * - * Unlike in the FS visitor, though, we have no SEND messages that return more - * than 1 register. We also don't do any array access in register space, - * which would have required contiguous physical registers. Thus, all those - * large virtual GRFs can be split up into independent single-register virtual - * GRFs, making allocation and optimization easier. + * So far, the only thing that might prevent splitting is a send message from + * a GRF on IVB. */ void vec4_visitor::split_virtual_grfs() { int num_vars = this->virtual_grf_count; int new_virtual_grf[num_vars]; + bool split_grf[num_vars]; memset(new_virtual_grf, 0, sizeof(new_virtual_grf)); + /* Try to split anything larger than one register. */ + for (int i = 0; i < num_vars; i++) { + split_grf[i] = this->virtual_grf_sizes[i] != 1; + } + + /* Check that the instructions are compatible with the registers we're trying + * to split. 
+ */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* If there's a SEND message loading from a GRF on gen7+, it needs to be + * contiguous. Assume that the GRF for the SEND is always in src[0]. + */ + if (inst->is_send_from_grf()) { + split_grf[inst->src[0].reg] = false; + } + } + /* Allocate new space for split regs. Note that the virtual * numbers will be contiguous. */ for (int i = 0; i < num_vars; i++) { - if (this->virtual_grf_sizes[i] == 1) + if (!split_grf[i]) continue; new_virtual_grf[i] = virtual_grf_alloc(1); @@ -913,21 +939,19 @@ vec4_visitor::split_virtual_grfs() foreach_list(node, &this->instructions) { vec4_instruction *inst = (vec4_instruction *)node; - if (inst->dst.file == GRF && - new_virtual_grf[inst->dst.reg] && - inst->dst.reg_offset != 0) { - inst->dst.reg = (new_virtual_grf[inst->dst.reg] + - inst->dst.reg_offset - 1); - inst->dst.reg_offset = 0; + if (inst->dst.file == GRF && split_grf[inst->dst.reg] && + inst->dst.reg_offset != 0) { + inst->dst.reg = (new_virtual_grf[inst->dst.reg] + + inst->dst.reg_offset - 1); + inst->dst.reg_offset = 0; } for (int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && - new_virtual_grf[inst->src[i].reg] && - inst->src[i].reg_offset != 0) { - inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + - inst->src[i].reg_offset - 1); - inst->src[i].reg_offset = 0; - } + if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] && + inst->src[i].reg_offset != 0) { + inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + + inst->src[i].reg_offset - 1); + inst->src[i].reg_offset = 0; + } } } this->live_intervals_valid = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 61e18a66a40..38d06d0e535 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -194,6 +194,7 @@ public: bool is_tex(); bool is_math(); + bool is_send_from_grf(); bool can_reswizzle_dst(int 
dst_writemask, int swizzle, int swizzle_mask); void reswizzle_dst(int dst_writemask, int swizzle); }; |