summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2014-08-19 13:57:11 -0700
committerJason Ekstrand <[email protected]>2014-09-30 10:29:13 -0700
commit3dc3fccb7586e6198c50114d6245017fc9badde8 (patch)
tree552a17b7b741470bdb6629d40c0fa44960dad02c
parentf9da0740e22f27a6f8bd429f0bb768752b311398 (diff)
i965/fs: Rewrite fs_visitor::split_virtual_grfs
The original vgrf splitting code was written with the assumption that vgrfs came in two types: those that can be split into single registers and those that can't be split at all. It was very conservative and bailed as soon as more than one element of a register was read or written. This won't work once we start allowing a regular MOV or ADD operation to operate on multiple registers. This rewrite allows for the case where a vgrf of size 5 may appropriately be split into one register of size 1 and two registers of size 2. Signed-off-by: Jason Ekstrand <[email protected]> Acked-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp133
1 files changed, 86 insertions, 47 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f3c39e7cdb4..50de9f413b8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1645,15 +1645,39 @@ void
fs_visitor::split_virtual_grfs()
{
int num_vars = this->virtual_grf_count;
- bool split_grf[num_vars];
- int new_virtual_grf[num_vars];
- /* Try to split anything > 0 sized. */
+ /* Count the total number of registers */
+ int reg_count = 0;
+ int vgrf_to_reg[num_vars];
for (int i = 0; i < num_vars; i++) {
- if (this->virtual_grf_sizes[i] != 1)
- split_grf[i] = true;
- else
- split_grf[i] = false;
+ vgrf_to_reg[i] = reg_count;
+ reg_count += virtual_grf_sizes[i];
+ }
+
+ /* An array of "split points". For each register slot, this indicates
+ * if this slot can be separated from the previous slot. Every time an
+ * instruction uses multiple elements of a register (as a source or
+ * destination), we mark the used slots as inseparable. Then we go
+ * through and split the registers into the smallest pieces we can.
+ */
+ bool split_points[reg_count];
+ memset(split_points, 0, sizeof(split_points));
+
+ /* Mark all used registers as fully splittable */
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ if (inst->dst.file == GRF) {
+ int reg = vgrf_to_reg[inst->dst.reg];
+ for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++)
+ split_points[reg + j] = true;
+ }
+
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == GRF) {
+ int reg = vgrf_to_reg[inst->src[i].reg];
+ for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++)
+ split_points[reg + j] = true;
+ }
+ }
}
if (brw->has_pln &&
@@ -1663,61 +1687,75 @@ fs_visitor::split_virtual_grfs()
* Gen6, that was the only supported interpolation mode, and since Gen6,
* delta_x and delta_y are in fixed hardware registers.
*/
- split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] =
- false;
+ int vgrf = this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg;
+ split_points[vgrf_to_reg[vgrf] + 1] = false;
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- /* If there's a SEND message that requires contiguous destination
- * registers, no splitting is allowed.
- */
- if (inst->regs_written > 1) {
- split_grf[inst->dst.reg] = false;
+ if (inst->dst.file == GRF) {
+ int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+ for (int j = 1; j < inst->regs_written; j++)
+ split_points[reg + j] = false;
}
-
- /* If we're sending from a GRF, don't split it, on the assumption that
- * the send is reading the whole thing.
- */
- if (inst->is_send_from_grf()) {
- for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- split_grf[inst->src[i].reg] = false;
- }
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == GRF) {
+ int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+ for (int j = 1; j < inst->regs_read(this, i); j++)
+ split_points[reg + j] = false;
}
}
}
- /* Allocate new space for split regs. Note that the virtual
- * numbers will be contiguous.
- */
+ int new_virtual_grf[reg_count];
+ int new_reg_offset[reg_count];
+
+ int reg = 0;
for (int i = 0; i < num_vars; i++) {
- if (split_grf[i]) {
- new_virtual_grf[i] = virtual_grf_alloc(1);
- for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
- int reg = virtual_grf_alloc(1);
- assert(reg == new_virtual_grf[i] + j - 1);
- (void) reg;
- }
- this->virtual_grf_sizes[i] = 1;
+ /* The first one should always be 0 as a quick sanity check. */
+ assert(split_points[reg] == false);
+
+ /* j = 0 case */
+ new_reg_offset[reg] = 0;
+ reg++;
+ int offset = 1;
+
+ /* j > 0 case */
+ for (int j = 1; j < virtual_grf_sizes[i]; j++) {
+ /* If this is a split point, reset the offset to 0 and allocate a
+ * new virtual GRF for the previous offset many registers
+ */
+ if (split_points[reg]) {
+ int grf = virtual_grf_alloc(offset);
+ for (int k = reg - offset; k < reg; k++)
+ new_virtual_grf[k] = grf;
+ offset = 0;
+ }
+ new_reg_offset[reg] = offset;
+ offset++;
+ reg++;
}
+
+ /* The last one gets the original register number */
+ virtual_grf_sizes[i] = offset;
+ for (int k = reg - offset; k < reg; k++)
+ new_virtual_grf[k] = i;
}
+ assert(reg == reg_count);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF &&
- split_grf[inst->dst.reg] &&
- inst->dst.reg_offset != 0) {
- inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
- inst->dst.reg_offset - 1);
- inst->dst.reg_offset = 0;
+ if (inst->dst.file == GRF) {
+ reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+ inst->dst.reg = new_virtual_grf[reg];
+ inst->dst.reg_offset = new_reg_offset[reg];
+ assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF &&
- split_grf[inst->src[i].reg] &&
- inst->src[i].reg_offset != 0) {
- inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
- inst->src[i].reg_offset - 1);
- inst->src[i].reg_offset = 0;
- }
+ if (inst->src[i].file == GRF) {
+ reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+ inst->src[i].reg = new_virtual_grf[reg];
+ inst->src[i].reg_offset = new_reg_offset[reg];
+ assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+ }
}
}
invalidate_live_intervals();
@@ -2331,6 +2369,7 @@ fs_visitor::compute_to_mrf()
void
fs_visitor::emit_repclear_shader()
{
+ brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
int base_mrf = 1;
int color_mrf = base_mrf + 2;