i965/fs: Use the GRF for FB writes on gen >= 7

On gen 7, the MRF was removed and we gained the ability to do send instructions directly from the GRF. This commit enables that functionality for FB writes. v2: Make handling of components more sane. i965/fs: Force a high register for the final FB write v2: Renamed the array for the range mappings and added a comment Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Matt Turner <[email protected]>
author: Jason Ekstrand <[email protected]> 2014-09-12 16:17:37 -0700
committer: Jason Ekstrand <[email protected]> 2014-09-30 10:29:15 -0700
commit: 514fd1c55e617bb325979cbee4a89f0727c3b567 (patch)
tree: cbacd36b7ed0083887d31a5d8e51a24919f65552 /src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
parent: 1dd9b90ecd8e001b40febfb8908c0b9a0c08c7d5 (diff)
1 files changed, 33 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index ef5720c5368..63d9c05a575 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -113,6 +113,10 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
       class_sizes[class_count++] = 8;
    }
 
+   memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0,
+          sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range));
+   int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range;
+
    /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
@@ -131,6 +135,14 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
       } else {
          ra_reg_count += base_reg_count - (class_sizes[i] - 1);
       }
+      /* Mark the last register. We'll fill in the beginnings later. */
+      class_to_ra_reg_range[class_sizes[i]] = ra_reg_count;
+   }
+
+   /* Fill out the rest of the range markers */
+   for (int i = 1; i < 17; ++i) {
+      if (class_to_ra_reg_range[i] == 0)
+         class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1];
    }
 
    uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count);
@@ -505,9 +517,29 @@ fs_visitor::assign_regs(bool allow_spilling)
    }
 
    setup_payload_interference(g, payload_node_count, first_payload_node);
-   if (brw->gen >= 7)
+   if (brw->gen >= 7) {
       setup_mrf_hack_interference(g, first_mrf_hack_node);
 
+      foreach_block_and_inst(block, fs_inst, inst, cfg) {
+         /* When we do send-from-GRF for FB writes, we need to ensure that
+          * the last write instruction sends from a high register.  This is
+          * because the vertex fetcher wants to start filling the low
+          * payload registers while the pixel data port is still working on
+          * writing out the memory.  If we don't do this, we get rendering
+          * artifacts.
+          *
+          * We could just do "something high".  Instead, we just pick the
+          * highest register that works.
+          */
+         if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+            int size = virtual_grf_sizes[inst->src[0].reg];
+            int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
+            ra_set_node_reg(g, inst->src[0].reg, reg);
+            break;
+         }
+      }
+   }
+
    if (dispatch_width > 8) {
       /* In 16-wide dispatch we have an issue where a compressed
        * instruction is actually two instructions executed simultaneiously.
author	Jason Ekstrand <[email protected]>	2014-09-12 16:17:37 -0700
committer	Jason Ekstrand <[email protected]>	2014-09-30 10:29:15 -0700
commit	514fd1c55e617bb325979cbee4a89f0727c3b567 (patch)
tree	cbacd36b7ed0083887d31a5d8e51a24919f65552 /src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
parent	1dd9b90ecd8e001b40febfb8908c0b9a0c08c7d5 (diff)