i965/fs: Prefer things we know reduce reg pressure when pre-scheduling.

Previously, the best thing we had was to schedule the things unblocked by the last chosen instruction, on the hope that it would be consuming two values at the end of their live intervals while only producing one new value. But that's just a guess, and we can do counting of usage of registers to know when an instruction would (almost surely) reduce register pressure. The only failure mode I know of in this new dominant heuristic is that inside of a loop when scheduling the iterator (for example), choosing the last use of the iterator doesn't actually reduce the live interval of the iterator. But it doesn't seem to matter in shader-db: total instructions in shared programs: 1618700 -> 1618700 (0.00%) instructions in affected programs: 0 -> 0 GAINED: 13 LOST: 0 Note: The new functions are made virtual because I expect we'll soon lift the pre-regalloc scheduling heuristic over to the vec4 backend. Cc: "10.0" <[email protected]> Reviewed-by: Matt Turner <[email protected]>
author: Eric Anholt <[email protected]> 2013-10-14 11:38:09 -0700
committer: Eric Anholt <[email protected]> 2013-11-12 15:04:32 -0800
commit: bc0e3bb4d05fbb5e8e2af8dce8170cb78cfe0e70 (patch)
tree: cb0f38222e4eabc757c13601fa99d02a155b35e8
parent: 9b3e1592c26a183580342282e509d906d78bb6f6 (diff)
1 files changed, 144 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 5a425133a9d..53fffa72a85 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -417,6 +417,13 @@ public:
       this->instructions_to_schedule = 0;
       this->post_reg_alloc = post_reg_alloc;
       this->time = 0;
+      if (!post_reg_alloc) {
+         this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count);
+         this->grf_active = rzalloc_array(mem_ctx, bool, grf_count);
+      } else {
+         this->remaining_grf_uses = NULL;
+         this->grf_active = NULL;
+      }
    }
 
    ~instruction_scheduler()
@@ -442,6 +449,10 @@ public:
     */
    virtual int issue_time(backend_instruction *inst) = 0;
 
+   virtual void count_remaining_grf_uses(backend_instruction *inst) = 0;
+   virtual void update_register_pressure(backend_instruction *inst) = 0;
+   virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
+
    void schedule_instructions(backend_instruction *next_block_header);
 
    void *mem_ctx;
@@ -452,6 +463,22 @@ public:
    int time;
    exec_list instructions;
    backend_visitor *bv;
+
+   /**
+    * Number of instructions left to schedule that reference each vgrf.
+    *
+    * Used so that we can prefer scheduling instructions that will end the
+    * live intervals of multiple variables, to reduce register pressure.
+    */
+   int *remaining_grf_uses;
+
+   /**
+    * Tracks whether each VGRF has had an instruction scheduled that uses it.
+    *
+    * This is used to estimate whether scheduling a new instruction will
+    * increase register pressure.
+    */
+   bool *grf_active;
 };
 
 class fs_instruction_scheduler : public instruction_scheduler
@@ -463,6 +490,10 @@ public:
    schedule_node *choose_instruction_to_schedule();
    int issue_time(backend_instruction *inst);
    fs_visitor *v;
+
+   void count_remaining_grf_uses(backend_instruction *inst);
+   void update_register_pressure(backend_instruction *inst);
+   int get_register_pressure_benefit(backend_instruction *inst);
 };
 
 fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
@@ -473,6 +504,72 @@ fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
 {
 }
 
+void
+fs_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+{
+   fs_inst *inst = (fs_inst *)be;
+
+   if (!remaining_grf_uses)
+      return;
+
+   if (inst->dst.file == GRF)
+      remaining_grf_uses[inst->dst.reg]++;
+
+   for (int i = 0; i < 3; i++) {
+      if (inst->src[i].file != GRF)
+         continue;
+
+      remaining_grf_uses[inst->src[i].reg]++;
+   }
+}
+
+void
+fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
+{
+   fs_inst *inst = (fs_inst *)be;
+
+   if (!remaining_grf_uses)
+      return;
+
+   if (inst->dst.file == GRF) {
+      remaining_grf_uses[inst->dst.reg]--;
+      grf_active[inst->dst.reg] = true;
+   }
+
+   for (int i = 0; i < 3; i++) {
+      if (inst->src[i].file == GRF) {
+         remaining_grf_uses[inst->src[i].reg]--;
+         grf_active[inst->src[i].reg] = true;
+      }
+   }
+}
+
+int
+fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
+{
+   fs_inst *inst = (fs_inst *)be;
+   int benefit = 0;
+
+   if (inst->dst.file == GRF) {
+      if (remaining_grf_uses[inst->dst.reg] == 1)
+         benefit += v->virtual_grf_sizes[inst->dst.reg];
+      if (!grf_active[inst->dst.reg])
+         benefit -= v->virtual_grf_sizes[inst->dst.reg];
+   }
+
+   for (int i = 0; i < 3; i++) {
+      if (inst->src[i].file != GRF)
+         continue;
+
+      if (remaining_grf_uses[inst->src[i].reg] == 1)
+         benefit += v->virtual_grf_sizes[inst->src[i].reg];
+      if (!grf_active[inst->src[i].reg])
+         benefit -= v->virtual_grf_sizes[inst->src[i].reg];
+   }
+
+   return benefit;
+}
+
 class vec4_instruction_scheduler : public instruction_scheduler
 {
 public:
@@ -481,6 +578,10 @@ public:
    schedule_node *choose_instruction_to_schedule();
    int issue_time(backend_instruction *inst);
    vec4_visitor *v;
+
+   void count_remaining_grf_uses(backend_instruction *inst);
+   void update_register_pressure(backend_instruction *inst);
+   int get_register_pressure_benefit(backend_instruction *inst);
 };
 
 vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
@@ -491,6 +592,22 @@ vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
 }
 
 void
+vec4_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+{
+}
+
+void
+vec4_instruction_scheduler::update_register_pressure(backend_instruction *be)
+{
+}
+
+int
+vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
+{
+   return 0;
+}
+
+void
 instruction_scheduler::add_inst(backend_instruction *inst)
 {
    schedule_node *n = new(mem_ctx) schedule_node(inst, bv->brw);
@@ -1037,6 +1154,23 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
             continue;
          }
 
+         /* Most important: If we can definitely reduce register pressure, do
+          * so immediately.
+          */
+         int register_pressure_benefit = get_register_pressure_benefit(n->inst);
+         int chosen_register_pressure_benefit =
+            get_register_pressure_benefit(chosen->inst);
+
+         if (register_pressure_benefit > 0 &&
+             register_pressure_benefit > chosen_register_pressure_benefit) {
+            chosen = n;
+            continue;
+         } else if (chosen_register_pressure_benefit > 0 &&
+                    (register_pressure_benefit <
+                     chosen_register_pressure_benefit)) {
+            continue;
+         }
+
          /* Prefer instructions that recently became available for scheduling.
           * These are the things that are most likely to (eventually) make a
           * variable dead and reduce register pressure.  Typical register
@@ -1153,6 +1287,7 @@ instruction_scheduler::schedule_instructions(backend_instruction *next_block_hea
       chosen->remove();
       next_block_header->insert_before(chosen->inst);
       instructions_to_schedule--;
+      update_register_pressure(chosen->inst);
 
       /* Update the clock for how soon an instruction could start after the
        * chosen one.
@@ -1228,6 +1363,15 @@ instruction_scheduler::run(exec_list *all_instructions)
       bv->dump_instructions();
    }
 
+   /* Populate the remaining GRF uses array to improve the pre-regalloc
+    * scheduling.
+    */
+   if (remaining_grf_uses) {
+      foreach_list(node, all_instructions) {
+         count_remaining_grf_uses((backend_instruction *)node);
+      }
+   }
+
    while (!next_block_header->is_tail_sentinel()) {
       /* Add things to be scheduled until we get to a new BB. */
       while (!next_block_header->is_tail_sentinel()) {
author	Eric Anholt <[email protected]>	2013-10-14 11:38:09 -0700
committer	Eric Anholt <[email protected]>	2013-11-12 15:04:32 -0800
commit	bc0e3bb4d05fbb5e8e2af8dce8170cb78cfe0e70 (patch)
tree	cb0f38222e4eabc757c13601fa99d02a155b35e8
parent	9b3e1592c26a183580342282e509d906d78bb6f6 (diff)