intel/fs: Teach instruction scheduler about GRF bank conflict cycles.

This should allow the post-RA scheduler to do a slightly better job at hiding latency in the presence of instructions incurring bank conflicts. The main purpose of this patch is not to improve performance though, but to get conflict cycles to show up in shader-db statistics in order to make sure that regressions in the bank conflict mitigation pass don't go unnoticed. Acked-by: Matt Turner <[email protected]>
author: Francisco Jerez <[email protected]> 2017-12-06 11:42:54 -0800
committer: Francisco Jerez <[email protected]> 2017-12-07 15:56:49 -0800
commit: acf98ff933d338c521d7c6a57c17a010149eb344 (patch)
tree: 07af8a02ab269b0679ee9f7320acc4cbe7ad7f81 /src
parent: af2c320190f3c73180f1610c8df955a7fa2a4d09 (diff)
3 files changed, 23 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 0cec6fdcbad..9c160068a7e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -146,6 +146,7 @@ public:
    bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool opt_bank_conflicts();
+   unsigned bank_conflict_cycles(const fs_inst *inst) const;
    bool register_coalesce();
    bool compute_to_mrf();
    bool eliminate_find_live_channel();
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index b64a3d4a8a8..42cdc6ef7dc 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -891,3 +891,22 @@ fs_visitor::opt_bank_conflicts()
    delete[] constrained;
    return true;
 }
+
+/**
+ * Estimate the number of GRF bank conflict cycles incurred by an instruction.
+ *
+ * Note that this neglects conflict cycles prior to register allocation
+ * because we don't know which bank each VGRF is going to end up aligned to.
+ */
+unsigned
+fs_visitor::bank_conflict_cycles(const fs_inst *inst) const
+{
+   if (grf_used && inst->is_3src(devinfo) &&
+       is_grf(inst->src[1]) && is_grf(inst->src[2]) &&
+       bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) &&
+       !is_conflict_optimized_out(devinfo, inst)) {
+      return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
+   } else {
+      return 0;
+   }
+}
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index a1e825c661c..692f7125323 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -1543,10 +1543,11 @@ vec4_instruction_scheduler::choose_instruction_to_schedule()
 int
 fs_instruction_scheduler::issue_time(backend_instruction *inst)
 {
+   const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
    if (is_compressed((fs_inst *)inst))
-      return 4;
+      return 4 + overhead;
    else
-      return 2;
+      return 2 + overhead;
 }
 
 int
author	Francisco Jerez <[email protected]>	2017-12-06 11:42:54 -0800
committer	Francisco Jerez <[email protected]>	2017-12-07 15:56:49 -0800
commit	acf98ff933d338c521d7c6a57c17a010149eb344 (patch)
tree	07af8a02ab269b0679ee9f7320acc4cbe7ad7f81 /src
parent	af2c320190f3c73180f1610c8df955a7fa2a4d09 (diff)