intel/fs: Make implied_mrf_writes() an fs_inst method.

This will be convenient in a later commit enabling SIMD32 fragment shaders, and happens to fix the calculation for MATH instructions which is currently inaccurate for SIMD-lowered instructions on Gen4-5 platforms (all of them on Gen4 in SIMD16 mode), since it was based on the shader's dispatch width rather than on the actual execution size of the instruction. This causes some shader-db noise on Gen4 due to the more compact register allocation interacting with the SEND dependency workarounds, but otherwise no major changes. Reviewed-by: Kenneth Graunke <[email protected]>
author: Francisco Jerez <[email protected]> 2019-12-27 16:38:26 -0800
committer: Francisco Jerez <[email protected]> 2020-01-10 11:02:30 -0800
commit: c20dc9b8363b5b497d4f6af5144590e8c2d940c0 (patch)
tree: fc1729fa9ef1dea7758efeaa7c04ce3a6be1b455 /src
parent: 591f146fd2c7b265cd9e759c242e6d6437ea6578 (diff)
7 files changed, 17 insertions, 18 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index f78c953f0f3..eaa57e4fe0a 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1164,16 +1164,16 @@ fs_inst::flags_written() const
  * Note that this is not the 0 or 1 implied writes in an actual gen
  * instruction -- the FS opcodes often generate MOVs in addition.
  */
-int
-fs_visitor::implied_mrf_writes(const fs_inst *inst) const
+unsigned
+fs_inst::implied_mrf_writes() const
 {
-   if (inst->mlen == 0)
+   if (mlen == 0)
       return 0;
 
-   if (inst->base_mrf == -1)
+   if (base_mrf == -1)
       return 0;
 
-   switch (inst->opcode) {
+   switch (opcode) {
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT:
@@ -1181,11 +1181,11 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
-      return 1 * dispatch_width / 8;
+      return 1 * exec_size / 8;
    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
-      return 2 * dispatch_width / 8;
+      return 2 * exec_size / 8;
    case SHADER_OPCODE_TEX:
    case FS_OPCODE_TXB:
    case SHADER_OPCODE_TXD:
@@ -1201,14 +1201,14 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
       return 1;
    case FS_OPCODE_FB_WRITE:
    case FS_OPCODE_REP_FB_WRITE:
-      return inst->src[0].file == BAD_FILE ? 0 : 2;
+      return src[0].file == BAD_FILE ? 0 : 2;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
       return 1;
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
-      return inst->mlen;
+      return mlen;
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
-      return inst->mlen;
+      return mlen;
    default:
       unreachable("not reached");
    }
@@ -3494,7 +3494,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	 /* Found a SEND instruction, which will include two or fewer
 	  * implied MRF writes.  We could do better here.
 	  */
-	 for (int i = 0; i < implied_mrf_writes(inst); i++) {
+	 for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
 	    last_mrf_move[inst->base_mrf + i] = NULL;
 	 }
       }
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 5236fff4b6e..a86db3ca075 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -302,8 +302,6 @@ public:
 
    fs_reg interp_reg(int location, int channel);
 
-   int implied_mrf_writes(const fs_inst *inst) const;
-
    virtual void dump_instructions();
    virtual void dump_instructions(const char *name);
    void dump_instruction(backend_instruction *inst);
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index 938ebcceac4..e908d0e38fc 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -573,7 +573,7 @@ namespace {
          if (v->devinfo->gen >= 7) {
             assert(inst->dst.file != MRF);
 
-            for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+            for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
                const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
                constrained[p.atom_of_reg(reg)] = true;
             }
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp
index bf3f86b0d56..966080f6b6c 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -494,7 +494,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
       }
 
       if (inst->mlen > 0) {
-	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+	 for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
             mrf_used[inst->base_mrf + i] = true;
          }
       }
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp
index 67e07e7f26b..35586f2a107 100644
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -896,7 +896,7 @@ namespace {
             }
 
             if (is_send(inst) && inst->base_mrf != -1) {
-               for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
+               for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
                   add_dependency(ids, deps[ip], dependency_for_write(inst,
                      sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
             }
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 099fb570d70..24e523c5270 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -357,6 +357,7 @@ public:
    bool can_do_cmod();
    bool can_change_types() const;
    bool has_source_and_destination_hazard() const;
+   unsigned implied_mrf_writes() const;
 
    /**
     * Return whether \p arg is a control source of a virtual instruction which
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 575693fc961..9c78944cf68 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -1190,7 +1190,7 @@ fs_instruction_scheduler::calculate_deps()
       }
 
       if (inst->mlen > 0 && inst->base_mrf != -1) {
-         for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
             add_dep(last_mrf_write[inst->base_mrf + i], n);
             last_mrf_write[inst->base_mrf + i] = n;
          }
@@ -1313,7 +1313,7 @@ fs_instruction_scheduler::calculate_deps()
       }
 
       if (inst->mlen > 0 && inst->base_mrf != -1) {
-         for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
             last_mrf_write[inst->base_mrf + i] = n;
          }
       }
author	Francisco Jerez <[email protected]>	2019-12-27 16:38:26 -0800
committer	Francisco Jerez <[email protected]>	2020-01-10 11:02:30 -0800
commit	c20dc9b8363b5b497d4f6af5144590e8c2d940c0 (patch)
tree	fc1729fa9ef1dea7758efeaa7c04ce3a6be1b455 /src
parent	591f146fd2c7b265cd9e759c242e6d6437ea6578 (diff)