intel/nir: Enable nir_opt_find_array_copies

We have to be a bit careful with this one because we want it to run in the optimization loop but only in the first brw_nir_optimize call. Later calls assume that we've lowered away copy_deref instructions and we don't want to introduce any more. Shader-db results on Kaby Lake: total instructions in shared programs: 15176942 -> 15176942 (0.00%) instructions in affected programs: 0 -> 0 helped: 0 HURT: 0 In spite of the lack of any shader-db improvement, this patch completely eliminates spilling in the Batman: Arkham City tessellation shaders. This is because we are now able to detect that the temporary array created by DXVK for storing TCS inputs is a copy of the input arrays and use indirect URB reads instead of making a copy of 4.5 KiB of input data and then indirecting on it with if-ladders. Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
author: Jason Ekstrand <[email protected]> 2018-07-23 22:20:41 -0700
committer: Jason Ekstrand <[email protected]> 2018-08-23 21:47:51 -0500
commit: 8d8222461f9d7f497d657c2c0eff70820986429b (patch)
tree: 431e652e3f96fc721045384c73354e4db94a20de /src
parent: 53072582dc9401c76bb8ef72d8f321f684fb0a15 (diff)
2 files changed, 28 insertions, 13 deletions
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index c9a92c8aeb6..ef5034d1e1e 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -533,7 +533,7 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
 
 nir_shader *
 brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
-                 bool is_scalar)
+                 bool is_scalar, bool allow_copies)
 {
    nir_variable_mode indirect_mask =
       brw_nir_no_indirect_mask(compiler, nir->info.stage);
@@ -544,6 +544,13 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_split_array_vars, nir_var_local);
       OPT(nir_shrink_vec_array_vars, nir_var_local);
       OPT(nir_lower_vars_to_ssa);
+      if (allow_copies) {
+         /* Only run this pass in the first call to brw_nir_optimize.  Later
+          * calls assume that we've lowered away any copy_deref instructions
+          * and we don't want to introduce any more.
+          */
+         OPT(nir_opt_find_array_copies);
+      }
       OPT(nir_opt_copy_prop_vars);
 
       if (is_scalar) {
@@ -664,7 +671,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
                         nir_lower_isign64 |
                         nir_lower_divmod64);
 
-   nir = brw_nir_optimize(nir, compiler, is_scalar);
+   nir = brw_nir_optimize(nir, compiler, is_scalar, true);
 
    /* This needs to be run after the first optimization pass but before we
     * lower indirect derefs away
@@ -701,7 +708,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
    nir_lower_indirect_derefs(nir, indirect_mask);
 
    /* Get rid of split copies */
-   nir = brw_nir_optimize(nir, compiler, is_scalar);
+   nir = brw_nir_optimize(nir, compiler, is_scalar, false);
 
    OPT(nir_remove_dead_variables, nir_var_local);
 
@@ -716,6 +723,18 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
    nir_validate_shader(*producer);
    nir_validate_shader(*consumer);
 
+   const bool p_is_scalar =
+      compiler->scalar_stage[(*producer)->info.stage];
+   const bool c_is_scalar =
+      compiler->scalar_stage[(*consumer)->info.stage];
+
+   if (p_is_scalar && c_is_scalar) {
+      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
+      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
+      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false);
+      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
+   }
+
    NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
    NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
 
@@ -732,13 +751,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                  brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));
 
-      const bool p_is_scalar =
-         compiler->scalar_stage[(*producer)->info.stage];
-      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
-
-      const bool c_is_scalar =
-         compiler->scalar_stage[(*consumer)->info.stage];
-      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
+      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false);
+      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
    }
 }
 
@@ -765,7 +779,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_algebraic_before_ffma);
    } while (progress);
 
-   nir = brw_nir_optimize(nir, compiler, is_scalar);
+   nir = brw_nir_optimize(nir, compiler, is_scalar, false);
 
    if (devinfo->gen >= 6) {
       /* Try and fuse multiply-adds */
@@ -861,7 +875,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
 
    if (nir_lower_tex(nir, &tex_options)) {
       nir_validate_shader(nir);
-      nir = brw_nir_optimize(nir, compiler, is_scalar);
+      nir = brw_nir_optimize(nir, compiler, is_scalar, false);
    }
 
    return nir;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 00b61731526..5c75ef2324a 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -153,7 +153,8 @@ bool brw_nir_opt_peephole_ffma(nir_shader *shader);
 
 nir_shader *brw_nir_optimize(nir_shader *nir,
                              const struct brw_compiler *compiler,
-                             bool is_scalar);
+                             bool is_scalar,
+                             bool allow_copies);
 
 #define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
 #define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
author	Jason Ekstrand <[email protected]>	2018-07-23 22:20:41 -0700
committer	Jason Ekstrand <[email protected]>	2018-08-23 21:47:51 -0500
commit	8d8222461f9d7f497d657c2c0eff70820986429b (patch)
tree	431e652e3f96fc721045384c73354e4db94a20de /src
parent	53072582dc9401c76bb8ef72d8f321f684fb0a15 (diff)