1 files changed, 19 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 547b60ddc0f..31bf25bb88a 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -718,6 +718,25 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
 
    OPT(nir_lower_clip_cull_distance_arrays);
 
+   if (devinfo->gen >= 7 && is_scalar) {
+      /* TODO: Yes, we could in theory do this on gen6 and earlier.  However,
+       * that would require plumbing through support for these indirect
+       * scratch read/write messages with message registers and that's just a
+       * pain.  Also, the primary benefit of this is for compute shaders which
+       * won't run on gen6 and earlier anyway.
+       *
+       * The threshold of 128B was chosen semi-arbitrarily.  The idea is that
+       * 128B per channel on a SIMD8 program is 32 registers or 25% of the
+       * register file.  Any array that large is likely to cause pressure
+       * issues.  Also, this value is sufficiently high that the benchmarks
+       * known to suffer from large temporary array issues are helped but
+       * nothing else in shader-db is hurt except for maybe that one Kerbal
+       * Space Program shader.
+       */
+      OPT(nir_lower_vars_to_scratch, nir_var_function_temp, 128,
+          glsl_get_natural_size_align_bytes);
+   }
+
    nir_variable_mode indirect_mask =
       brw_nir_no_indirect_mask(compiler, nir->info.stage);
    OPT(nir_lower_indirect_derefs, indirect_mask);