aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/state_tracker
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2019-04-11 12:28:48 -0700
committer: Kenneth Graunke <[email protected]> 2019-05-28 01:06:48 -0700
commit: c31b4420e7812e144d9d7601c037e3225000b24c (patch)
tree: f3061efb70d3b220bf78700e19326603b39e5b75 /src/mesa/state_tracker
parent: 1d0a8cf40d87ee903d7738f05bf87fd4141ce40d (diff)
st/nir: Re-vectorize shader IO
We scalarize IO to enable further optimizations, such as propagating constant components across shaders, eliminating dead components, and so on. This patch attempts to re-vectorize those operations after the varying optimizations are done.

Intel GPUs are a scalar architecture, but IO operations work on whole vec4's at a time, so we'd prefer to have a single IO load per vector rather than 4 scalar IO loads. This re-vectorization can help a lot.

Broadcom GPUs, however, really do want scalar IO. radeonsi may want this, or may want to leave it to LLVM. So, we make a new flag in the NIR compiler options struct, and key it off of that, allowing drivers to pick. (It's a bit awkward because we have per-stage settings, but this is about IO between two stages...but I expect drivers to globally prefer one way or the other. We can adjust later if needed.)

Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/mesa/state_tracker')
-rw-r--r-- src/mesa/state_tracker/st_glsl_to_nir.cpp | 25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index a87284ef2a7..11fc03baf86 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -655,6 +655,28 @@ st_nir_get_mesa_program(struct gl_context *ctx,
}
static void
+st_nir_vectorize_io(nir_shader *producer, nir_shader *consumer)
+{
+ NIR_PASS_V(producer, nir_lower_io_to_vector, nir_var_shader_out);
+ NIR_PASS_V(producer, nir_opt_combine_stores, nir_var_shader_out);
+ NIR_PASS_V(consumer, nir_lower_io_to_vector, nir_var_shader_in);
+
+ if ((producer)->info.stage != MESA_SHADER_TESS_CTRL) {
+ /* Calling lower_io_to_vector creates output variable writes with
+ * write-masks. We only support these for TCS outputs, so for other
+ * stages, we need to call nir_lower_io_to_temporaries to get rid of
+ * them. This, in turn, creates temporary variables and extra
+ * copy_deref intrinsics that we need to clean up.
+ */
+ NIR_PASS_V(producer, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(producer), true, false);
+ NIR_PASS_V(producer, nir_lower_global_vars_to_local);
+ NIR_PASS_V(producer, nir_split_var_copies);
+ NIR_PASS_V(producer, nir_lower_var_copies);
+ }
+}
+
+static void
st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
{
if (scalar) {
@@ -844,6 +866,9 @@ st_link_nir(struct gl_context *ctx,
prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
nir, ctx->API != API_OPENGL_COMPAT);
+
+ if (ctx->Const.ShaderCompilerOptions[i].NirOptions->vectorize_io)
+ st_nir_vectorize_io(prev_shader->nir, nir);
}
prev = i;
}