author     Kenneth Graunke <[email protected]>   2019-04-11 12:28:48 -0700
committer  Kenneth Graunke <[email protected]>   2019-05-28 01:06:48 -0700
commit     c31b4420e7812e144d9d7601c037e3225000b24c
tree       f3061efb70d3b220bf78700e19326603b39e5b75
parent     1d0a8cf40d87ee903d7738f05bf87fd4141ce40d
st/nir: Re-vectorize shader IO
We scalarize IO to enable further optimizations, such as propagating
constant components across shaders, eliminating dead components, and
so on. This patch attempts to re-vectorize those operations after
the varying optimizations are done.
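(Illustrative, not part of the commit: a simplified sketch of the ordering described above, using real NIR pass names but a hypothetical helper and an abridged pass list. The actual call sites are st_nir_link_shaders and the new st_nir_vectorize_io in the diff below.)

#include "compiler/nir/nir.h"

static void
link_varyings_sketch(nir_shader *producer, nir_shader *consumer)
{
   /* 1. Split vec4 IO into scalar loads/stores so individual
    *    components can be tracked across the stage boundary. */
   NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
   NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);

   /* 2. Per-component cross-stage optimizations: propagate constant
    *    components, drop dead components. */
   nir_link_opt_varyings(producer, consumer);
   nir_remove_unused_varyings(producer, consumer);

   /* 3. Re-vectorize the surviving scalar IO (this patch). */
   NIR_PASS_V(producer, nir_lower_io_to_vector, nir_var_shader_out);
   NIR_PASS_V(consumer, nir_lower_io_to_vector, nir_var_shader_in);
}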
Intel GPUs have a scalar architecture, but their IO operations work on
whole vec4s at a time, so we'd prefer a single IO load per vector
rather than four scalar IO loads. This re-vectorization can help a lot.
Broadcom GPUs, however, really do want scalar IO. radeonsi may want
this, or may want to leave it to LLVM. So, we add a new flag to the
NIR compiler options struct and key the behavior off of it, letting
each driver pick. (It's a bit awkward because the options are
per-stage settings while this concerns IO between two stages, but I
expect drivers to globally prefer one way or the other. We can adjust
later if needed.)
Reviewed-by: Marek Olšák <[email protected]>
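(Illustrative, not part of the commit: a minimal sketch of how a driver would opt in. The vectorize_io field is the one added by this patch; the driver name and the surrounding initializer are hypothetical.)

#include "compiler/nir/nir.h"

/* Hypothetical driver's compiler options; only .vectorize_io is new. */
static const nir_shader_compiler_options example_driver_nir_options = {
   .lower_add_sat = true,   /* pre-existing, unrelated option, for shape */
   .vectorize_io = true,    /* ask st/nir to re-vectorize shader IO */
};

Since a driver typically uses the same options struct for every stage it compiles, the flag behaves as the global preference the message describes.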
-rw-r--r--  src/compiler/nir/nir.h                    |  6
-rw-r--r--  src/mesa/state_tracker/st_glsl_to_nir.cpp | 25
2 files changed, 31 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 15323f9a0c7..d55b3569d11 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2350,6 +2350,12 @@ typedef struct nir_shader_compiler_options {
    bool lower_add_sat;
 
    /**
+    * Should IO be re-vectorized?  Some scalar ISAs still operate on vec4's
+    * for IO purposes and would prefer loads/stores be vectorized.
+    */
+   bool vectorize_io;
+
+   /**
     * Should nir_lower_io() create load_interpolated_input intrinsics?
     *
     * If not, it generates regular load_input intrinsics and interpolation
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index a87284ef2a7..11fc03baf86 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -655,6 +655,28 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 }
 
 static void
+st_nir_vectorize_io(nir_shader *producer, nir_shader *consumer)
+{
+   NIR_PASS_V(producer, nir_lower_io_to_vector, nir_var_shader_out);
+   NIR_PASS_V(producer, nir_opt_combine_stores, nir_var_shader_out);
+   NIR_PASS_V(consumer, nir_lower_io_to_vector, nir_var_shader_in);
+
+   if ((producer)->info.stage != MESA_SHADER_TESS_CTRL) {
+      /* Calling lower_io_to_vector creates output variable writes with
+       * write-masks.  We only support these for TCS outputs, so for other
+       * stages, we need to call nir_lower_io_to_temporaries to get rid of
+       * them.  This, in turn, creates temporary variables and extra
+       * copy_deref intrinsics that we need to clean up.
+       */
+      NIR_PASS_V(producer, nir_lower_io_to_temporaries,
+                 nir_shader_get_entrypoint(producer), true, false);
+      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
+      NIR_PASS_V(producer, nir_split_var_copies);
+      NIR_PASS_V(producer, nir_lower_var_copies);
+   }
+}
+
+static void
 st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
 {
    if (scalar) {
@@ -844,6 +866,9 @@ st_link_nir(struct gl_context *ctx,
                  prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
             nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
                                  nir, ctx->API != API_OPENGL_COMPAT);
+
+         if (ctx->Const.ShaderCompilerOptions[i].NirOptions->vectorize_io)
+            st_nir_vectorize_io(prev_shader->nir, nir);
       }
       prev = i;
    }
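(Illustrative, not part of the commit: how such options reach the new check in st_link_nir. A gallium driver exposes its nir_shader_compiler_options through the pipe_screen::get_compiler_options hook, and the state tracker stores the result in ctx->Const.ShaderCompilerOptions[stage].NirOptions, which the code above dereferences. The function below is hypothetical, and the hook's signature is quoted from memory; check p_screen.h.)

#include <assert.h>
#include "pipe/p_screen.h"
#include "compiler/nir/nir.h"

static const nir_shader_compiler_options example_driver_nir_options = {
   .vectorize_io = true,   /* the flag added by this patch */
};

/* Hypothetical implementation of the gallium screen hook. */
static const void *
example_get_compiler_options(struct pipe_screen *screen,
                             enum pipe_shader_ir ir,
                             enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &example_driver_nir_options;
}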