summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h5
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_nir.c37
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c6
5 files changed, 32 insertions, 19 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index ba8271d3fe3..19d4cca0dba 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -126,9 +126,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
} else {
assert(program->ir_type == PIPE_SHADER_IR_NIR);
- si_nir_opts(sel->nir);
si_nir_scan_shader(sel->nir, &sel->info);
- si_lower_nir(sel->screen, sel->nir);
}
/* Store the declared LDS size into tgsi_shader_info for the shader
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index d9cd4ff92e9..1b7fdf23c60 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -977,6 +977,7 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
si_set_max_shader_compiler_threads;
sscreen->b.is_parallel_shader_compilation_finished =
si_is_parallel_shader_compilation_finished;
+ sscreen->b.finalize_nir = si_finalize_nir;
si_init_screen_get_functions(sscreen);
si_init_screen_buffer_functions(sscreen);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index cb8d6dbcced..1d41b7aa042 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -756,9 +756,8 @@ void si_nir_scan_shader(const struct nir_shader *nir,
struct tgsi_shader_info *info);
void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
struct tgsi_tessctrl_info *out);
-void si_nir_lower_ps_inputs(struct nir_shader *nir);
-void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir);
-void si_nir_opts(struct nir_shader *nir);
+void si_nir_adjust_driver_locations(struct nir_shader *nir);
+void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize);
/* si_state_shaders.c */
void gfx9_get_gs_info(struct si_shader_selector *es,
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index aa82a7bd371..4df625ed274 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -801,7 +801,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
}
}
-void
+static void
si_nir_opts(struct nir_shader *nir)
{
bool progress;
@@ -913,7 +913,7 @@ si_nir_lower_color(nir_shader *nir)
}
}
-void si_nir_lower_ps_inputs(struct nir_shader *nir)
+static void si_nir_lower_ps_inputs(struct nir_shader *nir)
{
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
@@ -938,11 +938,7 @@ void si_nir_lower_ps_inputs(struct nir_shader *nir)
nir_var_shader_in);
}
-/**
- * Perform "lowering" operations on the NIR that are run once when the shader
- * selector is created.
- */
-void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
+void si_nir_adjust_driver_locations(struct nir_shader *nir)
{
/* Adjust the driver location of inputs and outputs. The state tracker
* interprets them as slots, while the ac/nir backend interprets them
@@ -963,7 +959,14 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
variable->data.driver_location += 1;
}
}
+}
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created.
+ */
+static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
+{
/* Perform lowerings (and optimizations) of code.
*
* Performance considerations aside, we must:
@@ -990,14 +993,20 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
/* Lower load constants to scalar and then clean up the mess */
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
NIR_PASS_V(nir, nir_lower_var_copies);
+ NIR_PASS_V(nir, nir_lower_pack);
+ NIR_PASS_V(nir, nir_opt_access);
si_nir_opts(nir);
/* Lower large variables that are always constant with load_constant
* intrinsics, which get turned into PC-relative loads from a data
* section next to the shader.
+ *
+ * st/mesa calls finalize_nir twice, but we can't call this pass twice.
*/
- NIR_PASS_V(nir, nir_opt_large_constants,
- glsl_get_natural_size_align_bytes, 16);
+ if (!nir->constant_data) {
+ NIR_PASS_V(nir, nir_opt_large_constants,
+ glsl_get_natural_size_align_bytes, 16);
+ }
ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
@@ -1006,6 +1015,16 @@ void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
NIR_PASS_V(nir, nir_lower_bool_to_int32);
}
+void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize)
+{
+ struct si_screen *sscreen = (struct si_screen *)screen;
+ struct nir_shader *nir = (struct nir_shader *)nirptr;
+
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ si_nir_lower_ps_inputs(nir);
+ si_lower_nir(sscreen, nir);
+}
+
static void declare_nir_input_vs(struct si_shader_context *ctx,
struct nir_variable *variable,
unsigned input_index,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 04ff331444b..e5e7d523cd4 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2472,9 +2472,6 @@ static void si_init_shader_selector_async(void *job, int thread_index)
assert(thread_index < ARRAY_SIZE(sscreen->compiler));
compiler = &sscreen->compiler[thread_index];
- if (sel->nir)
- si_lower_nir(sel->screen, sel->nir);
-
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
@@ -2715,10 +2712,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->nir = state->ir.nir;
}
- si_nir_lower_ps_inputs(sel->nir);
- si_nir_opts(sel->nir);
si_nir_scan_shader(sel->nir, &sel->info);
si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+ si_nir_adjust_driver_locations(sel->nir);
}
sel->type = sel->info.processor;