summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
author Timothy Arceri <tarceri@itsqueeze.com> 2018-10-18 09:42:17 +1100
committer Timothy Arceri <tarceri@itsqueeze.com> 2018-10-18 15:04:09 +1100
commit 06675711e713e7ccd66aa076a6ba116286130474 (patch)
tree 6fdb82d52cb0060e484d359244d4ec0ecd9a599e /src/amd
parent 9d5b106b2efbd5a4bbbe54a5a00c8cebd642d960 (diff)
radv: use nir_opt_find_array_copies()
Totals from affected shaders:
SGPRS: 1112 -> 1112 (0.00 %)
VGPRS: 1492 -> 1196 (-19.84 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 112172 -> 101316 (-9.68 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 93 -> 98 (5.38 %)
Wait states: 0 -> 0 (0.00 %)

All affected shaders are from "Batman: Arkham City" over DXVK.

The pass detects that the temporary array created by DXVK for storing TCS inputs is a copy of the input arrays. This allows us to avoid copying all of the input data and then indirecting on it with if-ladders; instead, we just do indirect indexing.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 6
-rw-r--r-- src/amd/vulkan/radv_shader.c 22
-rw-r--r-- src/amd/vulkan/radv_shader.h 3
3 files changed, 23 insertions, 8 deletions
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e1d665d0ac7..8d15a048bbf 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
ac_lower_indirect_derefs(ordered_shaders[i],
pipeline->device->physical_device->rad_info.chip_class);
}
- radv_optimize_nir(ordered_shaders[i], false);
+ radv_optimize_nir(ordered_shaders[i], false, false);
if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
ac_lower_indirect_derefs(ordered_shaders[i - 1],
pipeline->device->physical_device->rad_info.chip_class);
}
- radv_optimize_nir(ordered_shaders[i - 1], false);
+ radv_optimize_nir(ordered_shaders[i - 1], false, false);
}
}
}
@@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
nir_lower_io_to_scalar_early(nir[i], mask);
- radv_optimize_nir(nir[i], false);
+ radv_optimize_nir(nir[i], false, false);
}
}
}
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3b3422c8da6..52aa83d4a5a 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -118,7 +118,8 @@ void radv_DestroyShaderModule(
}
void
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+ bool allow_copies)
{
bool progress;
@@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
NIR_PASS_V(shader, nir_lower_vars_to_ssa);
NIR_PASS_V(shader, nir_lower_pack);
+ if (allow_copies) {
+ /* Only run this pass in the first call to
+ * radv_optimize_nir. Later calls assume that we've
+ * lowered away any copy_deref instructions and we
+ * don't want to introduce any more.
+ */
+ NIR_PASS(progress, shader, nir_opt_find_array_copies);
+ }
+
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
NIR_PASS(progress, shader, nir_opt_dead_write_vars);
@@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
}
nir_split_var_copies(nir);
- nir_lower_var_copies(nir);
nir_lower_global_vars_to_local(nir);
nir_remove_dead_variables(nir, nir_var_local);
@@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir_lower_load_const_to_scalar(nir);
if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
- radv_optimize_nir(nir, false);
+ radv_optimize_nir(nir, false, true);
+
+ /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+ * to remove any copies introduced by nir_opt_find_array_copies().
+ */
+ nir_lower_var_copies(nir);
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
@@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
* considered too large for unrolling.
*/
ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
- radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
+ radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
return nir;
}
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index c490b69f52b..22423e5f99a 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -298,7 +298,8 @@ struct radv_shader_slab {
};
void
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+ bool allow_copies);
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device,