summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorConnor Abbott <[email protected]>2019-08-30 16:08:47 +0200
committerConnor Abbott <[email protected]>2019-09-05 12:21:46 +0200
commit71a67942003a96d90289f7f53f546af821e64a51 (patch)
treeab4a9ead1cc7e56efc10c1ac6a2b8f6fa9567b23 /src
parent91626d0865851bc7e2e21296d1d8f34149216e40 (diff)
ac/nir: Enable nir_opt_large_constants
vkpipeline-db numbers: Totals: SGPRS: 1740306 -> 1741322 (0.06 %) VGPRS: 1331124 -> 1331712 (0.04 %) Spilled SGPRs: 21201 -> 21316 (0.54 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 256 -> 256 (0.00 %) dwords per thread Code Size: 79022628 -> 78694788 (-0.41 %) bytes LDS: 6500 -> 6500 (0.00 %) blocks Max Waves: 301413 -> 301302 (-0.04 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 53633 -> 54649 (1.89 %) VGPRS: 53000 -> 53588 (1.11 %) Spilled SGPRs: 3454 -> 3569 (3.33 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 5284232 -> 4956392 (-6.20 %) bytes LDS: 2 -> 2 (0.00 %) blocks Max Waves: 4239 -> 4128 (-2.62 %) Wait states: 0 -> 0 (0.00 %) (The biggest VGPR and max wave regression is due to unrolling a loop, which made the scheduler more aggressive, but in this case it's able to effectively hide latency so it's actually probably a win.) shader-db numbers with radeonsi NIR: Totals: SGPRS: 3526496 -> 3526512 (0.00 %) VGPRS: 2198576 -> 2198576 (0.00 %) Spilled SGPRs: 10463 -> 10463 (0.00 %) Spilled VGPRs: 86 -> 86 (0.00 %) Private memory VGPRs: 3182 -> 2528 (-20.55 %) Scratch size: 3308 -> 2640 (-20.19 %) dwords per thread Code Size: 74117280 -> 74106140 (-0.02 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 775846 -> 775844 (-0.00 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 856 -> 872 (1.87 %) VGPRS: 680 -> 680 (0.00 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 654 -> 0 (-100.00 %) Scratch size: 668 -> 0 (-100.00 %) dwords per thread Code Size: 49652 -> 38512 (-22.44 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 182 -> 180 (-1.10 %) Wait states: 0 -> 0 (0.00 %) Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/amd/vulkan/radv_shader.c7
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_nir.c7
2 files changed, 14 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index b6de97deb24..729aabaf272 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -442,6 +442,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
*/
nir_lower_var_copies(nir);
+ /* Lower large variables that are always constant with load_constant
+ * intrinsics, which get turned into PC-relative loads from a data
+ * section next to the shader.
+ */
+ NIR_PASS_V(nir, nir_opt_large_constants,
+ glsl_get_natural_size_align_bytes, 16);
+
/* Indirect lowering must be called after the radv_optimize_nir() loop
* has been called at least once. Otherwise indirect lowering can
* bloat the instruction count of the loop and cause it to be
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index a71b8fbbb9c..fdd139141e2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -986,6 +986,13 @@ void si_lower_nir(struct si_shader_selector *sel)
};
NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
+ /* Lower large variables that are always constant with load_constant
+ * intrinsics, which get turned into PC-relative loads from a data
+ * section next to the shader.
+ */
+ NIR_PASS_V(sel->nir, nir_opt_large_constants,
+ glsl_get_natural_size_align_bytes, 16);
+
ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class);
si_nir_opts(sel->nir);