summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
author: Dave Airlie <[email protected]> 2016-11-22 04:17:49 +0000
committer: Dave Airlie <[email protected]> 2016-11-22 07:20:17 +0000
commit: f395e3445df98b0398865a1897bca003f9add6f4 (patch)
tree: dff13933d0e01baac2c6f408244b0b17a084f22f /src/amd
parent: 220912e21432dba996cb2694dea480c65cf6a7c6 (diff)
radv: add support for shader stats dump
I've started working on a shader-db-like tool for Vulkan. It is based on vktrace and records pipelines. This commit adds support for dumping the shader stats exactly like radeonsi does, so I can reuse the shader-db scripts it uses.

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/vulkan/radv_device.c 1
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 84
-rw-r--r-- src/amd/vulkan/radv_private.h 2
3 files changed, 87 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 85959735d29..00f3a07367e 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -666,6 +666,7 @@ VkResult radv_CreateDevice(
}
device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false);
+ device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false);
if (device->allow_fast_clears && device->allow_dcc)
radv_finishme("DCC fast clears have not been tested\n");
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 404e8409015..7d7d0c672f9 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -257,6 +257,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
return nir;
}
+/*
+ * Return the heading printed above a shader's statistics.
+ *
+ * var is accepted but not consulted; the name depends on stage alone.
+ * Vertex, fragment and compute stages have dedicated names; every other
+ * stage yields "Unknown shader". The returned pointer refers to a string
+ * literal and must not be freed or modified.
+ */
+static const char *radv_get_shader_name(struct radv_shader_variant *var,
+					gl_shader_stage stage)
+{
+	if (stage == MESA_SHADER_VERTEX)
+		return "Vertex Shader as VS";
+	if (stage == MESA_SHADER_FRAGMENT)
+		return "Pixel Shader";
+	if (stage == MESA_SHADER_COMPUTE)
+		return "Compute Shader";
+
+	return "Unknown shader";
+}
+/*
+ * Write statistics for every shader present in a pipeline to stderr.
+ *
+ * For each non-NULL entry of pipeline->shaders this prints the shader name,
+ * (for the fragment stage) the SPI_PS_INPUT_ADDR/ENA values, and then SGPR
+ * and VGPR counts, spill counts, code size, LDS size, scratch size and an
+ * estimate of the maximum number of waves.
+ *
+ * The wave estimate is computed separately for each shader: it starts at 10
+ * and is lowered by that shader's own SGPR, VGPR and LDS usage only.
+ *
+ * The text layout is consumed by external shader-db scripts, so the format
+ * strings must not be altered.
+ */
+static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+	/* Chip-dependent constants; they do not change between shaders. */
+	unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
+	/* SGPR budget the wave estimate divides by: 800 on VI and newer, 512 otherwise. */
+	unsigned sgpr_limit = device->instance->physicalDevice.rad_info.chip_class >= VI ? 800 : 512;
+	FILE *file = stderr;
+	int i;
+
+	for (i = 0; i < MESA_SHADER_STAGES; i++) {
+		struct radv_shader_variant *var = pipeline->shaders[i];
+		struct ac_shader_config *conf;
+		/* Reset for every shader so that limits derived from one stage
+		 * (e.g. the vertex shader's register usage) do not leak into
+		 * the numbers reported for a later stage.
+		 */
+		unsigned max_simd_waves = 10;
+		unsigned lds_per_wave = 0;
+
+		if (!var)
+			continue;
+
+		conf = &var->config;
+
+		/* Only the fragment stage contributes LDS to the estimate:
+		 * the configured LDS size in bytes plus 48 bytes per
+		 * interpolant, rounded up to the allocation granularity.
+		 * NOTE(review): the 48-byte figure is presumably the
+		 * per-interpolant parameter storage; confirm against the
+		 * hardware documentation.
+		 */
+		if (i == MESA_SHADER_FRAGMENT) {
+			lds_per_wave = conf->lds_size * lds_increment +
+				align(var->info.fs.num_interp * 48, lds_increment);
+		}
+
+		/* Guard each division: a count of zero imposes no limit. */
+		if (conf->num_sgprs)
+			max_simd_waves = MIN2(max_simd_waves, sgpr_limit / conf->num_sgprs);
+
+		if (conf->num_vgprs)
+			max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+		/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+		 * that PS can use.
+		 */
+		if (lds_per_wave)
+			max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+		fprintf(file, "\n%s:\n",
+			radv_get_shader_name(var, i));
+		if (i == MESA_SHADER_FRAGMENT) {
+			fprintf(file, "*** SHADER CONFIG ***\n"
+				"SPI_PS_INPUT_ADDR = 0x%04x\n"
+				"SPI_PS_INPUT_ENA = 0x%04x\n",
+				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+		}
+		fprintf(file, "*** SHADER STATS ***\n"
+			"SGPRS: %d\n"
+			"VGPRS: %d\n"
+			"Spilled SGPRs: %d\n"
+			"Spilled VGPRs: %d\n"
+			"Code Size: %d bytes\n"
+			"LDS: %d blocks\n"
+			"Scratch: %d bytes per wave\n"
+			"Max Waves: %d\n"
+			"********************\n\n\n",
+			conf->num_sgprs, conf->num_vgprs,
+			conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
+			conf->lds_size, conf->scratch_bytes_per_wave,
+			max_simd_waves);
+	}
+}
+
void radv_shader_variant_destroy(struct radv_device *device,
struct radv_shader_variant *variant)
{
@@ -297,6 +372,7 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
&variant->info, shader, &options, dump);
LLVMDisposeTargetMachine(tm);
+ variant->code_size = binary.code_size;
bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
unsigned vgpr_comp_cnt = 0;
@@ -1319,6 +1395,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
pipeline->binding_stride[desc->binding] = desc->stride;
}
+ if (device->shader_stats_dump) {
+ radv_dump_pipeline_stats(device, pipeline);
+ }
+
return VK_SUCCESS;
}
@@ -1412,6 +1492,10 @@ static VkResult radv_compute_pipeline_create(
pipeline->layout, NULL, dump);
*pPipeline = radv_pipeline_to_handle(pipeline);
+
+ if (device->shader_stats_dump) {
+ radv_dump_pipeline_stats(device, pipeline);
+ }
return VK_SUCCESS;
}
VkResult radv_CreateComputePipelines(
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 3d17895a118..a5d13a9239a 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -444,6 +444,7 @@ struct radv_device {
bool allow_fast_clears;
bool allow_dcc;
+ bool shader_stats_dump;
/* MSAA sample locations.
* The first index is the sample index.
@@ -794,6 +795,7 @@ struct radv_shader_variant {
struct ac_shader_variant_info info;
unsigned rsrc1;
unsigned rsrc2;
+ uint32_t code_size;
};
struct radv_depth_stencil_state {