diff options
author | Dave Airlie <[email protected]> | 2016-11-22 04:17:49 +0000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2016-11-22 07:20:17 +0000 |
commit | f395e3445df98b0398865a1897bca003f9add6f4 (patch) | |
tree | dff13933d0e01baac2c6f408244b0b17a084f22f /src | |
parent | 220912e21432dba996cb2694dea480c65cf6a7c6 (diff) |
radv: add support for shader stats dump
I've started working on a shader-db-alike for Vulkan.
It's based on vktrace and records pipelines. This
adds support for dumping the shader stats exactly as
radeonsi does, so that I can reuse the shader-db scripts
it uses.
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/vulkan/radv_device.c | 1 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 84 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 2 |
3 files changed, 87 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 85959735d29..00f3a07367e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -666,6 +666,7 @@ VkResult radv_CreateDevice( } device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false); device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false); + device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false); if (device->allow_fast_clears && device->allow_dcc) radv_finishme("DCC fast clears have not been tested\n"); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 404e8409015..7d7d0c672f9 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -257,6 +257,81 @@ radv_shader_compile_to_nir(struct radv_device *device, return nir; } +static const char *radv_get_shader_name(struct radv_shader_variant *var, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: return "Vertex Shader as VS"; + case MESA_SHADER_FRAGMENT: return "Pixel Shader"; + case MESA_SHADER_COMPUTE: return "Compute Shader"; + default: + return "Unknown shader"; + }; + +} +static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline) +{ + unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 
512 : 256; + struct radv_shader_variant *var; + struct ac_shader_config *conf; + int i; + FILE *file = stderr; + unsigned max_simd_waves = 10; + unsigned lds_per_wave = 0; + + for (i = 0; i < MESA_SHADER_STAGES; i++) { + if (!pipeline->shaders[i]) + continue; + var = pipeline->shaders[i]; + + conf = &var->config; + + if (i == MESA_SHADER_FRAGMENT) { + lds_per_wave = conf->lds_size * lds_increment + + align(var->info.fs.num_interp * 48, lds_increment); + } + + if (conf->num_sgprs) { + if (device->instance->physicalDevice.rad_info.chip_class >= VI) + max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); + else + max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); + } + + if (conf->num_vgprs) + max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); + + /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD + * that PS can use. + */ + if (lds_per_wave) + max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); + + fprintf(file, "\n%s:\n", + radv_get_shader_name(var, i)); + if (i == MESA_SHADER_FRAGMENT) { + fprintf(file, "*** SHADER CONFIG ***\n" + "SPI_PS_INPUT_ADDR = 0x%04x\n" + "SPI_PS_INPUT_ENA = 0x%04x\n", + conf->spi_ps_input_addr, conf->spi_ps_input_ena); + } + fprintf(file, "*** SHADER STATS ***\n" + "SGPRS: %d\n" + "VGPRS: %d\n" + "Spilled SGPRs: %d\n" + "Spilled VGPRs: %d\n" + "Code Size: %d bytes\n" + "LDS: %d blocks\n" + "Scratch: %d bytes per wave\n" + "Max Waves: %d\n" + "********************\n\n\n", + conf->num_sgprs, conf->num_vgprs, + conf->spilled_sgprs, conf->spilled_vgprs, var->code_size, + conf->lds_size, conf->scratch_bytes_per_wave, + max_simd_waves); + } +} + void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant) { @@ -297,6 +372,7 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic &variant->info, shader, &options, dump); LLVMDisposeTargetMachine(tm); + variant->code_size = binary.code_size; bool scratch_enabled = 
variant->config.scratch_bytes_per_wave > 0; unsigned vgpr_comp_cnt = 0; @@ -1319,6 +1395,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline, pipeline->binding_stride[desc->binding] = desc->stride; } + if (device->shader_stats_dump) { + radv_dump_pipeline_stats(device, pipeline); + } + return VK_SUCCESS; } @@ -1412,6 +1492,10 @@ static VkResult radv_compute_pipeline_create( pipeline->layout, NULL, dump); *pPipeline = radv_pipeline_to_handle(pipeline); + + if (device->shader_stats_dump) { + radv_dump_pipeline_stats(device, pipeline); + } return VK_SUCCESS; } VkResult radv_CreateComputePipelines( diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 3d17895a118..a5d13a9239a 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -444,6 +444,7 @@ struct radv_device { bool allow_fast_clears; bool allow_dcc; + bool shader_stats_dump; /* MSAA sample locations. * The first index is the sample index. @@ -794,6 +795,7 @@ struct radv_shader_variant { struct ac_shader_variant_info info; unsigned rsrc1; unsigned rsrc2; + uint32_t code_size; }; struct radv_depth_stencil_state { |