From 76b12fa564a5f48e3f2264718dea0158d0c05b4c Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Mon, 14 Jan 2019 14:44:15 +0100
Subject: radv: Only use 32 KiB per threadgroup on Stoney.

Using more than 32 KiB of LDS in a single threadgroup causes hangs on
some machines.

What works for dEQP-VK.tessellation.shader_input_output.barrier:
  - running num_patches = 6 (which limits LDS to 32 KiB)
  - running num_patches = 8, and artificially cutting LDS size at 32 KiB.

CC:
Reviewed-by: Samuel Pitoiset
---
 src/amd/vulkan/radv_nir_to_llvm.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index cd58167b766..9d0aa411528 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -254,7 +254,16 @@ get_tcs_num_patches(struct radv_shader_context *ctx)
 	/* Make sure that the data fits in LDS. This assumes the shaders only
 	 * use LDS for the inputs and outputs.
 	 */
-	hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
+	hardware_lds_size = 32768;
+
+	/* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
+	 * threadgroup, even though there is more than 32 KiB LDS.
+	 *
+	 * Test: dEQP-VK.tessellation.shader_input_output.barrier
+	 */
+	if (ctx->options->chip_class >= CIK && ctx->options->family != CHIP_STONEY)
+		hardware_lds_size = 65536;
+
 	num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
 	/* Make sure the output data fits in the offchip buffer */
 	num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
--
cgit v1.2.3