diff options
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 128 |
1 files changed, 126 insertions, 2 deletions
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index a54d307b563..7446096b7ba 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2705,7 +2705,7 @@ struct radv_bin_size_entry {
 };
 
 static VkExtent2D
-radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_gfx9_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
 	static const struct radv_bin_size_entry color_size_table[][3][9] = {
 		{
@@ -2975,6 +2975,124 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
 	return extent;
 }
 
+/*
+ * Compute the primitive binning bin size for GFX10 pipelines.
+ *
+ * The bin is sized from the color tag part and the bytes per pixel written
+ * by the subpass' color attachments, replaced by the FMASK or depth/stencil
+ * derived bin when that one covers fewer pixels, and finally clamped to a
+ * minimum of 128x64.
+ */
+static VkExtent2D
+radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+	VkExtent2D extent = {512, 512};
+
+	unsigned sdp_interface_count;
+
+	switch(pipeline->device->physical_device->rad_info.family) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+		sdp_interface_count = 16;
+		break;
+	case CHIP_NAVI14:
+		sdp_interface_count = 8;
+		break;
+	default:
+		unreachable("Unhandled GFX10 chip");
+	}
+
+	const unsigned db_tag_size = 64;
+	const unsigned db_tag_count = 312;
+	const unsigned color_tag_size = 1024;
+	const unsigned color_tag_count = 31;
+	const unsigned fmask_tag_size = 256;
+	const unsigned fmask_tag_count = 44;
+
+	const unsigned rb_count = pipeline->device->physical_device->rad_info.num_render_backends;
+	const unsigned pipe_count = MAX2(rb_count, sdp_interface_count);
+
+	const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
+	const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
+	const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+
+	const unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+	const unsigned samples_log = util_logbase2_ceil(total_samples);
+
+	unsigned color_bytes_per_pixel = 0;
+	unsigned fmask_bytes_per_pixel = 0;
+
+	/* Accumulate the per-pixel footprint of every color attachment that is written. */
+	const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
+	if (vkblend) {
+		for (unsigned i = 0; i < subpass->color_count; i++) {
+			if (!vkblend->pAttachments[i].colorWriteMask)
+				continue;
+
+			if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+				continue;
+
+			VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+			color_bytes_per_pixel += vk_format_get_blocksize(format);
+
+			if (total_samples > 1) {
+				const unsigned fmask_array[] = {0, 1, 1, 4};
+				fmask_bytes_per_pixel += fmask_array[samples_log];
+			}
+		}
+
+		color_bytes_per_pixel *= total_samples;
+	}
+	/* Avoid dividing by zero when no color attachment is written. */
+	color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
+
+	/* Width gets the extra bit when the log2 of the pixel count is odd. */
+	const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
+	extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
+	extent.height = 1ull << (color_pixel_count_log / 2);
+
+	/* Use the FMASK-derived bin instead if it covers fewer pixels. */
+	if (fmask_bytes_per_pixel) {
+		const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
+
+		const VkExtent2D fmask_extent = (VkExtent2D){
+			.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
+			.height = 1ull << (fmask_pixel_count_log / 2)
+		};
+
+		if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
+			extent = fmask_extent;
+	}
+
+	/* Likewise for the depth/stencil attachment. */
+	if (subpass->depth_stencil_attachment) {
+		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+		/* Coefficients taken from AMDVLK */
+		unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
+		unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
+		unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
+
+		const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
+
+		const VkExtent2D db_extent = (VkExtent2D){
+			.width = 1ull << ((db_pixel_count_log + 1) / 2),
+			.height = 1ull << (db_pixel_count_log / 2)
+		};
+
+		if (db_extent.width * db_extent.height < extent.width * extent.height)
+			extent = db_extent;
+	}
+
+	/* Enforce the minimum bin size, each dimension on its own. */
+	extent.width = MAX2(extent.width, 128);
+	extent.height = MAX2(extent.height, 64);
+
+	return extent;
+}
+
 static void
 radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
 					      struct radv_pipeline *pipeline,
@@ -3027,7 +3145,13 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
 	if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
 		return;
 
-	VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
+	VkExtent2D bin_size;
+	if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+		bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
+	} else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
+		bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
+	} else
+		unreachable("Unhandled generation for binning bin size calculation");
 
 	if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
 		unsigned context_states_per_bin; /* allowed range: [1, 6] */