summaryrefslogtreecommitdiffstats
path: root/src/amd/vulkan/radv_pipeline.c
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <[email protected]>2017-12-30 17:31:44 +0100
committerBas Nieuwenhuizen <[email protected]>2017-12-31 15:07:07 +0100
commit6a36bfc64d2096aa338958c4605f5fc6372c07b8 (patch)
tree3a59c9fd9a30dcf09cb004511979d2010aa0829d /src/amd/vulkan/radv_pipeline.c
parentb0d17270ada1b7292f09b5d4ab2c77880ee64c35 (diff)
radv: Implement binning on GFX9.
Overall it does not really help or hurt. The deferred demo gets 1% improvement and some games a 3% decrease, so I don't think this should be enabled by default. But with the code upstream it is easier to experiment with it. v2: Remove initializing the registers from si_emit_config. Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan/radv_pipeline.c')
-rw-r--r--src/amd/vulkan/radv_pipeline.c325
1 files changed, 325 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 14ada20d525..9b5728ee9e7 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2002,6 +2002,329 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
}
}
+struct radv_bin_size_entry { /* One row of the bin-size lookup tables in radv_compute_bin_size(). */
+ unsigned bpp; /* Threshold compared against a bytes-per-pixel value during table lookup. */
+ VkExtent2D extent; /* Bin extent for this row; a zero width or height makes the caller leave binning disabled. */
+};
+
+/*
+ * Pick the bin extent for a graphics pipeline.
+ *
+ * Both lookup tables are indexed by log2(render backends per shader engine)
+ * and then by log2(shader engines). Within a sub-table the rows are sorted by
+ * ascending bpp and terminated by a UINT_MAX sentinel; a row applies to every
+ * bytes-per-pixel value from its own bpp up to (but excluding) the bpp of the
+ * next row.
+ *
+ * The color lookup uses the summed block size of all color attachments that
+ * are written (per cb_target_mask) scaled by the effective sample count. If
+ * the subpass has a depth/stencil attachment, a second lookup is done and the
+ * result is the component-wise minimum of both extents.
+ *
+ * Returns the chosen extent. A zero width or height means no usable bin size
+ * exists and radv_compute_binning_state() then leaves binning disabled.
+ */
+static VkExtent2D
+radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	static const struct radv_bin_size_entry color_size_table[][3][9] = {
+		{
+			/* One RB / SE */
+			{
+				/* One shader engine */
+				{        0, {128,  128}},
+				{        1, { 64,  128}},
+				{        2, { 32,  128}},
+				{        3, { 16,  128}},
+				{       17, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {128,  128}},
+				{        2, { 64,  128}},
+				{        3, { 32,  128}},
+				{        5, { 16,  128}},
+				{       17, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {128,  128}},
+				{        3, { 64,  128}},
+				{        5, { 16,  128}},
+				{       17, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+		{
+			/* Two RB / SE */
+			{
+				/* One shader engine */
+				{        0, {128,  128}},
+				{        2, { 64,  128}},
+				{        3, { 32,  128}},
+				{        5, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {128,  128}},
+				{        3, { 64,  128}},
+				{        5, { 32,  128}},
+				{        9, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {256,  256}},
+				{        2, {128,  256}},
+				{        3, {128,  128}},
+				{        5, { 64,  128}},
+				{        9, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+		{
+			/* Four RB / SE */
+			{
+				/* One shader engine */
+				{        0, {128,  256}},
+				{        2, {128,  128}},
+				{        3, { 64,  128}},
+				{        5, { 32,  128}},
+				{        9, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {256,  256}},
+				{        2, {128,  256}},
+				{        3, {128,  128}},
+				{        5, { 64,  128}},
+				{        9, { 32,  128}},
+				{       17, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {256,  512}},
+				{        2, {256,  256}},
+				{        3, {128,  256}},
+				{        5, {128,  128}},
+				{        9, { 64,  128}},
+				{       17, { 16,  128}},
+				{       33, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+	};
+	static const struct radv_bin_size_entry ds_size_table[][3][9] = {
+		{
+			/* One RB / SE */
+			{
+				/* One shader engine */
+				{        0, {128,  256}},
+				{        2, {128,  128}},
+				{        4, { 64,  128}},
+				{        7, { 32,  128}},
+				{       13, { 16,  128}},
+				{       49, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {256,  256}},
+				{        2, {128,  256}},
+				{        4, {128,  128}},
+				{        7, { 64,  128}},
+				{       13, { 32,  128}},
+				{       25, { 16,  128}},
+				{       49, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {256,  512}},
+				{        2, {256,  256}},
+				{        4, {128,  256}},
+				{        7, {128,  128}},
+				{       13, { 64,  128}},
+				{       25, { 16,  128}},
+				{       49, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+		{
+			/* Two RB / SE */
+			{
+				/* One shader engine */
+				{        0, {256,  256}},
+				{        2, {128,  256}},
+				{        4, {128,  128}},
+				{        7, { 64,  128}},
+				{       13, { 32,  128}},
+				{       25, { 16,  128}},
+				{       97, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {256,  512}},
+				{        2, {256,  256}},
+				{        4, {128,  256}},
+				{        7, {128,  128}},
+				{       13, { 64,  128}},
+				{       25, { 32,  128}},
+				{       49, { 16,  128}},
+				{       97, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {512,  512}},
+				{        2, {256,  512}},
+				{        4, {256,  256}},
+				{        7, {128,  256}},
+				{       13, {128,  128}},
+				{       25, { 64,  128}},
+				{       49, { 16,  128}},
+				{       97, {  0,    0}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+		{
+			/* Four RB / SE */
+			{
+				/* One shader engine */
+				{        0, {256,  512}},
+				{        2, {256,  256}},
+				{        4, {128,  256}},
+				{        7, {128,  128}},
+				{       13, { 64,  128}},
+				{       25, { 32,  128}},
+				{       49, { 16,  128}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Two shader engines */
+				{        0, {512,  512}},
+				{        2, {256,  512}},
+				{        4, {256,  256}},
+				{        7, {128,  256}},
+				{       13, {128,  128}},
+				{       25, { 64,  128}},
+				{       49, { 32,  128}},
+				{       97, { 16,  128}},
+				{ UINT_MAX, {  0,    0}},
+			},
+			{
+				/* Four shader engines */
+				{        0, {512,  512}},
+				{        4, {256,  512}},
+				{        7, {256,  256}},
+				{       13, {128,  256}},
+				{       25, {128,  128}},
+				{       49, { 64,  128}},
+				{       97, { 16,  128}},
+				{ UINT_MAX, {  0,    0}},
+			},
+		},
+	};
+
+	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+
+	/* First two table indices. */
+	unsigned log_num_rb_per_se =
+		util_logbase2_ceil(pipeline->device->physical_device->rad_info.num_render_backends /
+				   pipeline->device->physical_device->rad_info.max_se);
+	unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
+
+	unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_mode_cntl_1);
+	unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
+	unsigned effective_samples = total_samples;
+	unsigned cb_target_mask = pipeline->graphics.blend.cb_target_mask;
+	unsigned color_bytes_per_pixel = 0;
+
+	for (unsigned i = 0; i < subpass->color_count; i++) {
+		/* Each attachment owns one nibble of the mask. The constant is
+		 * unsigned so that shifting it into bit 31 (i == 7) is well
+		 * defined. */
+		if (!(cb_target_mask & (0xfu << (i * 4))))
+			continue;
+
+		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+			continue;
+
+		VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+		color_bytes_per_pixel += vk_format_get_blocksize(format);
+	}
+
+	/* MSAA images typically don't use all samples all the time. */
+	if (effective_samples >= 2 && ps_iter_samples <= 1)
+		effective_samples = 2;
+	color_bytes_per_pixel *= effective_samples;
+
+	/* Select the last row whose threshold does not exceed the value. Peeking
+	 * at the following row keeps the first row (bpp == 0) reachable and
+	 * stops in front of the UINT_MAX sentinel, which is only a terminator. */
+	const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
+	while (color_entry[1].bpp <= color_bytes_per_pixel)
+		++color_entry;
+
+	VkExtent2D extent = color_entry->extent;
+
+	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+
+		/* Coefficients taken from AMDVLK */
+		unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
+		unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
+		unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
+
+		/* Same lookup rule as for the color table. */
+		const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
+		while (ds_entry[1].bpp <= ds_bytes_per_pixel)
+			++ds_entry;
+
+		/* The bin has to satisfy both the color and the depth/stencil limit. */
+		extent.width = MIN2(extent.width, ds_entry->extent.width);
+		extent.height = MIN2(extent.height, ds_entry->extent.height);
+	}
+
+	return extent;
+}
+
+/*
+ * Compute the binner register values stored in pipeline->graphics.bin.
+ *
+ * Both registers are first set to their "binning disabled" values. The
+ * PA_SC_BINNER_CNTL_0 value is only replaced by a "binning allowed" setup
+ * when the device has pbb_allowed set and radv_compute_bin_size() returns an
+ * extent with a non-zero width and height.
+ */
+static void
+radv_compute_binning_state(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	unsigned ctx_states;     /* valid values: 1..6 */
+	unsigned persist_states; /* valid values: 1..32 */
+	unsigned fpovs;          /* valid values: 0..255, where 0 means unlimited */
+	unsigned binner_cntl;
+	VkExtent2D bin;
+
+	/* Start out with binning turned off. */
+	pipeline->graphics.bin.db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+	pipeline->graphics.bin.pa_sc_binner_cntl_0 =
+		S_028C44_DISABLE_START_OF_PRIM(1) |
+		S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC);
+
+	if (!pipeline->device->pbb_allowed)
+		return;
+
+	bin = radv_compute_bin_size(pipeline, pCreateInfo);
+	if (bin.width == 0 || bin.height == 0)
+		return;
+
+	/* Per-chip batching limits. */
+	switch (pipeline->device->physical_device->rad_info.family) {
+	case CHIP_RAVEN:
+		ctx_states = 6;
+		persist_states = 32;
+		fpovs = 63;
+		break;
+	case CHIP_VEGA10:
+		ctx_states = 1;
+		persist_states = 1;
+		fpovs = 63;
+		break;
+	default:
+		unreachable("unhandled family while determining binning state.");
+	}
+
+	/* A 16 pixel dimension is flagged through BIN_SIZE_X/Y; the *_EXTEND
+	 * fields carry log2(size) - 5, with sizes below 32 clamped to 32 so
+	 * that the field becomes 0. */
+	binner_cntl  = S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED);
+	binner_cntl |= S_028C44_BIN_SIZE_X(bin.width == 16);
+	binner_cntl |= S_028C44_BIN_SIZE_Y(bin.height == 16);
+	binner_cntl |= S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin.width, 32)) - 5);
+	binner_cntl |= S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin.height, 32)) - 5);
+	/* The register stores the state counts minus one. */
+	binner_cntl |= S_028C44_CONTEXT_STATES_PER_BIN(ctx_states - 1);
+	binner_cntl |= S_028C44_PERSISTENT_STATES_PER_BIN(persist_states - 1);
+	binner_cntl |= S_028C44_DISABLE_START_OF_PRIM(1);
+	binner_cntl |= S_028C44_FPOVS_PER_BATCH(fpovs);
+	binner_cntl |= S_028C44_OPTIMAL_BIN_SELECTION(1);
+
+	pipeline->graphics.bin.pa_sc_binner_cntl_0 = binner_cntl;
+
+	/* DFSM is not implemented yet */
+	assert(!pipeline->device->dfsm_allowed);
+}
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2290,6 +2613,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
radv_dump_pipeline_stats(device, pipeline);
}
+ radv_compute_binning_state(pipeline, pCreateInfo);
+
result = radv_pipeline_scratch_init(device, pipeline);
return result;
}