diff options
author | Dave Airlie <[email protected]> | 2018-01-11 08:02:52 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2018-01-12 00:43:07 +0000 |
commit | ad11fc3571e025b22d9feed80a7f499665f7a255 (patch) | |
tree | d3f3d782fdb16ce151fb1ff4beaef7291b434e76 /src/amd/vulkan | |
parent | e37db93246d8251e2c0b366c08f23c9de756208e (diff) |
radv: don't emit unneeded vertex state.
If the number of instances hasn't changed and we've already
emitted it, don't emit it again.
If the vertex shader is the same and first_instance and vertex_offset
haven't changed, don't emit them again.
This increases the fps in GL_vs_VK -t 1 -m -api vk from around 40
to around 60 here; it may not impact anything else.
Dieter also reported smoketest going from 1060->1200 fps.
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Tested-by: Dieter Nützel <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd/vulkan')
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 53 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 4 |
2 files changed, 49 insertions, 8 deletions
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index fb48691ca19..67799a13cc2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2312,6 +2312,9 @@ VkResult radv_BeginCommandBuffer( memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); cmd_buffer->state.last_primitive_reset_en = -1; cmd_buffer->state.last_index_type = -1; + cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_vertex_offset = -1; + cmd_buffer->state.last_first_instance = -1; cmd_buffer->usage_flags = pBeginInfo->flags; /* setup initial configuration into command buffer */ @@ -2733,6 +2736,10 @@ void radv_CmdBindPipeline( cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; cmd_buffer->push_constant_stages |= pipeline->active_stages; + /* the new vertex shader might not have the same user regs */ + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_vertex_offset = -1; + radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) @@ -3003,6 +3010,21 @@ void radv_CmdExecuteCommands( secondary->state.last_ia_multi_vgt_param; } + if (secondary->state.last_first_instance != -1) { + primary->state.last_first_instance = + secondary->state.last_first_instance; + } + + if (secondary->state.last_num_instances != -1) { + primary->state.last_num_instances = + secondary->state.last_num_instances; + } + + if (secondary->state.last_vertex_offset != -1) { + primary->state.last_vertex_offset = + secondary->state.last_vertex_offset; + } + if (secondary->state.last_index_type != -1) { primary->state.last_index_type = secondary->state.last_index_type; @@ -3207,6 +3229,11 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; assert(base_reg); + /* just reset draw state for vertex data */ + cmd_buffer->state.last_first_instance = -1; + 
cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_vertex_offset = -1; + if (draw_count == 1 && !count_va && !draw_id_enable) { radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, false)); @@ -3326,15 +3353,25 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, } } else { assert(state->pipeline->graphics.vtx_base_sgpr); - radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, - state->pipeline->graphics.vtx_emit_num); - radeon_emit(cs, info->vertex_offset); - radeon_emit(cs, info->first_instance); - if (state->pipeline->graphics.vtx_emit_num == 3) - radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating)); - radeon_emit(cs, info->instance_count); + if (info->vertex_offset != state->last_vertex_offset || + info->first_instance != state->last_first_instance) { + radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, + state->pipeline->graphics.vtx_emit_num); + + radeon_emit(cs, info->vertex_offset); + radeon_emit(cs, info->first_instance); + if (state->pipeline->graphics.vtx_emit_num == 3) + radeon_emit(cs, 0); + state->last_first_instance = info->first_instance; + state->last_vertex_offset = info->vertex_offset; + } + + if (state->last_num_instances != info->instance_count) { + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating)); + radeon_emit(cs, info->instance_count); + state->last_num_instances = info->instance_count; + } if (info->indexed) { int index_size = state->index_type ? 4 : 2; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7330dc6369a..c39358951de 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -913,6 +913,10 @@ struct radv_cmd_state { uint32_t valid_descriptors; uint32_t trace_id; uint32_t last_ia_multi_vgt_param; + + uint32_t last_num_instances; + uint32_t last_first_instance; + uint32_t last_vertex_offset; }; struct radv_cmd_pool { |