summaryrefslogtreecommitdiffstats
path: root/src/vulkan
diff options
context:
space:
mode:
authorLionel Landwerlin <[email protected]>2019-03-02 17:15:41 +0000
committerLionel Landwerlin <[email protected]>2019-05-02 17:02:06 +0100
commit4438188f492e1f7c32c2728ef26a3772cd80081d (patch)
treeb4c4aed70e404fd5c40574051b35ef2b937ed67d /src/vulkan
parent9eddceef446ec90bcbd7a6239a54772b32d47f2e (diff)
vulkan/overlay: record stats in command buffers and accumulate on exec/submit
This significantly reworks how numbers displayed are computed. We accumulate operations written into command buffers and add those to the device when submitted to a queue. These collected values are then used to compute per frame overlay data. We also accumulate the data over the sampling fps period to produce numbers for that period of time. Signed-off-by: Lionel Landwerlin <[email protected]>
Diffstat (limited to 'src/vulkan')
-rw-r--r--src/vulkan/overlay-layer/overlay.cpp378
-rw-r--r--src/vulkan/overlay-layer/overlay_params.h1
2 files changed, 238 insertions, 141 deletions
diff --git a/src/vulkan/overlay-layer/overlay.cpp b/src/vulkan/overlay-layer/overlay.cpp
index 8de22307e14..979bb7bf913 100644
--- a/src/vulkan/overlay-layer/overlay.cpp
+++ b/src/vulkan/overlay-layer/overlay.cpp
@@ -50,10 +50,10 @@ struct instance_data {
};
struct frame_stat {
- uint32_t stats[OVERLAY_PARAM_ENABLED_MAX];
+ uint64_t stats[OVERLAY_PARAM_ENABLED_MAX];
};
-/* Mapped from VkDevice/VkCommandBuffer */
+/* Mapped from VkDevice */
struct queue_data;
struct device_data {
struct instance_data *instance;
@@ -71,6 +71,20 @@ struct device_data {
struct queue_data **queues;
uint32_t n_queues;
+ /* For a single frame */
+ struct frame_stat frame_stats;
+};
+
+/* Mapped from VkCommandBuffer */
+struct command_buffer_data {
+ struct device_data *device;
+
+ VkCommandBufferLevel level;
+
+ VkCommandBuffer cmd_buffer;
+ VkQueryPool pipeline_query_pool;
+ uint32_t query_index;
+
struct frame_stat stats;
};
@@ -143,14 +157,15 @@ struct swapchain_data {
uint64_t last_fps_update;
double fps;
- double frame_times[200];
-
- double acquire_times[200];
- uint64_t n_acquire;
-
enum overlay_param_enabled stat_selector;
struct frame_stat stats_min, stats_max;
- struct frame_stat stats[200];
+ struct frame_stat frames_stats[200];
+
+ /* Over a single frame */
+ struct frame_stat frame_stats;
+
+ /* Over fps_sampling_period */
+ struct frame_stat accumulated_stats;
};
static struct hash_table *vk_object_to_data = NULL;
@@ -168,6 +183,7 @@ static inline void ensure_vk_object_map(void)
}
#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj))
+#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data((void *) obj))
#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj))
#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj))
#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
@@ -354,6 +370,26 @@ static void destroy_device_data(struct device_data *data)
}
/**/
+static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
+ VkCommandBufferLevel level,
+ struct device_data *device_data)
+{
+ struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data);
+ data->device = device_data;
+ data->cmd_buffer = cmd_buffer;
+ data->level = level;
+ map_object((void *) data->cmd_buffer, data);
+ return data;
+}
+
+static void destroy_command_buffer_data(struct command_buffer_data *data)
+{
+ unmap_object((void *) data->cmd_buffer);
+ ralloc_free(data);
+}
+
+
+/**/
static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
struct device_data *device_data)
{
@@ -373,75 +409,72 @@ static void destroy_swapchain_data(struct swapchain_data *data)
static void snapshot_swapchain_frame(struct swapchain_data *data)
{
- struct instance_data *instance_data = data->device->instance;
+ struct device_data *device_data = data->device;
+ struct instance_data *instance_data = device_data->instance;
+ uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats);
uint64_t now = os_time_get(); /* us */
if (data->last_present_time) {
- data->frame_times[(data->n_frames - 1) % ARRAY_SIZE(data->frame_times)] =
- ((double)now - (double)data->last_present_time) / 1000.0;
+ data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] =
+ now - data->last_present_time;
+ }
+
+ memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx]));
+ for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+ data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
}
if (data->last_fps_update) {
double elapsed = (double)(now - data->last_fps_update); /* us */
if (elapsed >= instance_data->params.fps_sampling_period) {
data->fps = 1000000.0f * data->n_frames_since_update / elapsed;
- data->n_frames_since_update = 0;
- data->last_fps_update = now;
if (instance_data->params.output_file) {
fprintf(instance_data->params.output_file, "%.2f\n", data->fps);
fflush(instance_data->params.output_file);
}
+
+ memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats));
+ data->n_frames_since_update = 0;
+ data->last_fps_update = now;
}
} else {
data->last_fps_update = now;
}
- struct device_data *device_data = data->device;
- data->stats[data->n_frames % ARRAY_SIZE(data->frame_times)] = device_data->stats;
- memset(&device_data->stats, 0, sizeof(device_data->stats));
+ memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats));
+ memset(&data->frame_stats, 0, sizeof(device_data->frame_stats));
data->last_present_time = now;
data->n_frames++;
data->n_frames_since_update++;
}
-static float get_frame_timing(void *_data, int _idx)
+static float get_time_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->frame_times) - _idx) > (data->n_frames - 2))
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->frame_times) +
- (data->n_frames - 2) < ARRAY_SIZE(data->frame_times) ?
- _idx - (data->n_frames - 2) :
- _idx + (data->n_frames - 2);
- idx %= ARRAY_SIZE(data->frame_times);
- return data->frame_times[idx];
-}
-
-static float get_acquire_timing(void *_data, int _idx)
-{
- struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->acquire_times) - _idx) > data->n_acquire)
- return 0.0f;
- int idx = ARRAY_SIZE(data->acquire_times) +
- data->n_acquire < ARRAY_SIZE(data->acquire_times) ?
- _idx - data->n_acquire :
- _idx + data->n_acquire;
- idx %= ARRAY_SIZE(data->acquire_times);
- return data->acquire_times[idx];
+ int idx = ARRAY_SIZE(data->frames_stats) +
+ data->n_frames < ARRAY_SIZE(data->frames_stats) ?
+ _idx - data->n_frames :
+ _idx + data->n_frames;
+ idx %= ARRAY_SIZE(data->frames_stats);
+ /* Time stats are in us. */
+ return data->frames_stats[idx].stats[data->stat_selector] / 1000.0f;
}
static float get_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->stats) - _idx) > data->n_frames)
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->stats) +
- data->n_frames < ARRAY_SIZE(data->stats) ?
+ int idx = ARRAY_SIZE(data->frames_stats) +
+ data->n_frames < ARRAY_SIZE(data->frames_stats) ?
_idx - data->n_frames :
_idx + data->n_frames;
- idx %= ARRAY_SIZE(data->stats);
- return data->stats[idx].stats[data->stat_selector];
+ idx %= ARRAY_SIZE(data->frames_stats);
+ return data->frames_stats[idx].stats[data->stat_selector];
}
static void position_layer(struct swapchain_data *data)
@@ -490,69 +523,54 @@ static void compute_swapchain_display(struct swapchain_data *data)
if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps])
ImGui::Text("FPS: %.2f" , data->fps);
- if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_frame_timing]){
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_frames - 2, ARRAY_SIZE(data->frame_times)); i++) {
- min_time = MIN2(min_time, data->frame_times[i]);
- max_time = MAX2(max_time, data->frame_times[i]);
- }
- ImGui::PlotHistogram("##Frame timings", get_frame_timing, data,
- ARRAY_SIZE(data->frame_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Frame timing: %.3fms [%.3f, %.3f]",
- get_frame_timing(data, ARRAY_SIZE(data->frame_times) - 1),
- min_time, max_time);
+ /* Recompute min/max */
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = UINT64_MAX;
+ data->stats_max.stats[s] = 0;
}
-
- if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_acquire_timing]) {
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_acquire - 2, ARRAY_SIZE(data->acquire_times)); i++) {
- min_time = MIN2(min_time, data->acquire_times[i]);
- max_time = MAX2(max_time, data->acquire_times[i]);
+ for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) {
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s],
+ data->stats_min.stats[s]);
+ data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s],
+ data->stats_max.stats[s]);
}
- ImGui::PlotHistogram("##Acquire timings", get_acquire_timing, data,
- ARRAY_SIZE(data->acquire_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Acquire timing: %.3fms [%.3f, %.3f]",
- get_acquire_timing(data, ARRAY_SIZE(data->acquire_times) - 1),
- min_time, max_time);
}
-
- for (uint32_t i = 0; i < ARRAY_SIZE(data->stats_min.stats); i++) {
- data->stats_min.stats[i] = UINT32_MAX;
- data->stats_max.stats[i] = 0;
- }
- for (uint32_t i = 0; i < MIN2(data->n_frames - 1, ARRAY_SIZE(data->stats)); i++) {
- for (uint32_t j = 0; j < ARRAY_SIZE(data->stats[0].stats); j++) {
- data->stats_min.stats[j] = MIN2(data->stats[i].stats[j],
- data->stats_min.stats[j]);
- data->stats_max.stats[j] = MAX2(data->stats[i].stats[j],
- data->stats_max.stats[j]);
- }
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ assert(data->stats_min.stats[s] != UINT64_MAX);
}
- for (uint32_t i = 0; i < ARRAY_SIZE(device_data->stats.stats); i++) {
- if (!instance_data->params.enabled[i] ||
- i == OVERLAY_PARAM_ENABLED_fps ||
- i == OVERLAY_PARAM_ENABLED_frame_timing ||
- i == OVERLAY_PARAM_ENABLED_acquire_timing)
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ if (!instance_data->params.enabled[s] ||
+ s == OVERLAY_PARAM_ENABLED_fps)
continue;
char hash[40];
- snprintf(hash, sizeof(hash), "##%s", overlay_param_names[i]);
- data->stat_selector = (enum overlay_param_enabled) i;
-
- ImGui::PlotHistogram(hash, get_stat, data,
- ARRAY_SIZE(data->stats), 0,
- NULL,
- data->stats_min.stats[i],
- data->stats_max.stats[i],
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("%s: %.0f [%u, %u]", overlay_param_names[i],
- get_stat(data, ARRAY_SIZE(data->stats) - 1),
- data->stats_min.stats[i], data->stats_max.stats[i]);
+ snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
+ data->stat_selector = (enum overlay_param_enabled) s;
+
+ if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
+ s == OVERLAY_PARAM_ENABLED_acquire_timing) {
+ double min_time = data->stats_min.stats[s] / 1000.0f;
+ double max_time = data->stats_max.stats[s] / 1000.0f;
+ ImGui::PlotHistogram(hash, get_time_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL, min_time, max_time,
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s],
+ get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ min_time, max_time);
+ } else {
+ ImGui::PlotHistogram(hash, get_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL,
+ data->stats_min.stats[s],
+ data->stats_max.stats[s],
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s],
+ get_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ data->stats_min.stats[s], data->stats_max.stats[s]);
+ }
}
data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f);
ImGui::End();
@@ -1362,8 +1380,10 @@ static void before_present(struct swapchain_data *swapchain_data,
{
snapshot_swapchain_frame(swapchain_data);
- compute_swapchain_display(swapchain_data);
- render_swapchain_display(swapchain_data, imageIndex);
+ if (swapchain_data->n_frames > 0) {
+ compute_swapchain_display(swapchain_data);
+ render_swapchain_display(swapchain_data, imageIndex);
+ }
}
VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR(
@@ -1448,10 +1468,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR(
semaphore, fence, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
@@ -1468,10 +1486,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR(
VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
@@ -1483,10 +1499,11 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDraw(
uint32_t firstVertex,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount,
firstVertex, firstInstance);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed(
@@ -1497,10 +1514,11 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed(
int32_t vertexOffset,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount,
firstIndex, vertexOffset, firstInstance);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect(
@@ -1510,9 +1528,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect(
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect(
@@ -1522,9 +1541,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect(
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR(
@@ -1536,11 +1556,12 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR(
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR(
@@ -1552,11 +1573,12 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR(
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch(
@@ -1565,9 +1587,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch(
uint32_t groupCountY,
uint32_t groupCountZ)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect(
@@ -1575,9 +1598,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect(
VkBuffer buffer,
VkDeviceSize offset)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline(
@@ -1585,29 +1609,85 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline(
VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
- device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
switch (pipelineBindPoint) {
- case VK_PIPELINE_BIND_POINT_GRAPHICS: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
- case VK_PIPELINE_BIND_POINT_COMPUTE: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
- case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
+ case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
+ case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
+ case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
default: break;
}
+ struct device_data *device_data = cmd_buffer_data->device;
+ device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(VkDevice device,
- const VkCommandBufferAllocateInfo* pAllocateInfo,
- VkCommandBuffer* pCommandBuffers)
+VKAPI_ATTR VkResult VKAPI_CALL overlay_BeginCommandBuffer(
+ VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo* pBeginInfo)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer(
+ VkCommandBuffer commandBuffer)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ if (cmd_buffer_data->pipeline_query_pool) {
+ device_data->vtable.CmdEndQuery(commandBuffer,
+ cmd_buffer_data->pipeline_query_pool,
+ cmd_buffer_data->query_index);
+ }
+
+ return device_data->vtable.EndCommandBuffer(commandBuffer);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_ResetCommandBuffer(
+ VkCommandBuffer commandBuffer,
+ VkCommandBufferResetFlags flags)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
+
+ return device_data->vtable.ResetCommandBuffer(commandBuffer, flags);
+}
+VKAPI_ATTR void VKAPI_CALL overlay_CmdExecuteCommands(
+ VkCommandBuffer commandBuffer,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCommandBuffers)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ /* Add the stats of the executed command buffers to the primary one. */
+ for (uint32_t c = 0; c < commandBufferCount; c++) {
+ struct command_buffer_data *sec_cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[c]);
+
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++)
+ cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s];
+ }
+
+ device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(
+ VkDevice device,
+ const VkCommandBufferAllocateInfo* pAllocateInfo,
+ VkCommandBuffer* pCommandBuffers)
+{
+ struct device_data *device_data = FIND_DEVICE_DATA(device);
VkResult result =
device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
- if (result != VK_SUCCESS) return result;
-
+ if (result != VK_SUCCESS)
+ return result;
for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
- map_object(pCommandBuffers[i], device_data);
-
+ new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, device_data);
return result;
}
@@ -1617,10 +1697,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device,
const VkCommandBuffer* pCommandBuffers)
{
struct device_data *device_data = FIND_DEVICE_DATA(device);
-
- for (uint32_t i = 0; i < commandBufferCount; i++)
- unmap_object(pCommandBuffers[i]);
-
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
+ destroy_command_buffer_data(cmd_buffer_data);
+ }
device_data->vtable.FreeCommandBuffers(device, commandPool,
commandBufferCount, pCommandBuffers);
}
@@ -1634,7 +1714,18 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit(
struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
struct device_data *device_data = queue_data->device;
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
+
+ for (uint32_t s = 0; s < submitCount; s++) {
+ for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) {
+ struct command_buffer_data *cmd_buffer_data =
+ FIND_CMD_BUFFER_DATA(pSubmits[s].pCommandBuffers[c]);
+
+ /* Merge the submitted command buffer stats into the device. */
+ for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
+ device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
+ }
+ }
return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
}
@@ -1740,6 +1831,11 @@ static const struct {
{ "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr },
#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn }
ADD_HOOK(AllocateCommandBuffers),
+ ADD_HOOK(FreeCommandBuffers),
+ ADD_HOOK(ResetCommandBuffer),
+ ADD_HOOK(BeginCommandBuffer),
+ ADD_HOOK(EndCommandBuffer),
+ ADD_HOOK(CmdExecuteCommands),
ADD_HOOK(CmdDraw),
ADD_HOOK(CmdDrawIndexed),
diff --git a/src/vulkan/overlay-layer/overlay_params.h b/src/vulkan/overlay-layer/overlay_params.h
index 75bcf956e8e..aaa03eb716d 100644
--- a/src/vulkan/overlay-layer/overlay_params.h
+++ b/src/vulkan/overlay-layer/overlay_params.h
@@ -47,6 +47,7 @@ extern "C" {
OVERLAY_PARAM_BOOL(pipeline_graphics) \
OVERLAY_PARAM_BOOL(pipeline_compute) \
OVERLAY_PARAM_BOOL(pipeline_raytracing) \
+ OVERLAY_PARAM_BOOL(acquire) \
OVERLAY_PARAM_BOOL(acquire_timing) \
OVERLAY_PARAM_CUSTOM(fps_sampling_period) \
OVERLAY_PARAM_CUSTOM(output_file) \