summary | refs | log | tree | commit | diff | stats
path: root/src/vulkan
diff options
context:
space:
mode:
Diffstat (limited to 'src/vulkan')
-rw-r--r-- src/vulkan/overlay-layer/overlay.cpp 378
-rw-r--r-- src/vulkan/overlay-layer/overlay_params.h 1
2 files changed, 238 insertions, 141 deletions
diff --git a/src/vulkan/overlay-layer/overlay.cpp b/src/vulkan/overlay-layer/overlay.cpp
index 8de22307e14..979bb7bf913 100644
--- a/src/vulkan/overlay-layer/overlay.cpp
+++ b/src/vulkan/overlay-layer/overlay.cpp
@@ -50,10 +50,10 @@ struct instance_data {
};
struct frame_stat {
- uint32_t stats[OVERLAY_PARAM_ENABLED_MAX];
+ uint64_t stats[OVERLAY_PARAM_ENABLED_MAX];
};
-/* Mapped from VkDevice/VkCommandBuffer */
+/* Mapped from VkDevice */
struct queue_data;
struct device_data {
struct instance_data *instance;
@@ -71,6 +71,20 @@ struct device_data {
struct queue_data **queues;
uint32_t n_queues;
+ /* For a single frame */
+ struct frame_stat frame_stats;
+};
+
+/* Mapped from VkCommandBuffer */
+struct command_buffer_data {
+ struct device_data *device;
+
+ VkCommandBufferLevel level;
+
+ VkCommandBuffer cmd_buffer;
+ VkQueryPool pipeline_query_pool;
+ uint32_t query_index;
+
struct frame_stat stats;
};
@@ -143,14 +157,15 @@ struct swapchain_data {
uint64_t last_fps_update;
double fps;
- double frame_times[200];
-
- double acquire_times[200];
- uint64_t n_acquire;
-
enum overlay_param_enabled stat_selector;
struct frame_stat stats_min, stats_max;
- struct frame_stat stats[200];
+ struct frame_stat frames_stats[200];
+
+ /* Over a single frame */
+ struct frame_stat frame_stats;
+
+ /* Over fps_sampling_period */
+ struct frame_stat accumulated_stats;
};
static struct hash_table *vk_object_to_data = NULL;
@@ -168,6 +183,7 @@ static inline void ensure_vk_object_map(void)
}
#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj))
+#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data((void *) obj))
#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj))
#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj))
#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
@@ -354,6 +370,26 @@ static void destroy_device_data(struct device_data *data)
}
/**/
+static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
+ VkCommandBufferLevel level,
+ struct device_data *device_data)
+{
+ struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data);
+ data->device = device_data;
+ data->cmd_buffer = cmd_buffer;
+ data->level = level;
+ map_object((void *) data->cmd_buffer, data);
+ return data;
+}
+
+static void destroy_command_buffer_data(struct command_buffer_data *data)
+{
+ unmap_object((void *) data->cmd_buffer);
+ ralloc_free(data);
+}
+
+
+/**/
static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
struct device_data *device_data)
{
@@ -373,75 +409,72 @@ static void destroy_swapchain_data(struct swapchain_data *data)
static void snapshot_swapchain_frame(struct swapchain_data *data)
{
- struct instance_data *instance_data = data->device->instance;
+ struct device_data *device_data = data->device;
+ struct instance_data *instance_data = device_data->instance;
+ uint32_t f_idx = data->n_frames % ARRAY_SIZE(data->frames_stats);
uint64_t now = os_time_get(); /* us */
if (data->last_present_time) {
- data->frame_times[(data->n_frames - 1) % ARRAY_SIZE(data->frame_times)] =
- ((double)now - (double)data->last_present_time) / 1000.0;
+ data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame_timing] =
+ now - data->last_present_time;
+ }
+
+ memset(&data->frames_stats[f_idx], 0, sizeof(data->frames_stats[f_idx]));
+ for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
+ data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s];
}
if (data->last_fps_update) {
double elapsed = (double)(now - data->last_fps_update); /* us */
if (elapsed >= instance_data->params.fps_sampling_period) {
data->fps = 1000000.0f * data->n_frames_since_update / elapsed;
- data->n_frames_since_update = 0;
- data->last_fps_update = now;
if (instance_data->params.output_file) {
fprintf(instance_data->params.output_file, "%.2f\n", data->fps);
fflush(instance_data->params.output_file);
}
+
+ memset(&data->accumulated_stats, 0, sizeof(data->accumulated_stats));
+ data->n_frames_since_update = 0;
+ data->last_fps_update = now;
}
} else {
data->last_fps_update = now;
}
- struct device_data *device_data = data->device;
- data->stats[data->n_frames % ARRAY_SIZE(data->frame_times)] = device_data->stats;
- memset(&device_data->stats, 0, sizeof(device_data->stats));
+ memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats));
+ memset(&data->frame_stats, 0, sizeof(device_data->frame_stats));
data->last_present_time = now;
data->n_frames++;
data->n_frames_since_update++;
}
-static float get_frame_timing(void *_data, int _idx)
+static float get_time_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->frame_times) - _idx) > (data->n_frames - 2))
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->frame_times) +
- (data->n_frames - 2) < ARRAY_SIZE(data->frame_times) ?
- _idx - (data->n_frames - 2) :
- _idx + (data->n_frames - 2);
- idx %= ARRAY_SIZE(data->frame_times);
- return data->frame_times[idx];
-}
-
-static float get_acquire_timing(void *_data, int _idx)
-{
- struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->acquire_times) - _idx) > data->n_acquire)
- return 0.0f;
- int idx = ARRAY_SIZE(data->acquire_times) +
- data->n_acquire < ARRAY_SIZE(data->acquire_times) ?
- _idx - data->n_acquire :
- _idx + data->n_acquire;
- idx %= ARRAY_SIZE(data->acquire_times);
- return data->acquire_times[idx];
+ int idx = ARRAY_SIZE(data->frames_stats) +
+ data->n_frames < ARRAY_SIZE(data->frames_stats) ?
+ _idx - data->n_frames :
+ _idx + data->n_frames;
+ idx %= ARRAY_SIZE(data->frames_stats);
+ /* Time stats are in us. */
+ return data->frames_stats[idx].stats[data->stat_selector] / 1000.0f;
}
static float get_stat(void *_data, int _idx)
{
struct swapchain_data *data = (struct swapchain_data *) _data;
- if ((ARRAY_SIZE(data->stats) - _idx) > data->n_frames)
+ if ((ARRAY_SIZE(data->frames_stats) - _idx) > data->n_frames)
return 0.0f;
- int idx = ARRAY_SIZE(data->stats) +
- data->n_frames < ARRAY_SIZE(data->stats) ?
+ int idx = ARRAY_SIZE(data->frames_stats) +
+ data->n_frames < ARRAY_SIZE(data->frames_stats) ?
_idx - data->n_frames :
_idx + data->n_frames;
- idx %= ARRAY_SIZE(data->stats);
- return data->stats[idx].stats[data->stat_selector];
+ idx %= ARRAY_SIZE(data->frames_stats);
+ return data->frames_stats[idx].stats[data->stat_selector];
}
static void position_layer(struct swapchain_data *data)
@@ -490,69 +523,54 @@ static void compute_swapchain_display(struct swapchain_data *data)
if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_fps])
ImGui::Text("FPS: %.2f" , data->fps);
- if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_frame_timing]){
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_frames - 2, ARRAY_SIZE(data->frame_times)); i++) {
- min_time = MIN2(min_time, data->frame_times[i]);
- max_time = MAX2(max_time, data->frame_times[i]);
- }
- ImGui::PlotHistogram("##Frame timings", get_frame_timing, data,
- ARRAY_SIZE(data->frame_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Frame timing: %.3fms [%.3f, %.3f]",
- get_frame_timing(data, ARRAY_SIZE(data->frame_times) - 1),
- min_time, max_time);
+ /* Recompute min/max */
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = UINT64_MAX;
+ data->stats_max.stats[s] = 0;
}
-
- if (instance_data->params.enabled[OVERLAY_PARAM_ENABLED_acquire_timing]) {
- double min_time = FLT_MAX, max_time = 0.0f;
- for (uint32_t i = 0; i < MIN2(data->n_acquire - 2, ARRAY_SIZE(data->acquire_times)); i++) {
- min_time = MIN2(min_time, data->acquire_times[i]);
- max_time = MAX2(max_time, data->acquire_times[i]);
+ for (uint32_t f = 0; f < MIN2(data->n_frames, ARRAY_SIZE(data->frames_stats)); f++) {
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ data->stats_min.stats[s] = MIN2(data->frames_stats[f].stats[s],
+ data->stats_min.stats[s]);
+ data->stats_max.stats[s] = MAX2(data->frames_stats[f].stats[s],
+ data->stats_max.stats[s]);
}
- ImGui::PlotHistogram("##Acquire timings", get_acquire_timing, data,
- ARRAY_SIZE(data->acquire_times), 0,
- NULL, min_time, max_time,
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("Acquire timing: %.3fms [%.3f, %.3f]",
- get_acquire_timing(data, ARRAY_SIZE(data->acquire_times) - 1),
- min_time, max_time);
}
-
- for (uint32_t i = 0; i < ARRAY_SIZE(data->stats_min.stats); i++) {
- data->stats_min.stats[i] = UINT32_MAX;
- data->stats_max.stats[i] = 0;
- }
- for (uint32_t i = 0; i < MIN2(data->n_frames - 1, ARRAY_SIZE(data->stats)); i++) {
- for (uint32_t j = 0; j < ARRAY_SIZE(data->stats[0].stats); j++) {
- data->stats_min.stats[j] = MIN2(data->stats[i].stats[j],
- data->stats_min.stats[j]);
- data->stats_max.stats[j] = MAX2(data->stats[i].stats[j],
- data->stats_max.stats[j]);
- }
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ assert(data->stats_min.stats[s] != UINT64_MAX);
}
- for (uint32_t i = 0; i < ARRAY_SIZE(device_data->stats.stats); i++) {
- if (!instance_data->params.enabled[i] ||
- i == OVERLAY_PARAM_ENABLED_fps ||
- i == OVERLAY_PARAM_ENABLED_frame_timing ||
- i == OVERLAY_PARAM_ENABLED_acquire_timing)
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) {
+ if (!instance_data->params.enabled[s] ||
+ s == OVERLAY_PARAM_ENABLED_fps)
continue;
char hash[40];
- snprintf(hash, sizeof(hash), "##%s", overlay_param_names[i]);
- data->stat_selector = (enum overlay_param_enabled) i;
-
- ImGui::PlotHistogram(hash, get_stat, data,
- ARRAY_SIZE(data->stats), 0,
- NULL,
- data->stats_min.stats[i],
- data->stats_max.stats[i],
- ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
- ImGui::Text("%s: %.0f [%u, %u]", overlay_param_names[i],
- get_stat(data, ARRAY_SIZE(data->stats) - 1),
- data->stats_min.stats[i], data->stats_max.stats[i]);
+ snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
+ data->stat_selector = (enum overlay_param_enabled) s;
+
+ if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
+ s == OVERLAY_PARAM_ENABLED_acquire_timing) {
+ double min_time = data->stats_min.stats[s] / 1000.0f;
+ double max_time = data->stats_max.stats[s] / 1000.0f;
+ ImGui::PlotHistogram(hash, get_time_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL, min_time, max_time,
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.3fms [%.3f, %.3f]", overlay_param_names[s],
+ get_time_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ min_time, max_time);
+ } else {
+ ImGui::PlotHistogram(hash, get_stat, data,
+ ARRAY_SIZE(data->frames_stats), 0,
+ NULL,
+ data->stats_min.stats[s],
+ data->stats_max.stats[s],
+ ImVec2(ImGui::GetContentRegionAvailWidth(), 30));
+ ImGui::Text("%s: %.0f [%" PRIu64 ", %" PRIu64 "]", overlay_param_names[s],
+ get_stat(data, ARRAY_SIZE(data->frames_stats) - 1),
+ data->stats_min.stats[s], data->stats_max.stats[s]);
+ }
}
data->window_size = ImVec2(data->window_size.x, ImGui::GetCursorPosY() + 10.0f);
ImGui::End();
@@ -1362,8 +1380,10 @@ static void before_present(struct swapchain_data *swapchain_data,
{
snapshot_swapchain_frame(swapchain_data);
- compute_swapchain_display(swapchain_data);
- render_swapchain_display(swapchain_data, imageIndex);
+ if (swapchain_data->n_frames > 0) {
+ compute_swapchain_display(swapchain_data);
+ render_swapchain_display(swapchain_data, imageIndex);
+ }
}
VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateSwapchainKHR(
@@ -1448,10 +1468,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImageKHR(
semaphore, fence, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
@@ -1468,10 +1486,8 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AcquireNextImage2KHR(
VkResult result = device_data->vtable.AcquireNextImage2KHR(device, pAcquireInfo, pImageIndex);
uint64_t ts1 = os_time_get();
- swapchain_data->acquire_times[swapchain_data->n_acquire %
- ARRAY_SIZE(swapchain_data->acquire_times)] =
- ((double)ts1 - (double)ts0) / 1000.0;
- swapchain_data->n_acquire++;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire_timing] += ts1 - ts0;
+ swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_acquire]++;
return result;
}
@@ -1483,10 +1499,11 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDraw(
uint32_t firstVertex,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDraw(commandBuffer, vertexCount, instanceCount,
firstVertex, firstInstance);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed(
@@ -1497,10 +1514,11 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexed(
int32_t vertexOffset,
uint32_t firstInstance)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexed(commandBuffer, indexCount, instanceCount,
firstIndex, vertexOffset, firstInstance);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect(
@@ -1510,9 +1528,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirect(
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect(
@@ -1522,9 +1541,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirect(
uint32_t drawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirect(commandBuffer, buffer, offset, drawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR(
@@ -1536,11 +1556,12 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndirectCountKHR(
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indirect_count]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR(
@@ -1552,11 +1573,12 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDrawIndexedIndirectCountKHR(
uint32_t maxDrawCount,
uint32_t stride)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDrawIndexedIndirectCountKHR(commandBuffer, buffer, offset,
countBuffer, countBufferOffset,
maxDrawCount, stride);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_draw_indexed_indirect_count]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch(
@@ -1565,9 +1587,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatch(
uint32_t groupCountY,
uint32_t groupCountZ)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect(
@@ -1575,9 +1598,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdDispatchIndirect(
VkBuffer buffer,
VkDeviceSize offset)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
+ struct device_data *device_data = cmd_buffer_data->device;
device_data->vtable.CmdDispatchIndirect(commandBuffer, buffer, offset);
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_dispatch_indirect]++;
}
VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline(
@@ -1585,29 +1609,85 @@ VKAPI_ATTR void VKAPI_CALL overlay_CmdBindPipeline(
VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline)
{
- struct device_data *device_data = FIND_DEVICE_DATA(commandBuffer);
- device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
switch (pipelineBindPoint) {
- case VK_PIPELINE_BIND_POINT_GRAPHICS: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
- case VK_PIPELINE_BIND_POINT_COMPUTE: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
- case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: device_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
+ case VK_PIPELINE_BIND_POINT_GRAPHICS: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_graphics]++; break;
+ case VK_PIPELINE_BIND_POINT_COMPUTE: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_compute]++; break;
+ case VK_PIPELINE_BIND_POINT_RAY_TRACING_NV: cmd_buffer_data->stats.stats[OVERLAY_PARAM_ENABLED_pipeline_raytracing]++; break;
default: break;
}
+ struct device_data *device_data = cmd_buffer_data->device;
+ device_data->vtable.CmdBindPipeline(commandBuffer, pipelineBindPoint, pipeline);
}
-VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(VkDevice device,
- const VkCommandBufferAllocateInfo* pAllocateInfo,
- VkCommandBuffer* pCommandBuffers)
+VKAPI_ATTR VkResult VKAPI_CALL overlay_BeginCommandBuffer(
+ VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo* pBeginInfo)
{
- struct device_data *device_data = FIND_DEVICE_DATA(device);
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer(
+ VkCommandBuffer commandBuffer)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ if (cmd_buffer_data->pipeline_query_pool) {
+ device_data->vtable.CmdEndQuery(commandBuffer,
+ cmd_buffer_data->pipeline_query_pool,
+ cmd_buffer_data->query_index);
+ }
+
+ return device_data->vtable.EndCommandBuffer(commandBuffer);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_ResetCommandBuffer(
+ VkCommandBuffer commandBuffer,
+ VkCommandBufferResetFlags flags)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
+
+ return device_data->vtable.ResetCommandBuffer(commandBuffer, flags);
+}
+VKAPI_ATTR void VKAPI_CALL overlay_CmdExecuteCommands(
+ VkCommandBuffer commandBuffer,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCommandBuffers)
+{
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
+ struct device_data *device_data = cmd_buffer_data->device;
+
+ /* Add the stats of the executed command buffers to the primary one. */
+ for (uint32_t c = 0; c < commandBufferCount; c++) {
+ struct command_buffer_data *sec_cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[c]);
+
+ for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++)
+ cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s];
+ }
+
+ device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(
+ VkDevice device,
+ const VkCommandBufferAllocateInfo* pAllocateInfo,
+ VkCommandBuffer* pCommandBuffers)
+{
+ struct device_data *device_data = FIND_DEVICE_DATA(device);
VkResult result =
device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
- if (result != VK_SUCCESS) return result;
-
+ if (result != VK_SUCCESS)
+ return result;
for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
- map_object(pCommandBuffers[i], device_data);
-
+ new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, device_data);
return result;
}
@@ -1617,10 +1697,10 @@ VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device,
const VkCommandBuffer* pCommandBuffers)
{
struct device_data *device_data = FIND_DEVICE_DATA(device);
-
- for (uint32_t i = 0; i < commandBufferCount; i++)
- unmap_object(pCommandBuffers[i]);
-
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
+ destroy_command_buffer_data(cmd_buffer_data);
+ }
device_data->vtable.FreeCommandBuffers(device, commandPool,
commandBufferCount, pCommandBuffers);
}
@@ -1634,7 +1714,18 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit(
struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
struct device_data *device_data = queue_data->device;
- device_data->stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
+ device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_submit]++;
+
+ for (uint32_t s = 0; s < submitCount; s++) {
+ for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) {
+ struct command_buffer_data *cmd_buffer_data =
+ FIND_CMD_BUFFER_DATA(pSubmits[s].pCommandBuffers[c]);
+
+ /* Merge the submitted command buffer stats into the device. */
+ for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
+ device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
+ }
+ }
return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
}
@@ -1740,6 +1831,11 @@ static const struct {
{ "vkGetDeviceProcAddr", (void *) vkGetDeviceProcAddr },
#define ADD_HOOK(fn) { "vk" # fn, (void *) overlay_ ## fn }
ADD_HOOK(AllocateCommandBuffers),
+ ADD_HOOK(FreeCommandBuffers),
+ ADD_HOOK(ResetCommandBuffer),
+ ADD_HOOK(BeginCommandBuffer),
+ ADD_HOOK(EndCommandBuffer),
+ ADD_HOOK(CmdExecuteCommands),
ADD_HOOK(CmdDraw),
ADD_HOOK(CmdDrawIndexed),
diff --git a/src/vulkan/overlay-layer/overlay_params.h b/src/vulkan/overlay-layer/overlay_params.h
index 75bcf956e8e..aaa03eb716d 100644
--- a/src/vulkan/overlay-layer/overlay_params.h
+++ b/src/vulkan/overlay-layer/overlay_params.h
@@ -47,6 +47,7 @@ extern "C" {
OVERLAY_PARAM_BOOL(pipeline_graphics) \
OVERLAY_PARAM_BOOL(pipeline_compute) \
OVERLAY_PARAM_BOOL(pipeline_raytracing) \
+ OVERLAY_PARAM_BOOL(acquire) \
OVERLAY_PARAM_BOOL(acquire_timing) \
OVERLAY_PARAM_CUSTOM(fps_sampling_period) \
OVERLAY_PARAM_CUSTOM(output_file) \