summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKeith Packard <[email protected]>2018-10-11 16:05:18 -0700
committerKeith Packard <[email protected]>2018-10-17 20:10:15 -0700
commit67a2c1493c068281936fecba9fa6784becf08f8e (patch)
treec9586efcc42aa08e1387519569b7554eaa021ffb /src
parenta11cafbd7af1980a277ffbca00acb0b1f7f25309 (diff)
vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]
Offers three clocks, device, clock monotonic and clock monotonic raw. Could use some kernel support to reduce the deviation between clock values. v2: Ensure deviation is at least as big as the GPU time interval. v3: Set device->lost when returning DEVICE_LOST. Use MAX2 and DIV_ROUND_UP instead of open coding these. Delete spurious TIMESTAMP in radv version. Suggested-by: Jason Ekstrand <[email protected]> Suggested-by: Lionel Landwerlin <[email protected]> v4: Add anv_gem_reg_read to anv_gem_stubs.c Suggested-by: Jason Ekstrand <[email protected]> v5: Adjust maxDeviation computation to max(sampled_clock_period) + sample_interval. Suggested-by: Bas Nieuwenhuizen <[email protected]> Suggested-by: Jason Ekstrand <[email protected]> Signed-off-by: Keith Packard <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/amd/vulkan/radv_device.c119
-rw-r--r--src/amd/vulkan/radv_extensions.py1
-rw-r--r--src/intel/vulkan/anv_device.c127
-rw-r--r--src/intel/vulkan/anv_extensions.py1
-rw-r--r--src/intel/vulkan/anv_gem.c13
-rw-r--r--src/intel/vulkan/anv_gem_stubs.c7
-rw-r--r--src/intel/vulkan/anv_private.h2
7 files changed, 270 insertions, 0 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 85e5d9014d8..6eb37472992 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4957,3 +4957,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+ VK_TIME_DOMAIN_DEVICE_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+ VkPhysicalDevice physicalDevice,
+ uint32_t *pTimeDomainCount,
+ VkTimeDomainEXT *pTimeDomains)
+{
+ int d;
+ VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+ for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+ vk_outarray_append(&out, i) {
+ *i = radv_time_domains[d];
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+ struct timespec current;
+ int ret;
+
+ ret = clock_gettime(clock_id, &current);
+ if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+ ret = clock_gettime(CLOCK_MONOTONIC, &current);
+ if (ret < 0)
+ return 0;
+
+ return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+VkResult radv_GetCalibratedTimestampsEXT(
+ VkDevice _device,
+ uint32_t timestampCount,
+ const VkCalibratedTimestampInfoEXT *pTimestampInfos,
+ uint64_t *pTimestamps,
+ uint64_t *pMaxDeviation)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
+ int d;
+ uint64_t begin, end;
+ uint64_t max_clock_period = 0;
+
+ begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+ for (d = 0; d < timestampCount; d++) {
+ switch (pTimestampInfos[d].timeDomain) {
+ case VK_TIME_DOMAIN_DEVICE_EXT:
+ pTimestamps[d] = device->ws->query_value(device->ws,
+ RADEON_TIMESTAMP);
+ uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
+ max_clock_period = MAX2(max_clock_period, device_period);
+ break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+ max_clock_period = MAX2(max_clock_period, 1);
+ break;
+
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ pTimestamps[d] = begin;
+ break;
+ default:
+ pTimestamps[d] = 0;
+ break;
+ }
+ }
+
+ end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+ /*
+ * The maximum deviation is the sum of the interval over which we
+ * perform the sampling and the maximum period of any sampled
+ * clock. That's because the maximum skew between any two sampled
+ * clock edges is when the sampled clock with the largest period is
+ * sampled at the end of that period but right at the beginning of the
+ * sampling interval and some other clock is sampled right at the
+ * begining of its sampling period and right at the end of the
+ * sampling interval. Let's assume the GPU has the longest clock
+ * period and that the application is sampling GPU and monotonic:
+ *
+ * s e
+ * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * g
+ * 0 1 2 3
+ * GPU -----_____-----_____-----_____-----_____
+ *
+ * m
+ * x y z 0 1 2 3 4 5 6 7 8 9 a b c
+ * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * Interval <----------------->
+ * Deviation <-------------------------->
+ *
+ * s = read(raw) 2
+ * g = read(GPU) 1
+ * m = read(monotonic) 2
+ * e = read(raw) b
+ *
+ * We round the sample interval up by one tick to cover sampling error
+ * in the interval clock
+ */
+
+ uint64_t sample_interval = end - begin + 1;
+
+ *pMaxDeviation = sample_interval + max_clock_period;
+
+ return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@ EXTENSIONS = [
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+ Extension('VK_EXT_calibrated_timestamps', 1, True),
Extension('VK_EXT_conditional_rendering', 1, True),
Extension('VK_EXT_conservative_rasterization', 1, 'device->rad_info.chip_class >= GFX9'),
Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 54d353e400f..cfcf3cb31c7 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -3035,6 +3035,133 @@ void anv_DestroyFramebuffer(
vk_free2(&device->alloc, pAllocator, fb);
}
+static const VkTimeDomainEXT anv_time_domains[] = {
+ VK_TIME_DOMAIN_DEVICE_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+ VkPhysicalDevice physicalDevice,
+ uint32_t *pTimeDomainCount,
+ VkTimeDomainEXT *pTimeDomains)
+{
+ int d;
+ VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+ for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
+ vk_outarray_append(&out, i) {
+ *i = anv_time_domains[d];
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
+static uint64_t
+anv_clock_gettime(clockid_t clock_id)
+{
+ struct timespec current;
+ int ret;
+
+ ret = clock_gettime(clock_id, &current);
+ if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+ ret = clock_gettime(CLOCK_MONOTONIC, &current);
+ if (ret < 0)
+ return 0;
+
+ return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+#define TIMESTAMP 0x2358
+
+VkResult anv_GetCalibratedTimestampsEXT(
+ VkDevice _device,
+ uint32_t timestampCount,
+ const VkCalibratedTimestampInfoEXT *pTimestampInfos,
+ uint64_t *pTimestamps,
+ uint64_t *pMaxDeviation)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ uint64_t timestamp_frequency = device->info.timestamp_frequency;
+ int ret;
+ int d;
+ uint64_t begin, end;
+ uint64_t max_clock_period = 0;
+
+ begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+ for (d = 0; d < timestampCount; d++) {
+ switch (pTimestampInfos[d].timeDomain) {
+ case VK_TIME_DOMAIN_DEVICE_EXT:
+ ret = anv_gem_reg_read(device, TIMESTAMP | 1,
+ &pTimestamps[d]);
+
+ if (ret != 0) {
+ device->lost = TRUE;
+ return VK_ERROR_DEVICE_LOST;
+ }
+ uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
+ max_clock_period = MAX2(max_clock_period, device_period);
+ break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
+ max_clock_period = MAX2(max_clock_period, 1);
+ break;
+
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ pTimestamps[d] = begin;
+ break;
+ default:
+ pTimestamps[d] = 0;
+ break;
+ }
+ }
+
+ end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+ /*
+ * The maximum deviation is the sum of the interval over which we
+ * perform the sampling and the maximum period of any sampled
+ * clock. That's because the maximum skew between any two sampled
+ * clock edges is when the sampled clock with the largest period is
+ * sampled at the end of that period but right at the beginning of the
+ * sampling interval and some other clock is sampled right at the
+ * begining of its sampling period and right at the end of the
+ * sampling interval. Let's assume the GPU has the longest clock
+ * period and that the application is sampling GPU and monotonic:
+ *
+ * s e
+ * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * g
+ * 0 1 2 3
+ * GPU -----_____-----_____-----_____-----_____
+ *
+ * m
+ * x y z 0 1 2 3 4 5 6 7 8 9 a b c
+ * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * Interval <----------------->
+ * Deviation <-------------------------->
+ *
+ * s = read(raw) 2
+ * g = read(GPU) 1
+ * m = read(monotonic) 2
+ * e = read(raw) b
+ *
+ * We round the sample interval up by one tick to cover sampling error
+ * in the interval clock
+ */
+
+ uint64_t sample_interval = end - begin + 1;
+
+ *pMaxDeviation = sample_interval + max_clock_period;
+
+ return VK_SUCCESS;
+}
+
/* vk_icd.h does not declare this function, so we declare it here to
* suppress Wmissing-prototypes.
*/
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 1329ef74026..00a4e5e8015 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -127,6 +127,7 @@ EXTENSIONS = [
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'),
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'),
+ Extension('VK_EXT_calibrated_timestamps', 1, True),
]
class VkVersion:
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index c43b5ef9e06..1bdf040c1a3 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)
return args.handle;
}
+int
+anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
+{
+ struct drm_i915_reg_read args = {
+ .offset = offset
+ };
+
+ int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
+
+ *result = args.val;
+ return ret;
+}
+
#ifndef SYNC_IOC_MAGIC
/* duplicated from linux/sync_file.h to avoid build-time dependency
* on new (v4.7) kernel headers. Once distro's are mostly using
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 5093bd5db1a..8cc3ad1f22e 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
{
unreachable("Unused");
}
+
+int
+anv_gem_reg_read(struct anv_device *device,
+ uint32_t offset, uint64_t *result)
+{
+ unreachable("Unused");
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 09a0b0b88ba..a3a041ab322 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1109,6 +1109,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
int anv_gem_gpu_get_reset_stats(struct anv_device *device,
uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
+int anv_gem_reg_read(struct anv_device *device,
+ uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,