summary refs log tree commit diff stats
path: root/src/intel/vulkan
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2019-04-24 03:02:35 -0500
committerJason Ekstrand <[email protected]>2019-08-12 22:56:07 +0000
commitd787a2d05e20642a7ec52ce80a830c7795a6bdc0 (patch)
tree538a081981c3d4da3a63e6363799300b683f66c7 /src/intel/vulkan
parent67cb55ad118fec14651cba9f27bbbe6f76d7603f (diff)
anv: Implement VK_KHR_pipeline_executable_properties
Reviewed-by: Lionel Landwerlin <[email protected]>
Diffstat (limited to 'src/intel/vulkan')
-rw-r--r--src/intel/vulkan/anv_device.c7
-rw-r--r--src/intel/vulkan/anv_extensions.py1
-rw-r--r--src/intel/vulkan/anv_pipeline.c276
-rw-r--r--src/intel/vulkan/anv_private.h14
-rw-r--r--src/intel/vulkan/genX_pipeline.c1
5 files changed, 295 insertions, 4 deletions
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index c4e27433a49..9154c1ab5a7 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1126,6 +1126,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
+ VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
+ (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
+ features->pipelineExecutableInfo = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
features->protectedMemory = false;
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index 2c2accbe566..7e7ce428987 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -101,6 +101,7 @@ EXTENSIONS = [
Extension('VK_KHR_maintenance2', 1, True),
Extension('VK_KHR_maintenance3', 1, True),
Extension('VK_KHR_multiview', 1, True),
+ Extension('VK_KHR_pipeline_executable_properties', 1, True),
Extension('VK_KHR_push_descriptor', 1, True),
Extension('VK_KHR_relaxed_block_layout', 1, True),
Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index fc492844cce..6d705b88e08 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -30,6 +30,7 @@
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/gen_l3_config.h"
+#include "common/gen_disasm.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "anv_nir.h"
@@ -529,6 +530,7 @@ struct anv_pipeline_stage {
uint32_t num_stats;
struct brw_compile_stats stats[3];
+ char *disasm[3];
VkPipelineCreationFeedbackEXT feedback;
@@ -1063,6 +1065,77 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
}
}
+/* Append one executable entry (one shader binary) to the pipeline.
+ *
+ * When the app asked for internal representations at pipeline-create time,
+ * also capture the GEN disassembly of the binary at code_offset into a
+ * ralloc'd, NUL-terminated string owned by the pipeline's mem_ctx.
+ */
+static void
+anv_pipeline_add_executable(struct anv_pipeline *pipeline,
+                            struct anv_pipeline_stage *stage,
+                            struct brw_compile_stats *stats,
+                            uint32_t code_offset)
+{
+   char *disasm = NULL;
+   if (stage->code &&
+       (pipeline->flags &
+        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
+      char *stream_data = NULL;
+      size_t stream_size = 0;
+      FILE *stream = open_memstream(&stream_data, &stream_size);
+
+      /* Disassembly is best-effort debug data; if the memstream cannot be
+       * created (e.g. out of memory), skip it instead of crashing.
+       */
+      if (stream != NULL) {
+         /* Creating this is far cheaper than it looks.  It's perfectly fine
+          * to do it for every binary.
+          */
+         struct gen_disasm *d = gen_disasm_create(&pipeline->device->info);
+         gen_disasm_disassemble(d, stage->code, code_offset, stream);
+         gen_disasm_destroy(d);
+
+         fclose(stream);
+
+         /* Copy it to a ralloc'd thing so it lives as long as the pipeline */
+         disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
+         memcpy(disasm, stream_data, stream_size);
+         disasm[stream_size] = 0;
+
+         free(stream_data);
+      }
+   }
+
+   /* Guard the fixed-size executables array (see MAX_PIPELINE_EXECUTABLES) */
+   assert(pipeline->num_executables < MAX_PIPELINE_EXECUTABLES);
+   pipeline->executables[pipeline->num_executables++] =
+      (struct anv_pipeline_executable) {
+         .stage = stage->stage,
+         .stats = *stats,
+         .disasm = disasm,
+      };
+}
+
+/* Record the pipeline-executable entries for one shader stage.
+ *
+ * A fragment shader may have up to three dispatch binaries (SIMD8/16/32),
+ * each with its own stats entry; every other stage contributes exactly one.
+ */
+static void
+anv_pipeline_add_executables(struct anv_pipeline *pipeline,
+                             struct anv_pipeline_stage *stage,
+                             struct anv_shader_bin *bin)
+{
+   if (stage->stage == MESA_SHADER_FRAGMENT) {
+      /* We pull the prog data and stats out of the anv_shader_bin because
+       * the anv_pipeline_stage may not be fully populated if we successfully
+       * looked up the shader in a cache.
+       */
+      const struct brw_wm_prog_data *wm_prog_data =
+         (const struct brw_wm_prog_data *)bin->prog_data;
+      struct brw_compile_stats *stats = bin->stats;
+
+      /* NOTE(review): stats are consumed in SIMD8, SIMD16, SIMD32 order via
+       * stats++; this assumes the compiler stores them in that same order —
+       * confirm against the fragment-shader compile path.
+       */
+      if (wm_prog_data->dispatch_8) {
+         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
+      }
+
+      if (wm_prog_data->dispatch_16) {
+         anv_pipeline_add_executable(pipeline, stage, stats++,
+                                     wm_prog_data->prog_offset_16);
+      }
+
+      if (wm_prog_data->dispatch_32) {
+         anv_pipeline_add_executable(pipeline, stage, stats++,
+                                     wm_prog_data->prog_offset_32);
+      }
+   } else {
+      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
+   }
+}
+
static VkResult
anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
struct anv_pipeline_cache *cache,
@@ -1182,6 +1255,13 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
}
/* We found all our shaders in the cache. We're done. */
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (!stages[s].entrypoint)
+ continue;
+
+ anv_pipeline_add_executables(pipeline, &stages[s],
+ pipeline->shaders[s]);
+ }
goto done;
} else if (found > 0) {
/* We found some but not all of our shaders. This shouldn't happen
@@ -1335,6 +1415,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
goto fail;
}
+ anv_pipeline_add_executables(pipeline, &stages[s], bin);
+
pipeline->shaders[s] = bin;
ralloc_free(stage_ctx);
@@ -1455,6 +1537,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
&cache_hit);
}
+ void *mem_ctx = ralloc_context(NULL);
if (bin == NULL) {
int64_t stage_start = os_time_get_nano();
@@ -1469,8 +1552,6 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
.set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
};
- void *mem_ctx = ralloc_context(NULL);
-
stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
if (stage.nir == NULL) {
ralloc_free(mem_ctx);
@@ -1511,11 +1592,13 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- ralloc_free(mem_ctx);
-
stage.feedback.duration = os_time_get_nano() - stage_start;
}
+ anv_pipeline_add_executables(pipeline, &stage, bin);
+
+ ralloc_free(mem_ctx);
+
if (cache_hit) {
stage.feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
@@ -1823,6 +1906,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+ pipeline->num_executables = 0;
result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
if (result != VK_SUCCESS) {
@@ -1909,3 +1993,187 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
return VK_SUCCESS;
}
+
+/* Format into a fixed-size char-array field (e.g. the name/description
+ * members of the VK_KHR_pipeline_executable_properties structs).  Zeroes the
+ * whole field first and asserts (debug builds) that the text fits untruncated.
+ */
+#define WRITE_STR(field, ...) ({                               \
+   memset(field, 0, sizeof(field));                            \
+   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
+   assert(i > 0 && i < sizeof(field));                         \
+})
+
+/* Implements vkGetPipelineExecutablePropertiesKHR.
+ *
+ * Enumerates the executables recorded at pipeline-build time, following the
+ * standard Vulkan count/array query protocol via the vk_outarray helpers
+ * (returns VK_INCOMPLETE when the caller's array is too small).
+ */
+VkResult anv_GetPipelineExecutablePropertiesKHR(
+    VkDevice                                    device,
+    const VkPipelineInfoKHR*                    pPipelineInfo,
+    uint32_t*                                   pExecutableCount,
+    VkPipelineExecutablePropertiesKHR*          pProperties)
+{
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
+   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
+
+   for (uint32_t i = 0; i < pipeline->num_executables; i++) {
+      vk_outarray_append(&out, props) {
+         gl_shader_stage stage = pipeline->executables[i].stage;
+         props->stages = mesa_to_vk_shader_stage(stage);
+
+         /* A dispatch width of 0 means the binary is a vec4 shader */
+         unsigned simd_width = pipeline->executables[i].stats.dispatch_width;
+         if (stage == MESA_SHADER_FRAGMENT) {
+            WRITE_STR(props->name, "%s%d %s",
+                      simd_width ? "SIMD" : "vec",
+                      simd_width ? simd_width : 4,
+                      _mesa_shader_stage_to_string(stage));
+         } else {
+            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
+         }
+         WRITE_STR(props->description, "%s%d %s shader",
+                   simd_width ? "SIMD" : "vec",
+                   simd_width ? simd_width : 4,
+                   _mesa_shader_stage_to_string(stage));
+
+         /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
+          * wants a subgroup size of 1.
+          */
+         props->subgroupSize = MAX2(simd_width, 1);
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+/* Implements vkGetPipelineExecutableStatisticsKHR.
+ *
+ * Reports per-binary compiler statistics for the executable selected by
+ * pExecutableInfo->executableIndex, using the standard Vulkan count/array
+ * query protocol via the vk_outarray helpers.
+ */
+VkResult anv_GetPipelineExecutableStatisticsKHR(
+    VkDevice                                    device,
+    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
+    uint32_t*                                   pStatisticCount,
+    VkPipelineExecutableStatisticKHR*           pStatistics)
+{
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
+   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
+
+   assert(pExecutableInfo->executableIndex < pipeline->num_executables);
+   const struct anv_pipeline_executable *exe =
+      &pipeline->executables[pExecutableInfo->executableIndex];
+   const struct brw_stage_prog_data *prog_data =
+      pipeline->shaders[exe->stage]->prog_data;
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Instruction Count");
+      WRITE_STR(stat->description,
+                "Number of GEN instructions in the final generated "
+                "shader executable.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = exe->stats.instructions;
+   }
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Loop Count");
+      WRITE_STR(stat->description,
+                "Number of loops (not unrolled) in the final generated "
+                "shader executable.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = exe->stats.loops;
+   }
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Cycle Count");
+      WRITE_STR(stat->description,
+                "Estimate of the number of EU cycles required to execute "
+                "the final generated executable.  This is an estimate only "
+                "and may vary greatly from actual run-time performance.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = exe->stats.cycles;
+   }
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Spill Count");
+      WRITE_STR(stat->description,
+                "Number of scratch spill operations.  This gives a rough "
+                "estimate of the cost incurred due to spilling temporary "
+                "values to memory.  If this is non-zero, you may want to "
+                "adjust your shader to reduce register pressure.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = exe->stats.spills;
+   }
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Fill Count");
+      WRITE_STR(stat->description,
+                "Number of scratch fill operations.  This gives a rough "
+                "estimate of the cost incurred due to spilling temporary "
+                "values to memory.  If this is non-zero, you may want to "
+                "adjust your shader to reduce register pressure.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = exe->stats.fills;
+   }
+
+   vk_outarray_append(&out, stat) {
+      WRITE_STR(stat->name, "Scratch Memory Size");
+      WRITE_STR(stat->description,
+                "Number of bytes of scratch memory required by the "
+                "generated shader executable.  If this is non-zero, you "
+                "may want to adjust your shader to reduce register "
+                "pressure.");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = prog_data->total_scratch;
+   }
+
+   if (exe->stage == MESA_SHADER_COMPUTE) {
+      vk_outarray_append(&out, stat) {
+         WRITE_STR(stat->name, "Workgroup Memory Size");
+         WRITE_STR(stat->description,
+                   "Number of bytes of workgroup shared memory used by this "
+                   "compute shader including any padding.");
+         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+         /* Fix: this stat describes shared local memory, but previously
+          * reported total_scratch — a copy-paste of the scratch stat above.
+          */
+         stat->value.u64 = prog_data->total_shared;
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+/* Copy a NUL-terminated IR string into the caller-provided buffer following
+ * the Vulkan two-call idiom: with pData == NULL, just report the required
+ * size; otherwise copy as much as fits and return false on truncation (the
+ * caller then returns VK_INCOMPLETE).
+ *
+ * Since isText is set, the written data must always be NUL-terminated, even
+ * when truncated (the original strncpy left truncated output unterminated).
+ */
+static bool
+write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
+              const char *data)
+{
+   ir->isText = VK_TRUE;
+
+   size_t data_len = strlen(data) + 1;
+
+   if (ir->pData == NULL) {
+      ir->dataSize = data_len;
+      return true;
+   }
+
+   if (ir->dataSize < data_len) {
+      /* Truncate, but keep the output a valid NUL-terminated string */
+      if (ir->dataSize > 0) {
+         memcpy(ir->pData, data, ir->dataSize - 1);
+         ((char *)ir->pData)[ir->dataSize - 1] = '\0';
+      }
+      return false;
+   }
+
+   memcpy(ir->pData, data, data_len);
+   ir->dataSize = data_len;
+   return true;
+}
+
+/* Implements vkGetPipelineExecutableInternalRepresentationsKHR.
+ *
+ * The only IR exposed is the final GEN assembly, and only when it was
+ * captured at pipeline-create time (exe->disasm != NULL).  Returns
+ * VK_INCOMPLETE when any caller-provided data buffer was too small.
+ */
+VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
+    VkDevice                                    device,
+    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
+    uint32_t*                                   pInternalRepresentationCount,
+    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
+{
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
+   VK_OUTARRAY_MAKE(out, pInternalRepresentations,
+                    pInternalRepresentationCount);
+   bool incomplete_text = false;
+
+   assert(pExecutableInfo->executableIndex < pipeline->num_executables);
+   const struct anv_pipeline_executable *exe =
+      &pipeline->executables[pExecutableInfo->executableIndex];
+
+   if (exe->disasm) {
+      vk_outarray_append(&out, ir) {
+         WRITE_STR(ir->name, "GEN Assembly");
+         WRITE_STR(ir->description,
+                   "Final GEN assembly for the generated shader binary");
+
+         if (!write_ir_text(ir, exe->disasm))
+            incomplete_text = true;
+      }
+   }
+
+   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5682e838abf..85cf7ea9b6d 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2794,6 +2794,17 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
anv_shader_bin_destroy(device, shader);
}
+/* 5 possible simultaneous shader stages and FS may have up to 3 binaries */
+#define MAX_PIPELINE_EXECUTABLES 7
+
+/* One entry per shader binary, reported to the application through
+ * VK_KHR_pipeline_executable_properties.
+ */
+struct anv_pipeline_executable {
+   gl_shader_stage stage;
+
+   /* Compiler statistics for this binary (instructions, spills, ...) */
+   struct brw_compile_stats stats;
+
+   /* NUL-terminated GEN disassembly, ralloc'd from the pipeline's mem_ctx;
+    * NULL unless CAPTURE_INTERNAL_REPRESENTATIONS was requested at create.
+    */
+   char *disasm;
+};
+
struct anv_pipeline {
struct anv_device * device;
struct anv_batch batch;
@@ -2811,6 +2822,9 @@ struct anv_pipeline {
struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+ uint32_t num_executables;
+ struct anv_pipeline_executable executables[MAX_PIPELINE_EXECUTABLES];
+
struct {
const struct gen_l3_config * l3_config;
uint32_t total_size;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index e0e723c13a4..88f5b358faf 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -2233,6 +2233,7 @@ compute_pipeline_create(
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+ pipeline->num_executables = 0;
pipeline->needs_data_cache = false;