anv/pipeline: Hash the entire pipeline in one go

Instead of hashing each stage separately (and TES and TCS together), we hash the entire pipeline. This means we'll get fewer cache hits if applications, for instance, re-use the same VS over and over again, but it also means we can now safely do cross-stage optimizations.

Reviewed-by: Timothy Arceri <[email protected]>
author: Jason Ekstrand <[email protected]> 2017-10-26 19:24:28 -0700
committer: Jason Ekstrand <[email protected]> 2018-08-02 10:29:20 -0700
commit: bdc3565c8c513dfcfef185da5561dc53dc053787 (patch)
tree: 042a37fa6463f2d603ef013cd6eca32ebe7f5baf /src/intel
parent: 4a8236ae17ff58de238678b5bb78833ed35cba37 (diff)
1 files changed, 94 insertions, 53 deletions
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index f1a43a3bfc9..3033acbe958 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -400,34 +400,67 @@ struct anv_pipeline_stage {
    const VkSpecializationInfo *spec_info;
 
    union brw_any_prog_key key;
+
+   struct {
+      gl_shader_stage stage;
+      unsigned char sha1[20];
+   } cache_key;
 };
 
 static void
-anv_pipeline_hash_shader(struct anv_pipeline *pipeline,
-                         struct anv_pipeline_layout *layout,
-                         struct anv_pipeline_stage *stage,
-                         unsigned char *sha1_out)
+anv_pipeline_hash_shader(struct mesa_sha1 *ctx,
+                         struct anv_pipeline_stage *stage)
 {
-   struct mesa_sha1 ctx;
-
-   _mesa_sha1_init(&ctx);
-   if (stage->stage != MESA_SHADER_COMPUTE) {
-      _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
-                        sizeof(pipeline->subpass->view_mask));
-   }
-   if (layout)
-      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
-   _mesa_sha1_update(&ctx, stage->module->sha1, sizeof(stage->module->sha1));
-   _mesa_sha1_update(&ctx, stage->entrypoint, strlen(stage->entrypoint));
-   _mesa_sha1_update(&ctx, &stage->stage, sizeof(stage->stage));
+   _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1));
+   _mesa_sha1_update(ctx, stage->entrypoint, strlen(stage->entrypoint));
+   _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage));
    if (stage->spec_info) {
-      _mesa_sha1_update(&ctx, stage->spec_info->pMapEntries,
+      _mesa_sha1_update(ctx, stage->spec_info->pMapEntries,
                         stage->spec_info->mapEntryCount *
                         sizeof(*stage->spec_info->pMapEntries));
-      _mesa_sha1_update(&ctx, stage->spec_info->pData,
+      _mesa_sha1_update(ctx, stage->spec_info->pData,
                         stage->spec_info->dataSize);
    }
-   _mesa_sha1_update(&ctx, &stage->key, brw_prog_key_size(stage->stage));
+   _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage));
+}
+
+static void
+anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
+                           struct anv_pipeline_layout *layout,
+                           struct anv_pipeline_stage *stages,
+                           unsigned char *sha1_out)
+{
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+
+   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
+                     sizeof(pipeline->subpass->view_mask));
+
+   if (layout)
+      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (stages[s].entrypoint)
+         anv_pipeline_hash_shader(&ctx, &stages[s]);
+   }
+
+   _mesa_sha1_final(&ctx, sha1_out);
+}
+
+static void
+anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
+                          struct anv_pipeline_layout *layout,
+                          struct anv_pipeline_stage *stage,
+                          unsigned char *sha1_out)
+{
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+
+   if (layout)
+      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+
+   anv_pipeline_hash_shader(&ctx, stage);
+
    _mesa_sha1_final(&ctx, sha1_out);
 }
 
@@ -532,10 +565,6 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   unsigned char sha1[20];
-   anv_pipeline_hash_shader(pipeline, layout, stage, sha1);
-   bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
-
    if (bin == NULL) {
       struct brw_vs_prog_data prog_data = {};
       struct anv_pipeline_binding surface_to_descriptor[256];
@@ -571,7 +600,9 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
       }
 
       unsigned code_size = prog_data.base.base.program_size;
-      bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20,
+      bin = anv_device_upload_kernel(pipeline->device, cache,
+                                     &stage->cache_key,
+                                     sizeof(stage->cache_key),
                                      shader_code, code_size,
                                      nir->constant_data,
                                      nir->constant_data_size,
@@ -644,18 +675,6 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   unsigned char tcs_sha1[40];
-   unsigned char tes_sha1[40];
-   anv_pipeline_hash_shader(pipeline, layout, tcs_stage, tcs_sha1);
-   anv_pipeline_hash_shader(pipeline, layout, tes_stage, tes_sha1);
-   memcpy(&tcs_sha1[20], tes_sha1, 20);
-   memcpy(&tes_sha1[20], tcs_sha1, 20);
-
-   tcs_bin = anv_device_search_for_kernel(pipeline->device, cache,
-                                          tcs_sha1, sizeof(tcs_sha1));
-   tes_bin = anv_device_search_for_kernel(pipeline->device, cache,
-                                          tes_sha1, sizeof(tes_sha1));
-
    if (tcs_bin == NULL || tes_bin == NULL) {
       struct brw_tcs_prog_data tcs_prog_data = {};
       struct brw_tes_prog_data tes_prog_data = {};
@@ -725,7 +744,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
 
       unsigned code_size = tcs_prog_data.base.base.program_size;
       tcs_bin = anv_device_upload_kernel(pipeline->device, cache,
-                                         tcs_sha1, sizeof(tcs_sha1),
+                                         &tcs_stage->cache_key,
+                                         sizeof(tcs_stage->cache_key),
                                          shader_code, code_size,
                                          tcs_nir->constant_data,
                                          tcs_nir->constant_data_size,
@@ -748,7 +768,8 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
 
       code_size = tes_prog_data.base.base.program_size;
       tes_bin = anv_device_upload_kernel(pipeline->device, cache,
-                                         tes_sha1, sizeof(tes_sha1),
+                                         &tes_stage->cache_key,
+                                         sizeof(tes_stage->cache_key),
                                          shader_code, code_size,
                                          tes_nir->constant_data,
                                          tes_nir->constant_data_size,
@@ -781,10 +802,6 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   unsigned char sha1[20];
-   anv_pipeline_hash_shader(pipeline, layout, stage, sha1);
-   bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
-
    if (bin == NULL) {
       struct brw_gs_prog_data prog_data = {};
       struct anv_pipeline_binding surface_to_descriptor[256];
@@ -821,7 +838,9 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 
       /* TODO: SIMD8 GS */
       const unsigned code_size = prog_data.base.base.program_size;
-      bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20,
+      bin = anv_device_upload_kernel(pipeline->device, cache,
+                                     &stage->cache_key,
+                                     sizeof(stage->cache_key),
                                      shader_code, code_size,
                                      nir->constant_data,
                                      nir->constant_data_size,
@@ -859,10 +878,6 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
-   unsigned char sha1[20];
-   anv_pipeline_hash_shader(pipeline, layout, stage, sha1);
-   bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
-
    if (bin == NULL) {
       struct brw_wm_prog_data prog_data = {};
       struct anv_pipeline_binding surface_to_descriptor[256];
@@ -981,7 +996,9 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
       }
 
       unsigned code_size = prog_data.base.program_size;
-      bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20,
+      bin = anv_device_upload_kernel(pipeline->device, cache,
+                                     &stage->cache_key,
+                                     sizeof(stage->cache_key),
                                      shader_code, code_size,
                                      nir->constant_data,
                                      nir->constant_data_size,
@@ -1025,7 +1042,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
 
    unsigned char sha1[20];
-   anv_pipeline_hash_shader(pipeline, layout, &stage, sha1);
+   anv_pipeline_hash_compute(pipeline, layout, &stage, sha1);
    bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
 
    if (bin == NULL) {
@@ -1384,14 +1401,36 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
 
    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
 
-   if (stages[MESA_SHADER_VERTEX].entrypoint) {
+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, pCreateInfo->layout);
+
+   unsigned char sha1[20];
+   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (!stages[s].entrypoint)
+         continue;
+
+      stages[s].cache_key.stage = s;
+      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
+
+      struct anv_shader_bin *bin =
+         anv_device_search_for_kernel(pipeline->device, cache,
+                                      &stages[s].cache_key,
+                                      sizeof(stages[s].cache_key));
+      if (bin)
+         anv_pipeline_add_compiled_stage(pipeline, s, bin);
+   }
+
+   if (stages[MESA_SHADER_VERTEX].entrypoint &&
+       !pipeline->shaders[MESA_SHADER_VERTEX]) {
       result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
                                        &stages[MESA_SHADER_VERTEX]);
       if (result != VK_SUCCESS)
          goto compile_fail;
    }
 
-   if (stages[MESA_SHADER_TESS_EVAL].entrypoint) {
+   if (stages[MESA_SHADER_TESS_EVAL].entrypoint &&
+       !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
       result = anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo,
                                             &stages[MESA_SHADER_TESS_CTRL],
                                             &stages[MESA_SHADER_TESS_EVAL]);
@@ -1399,14 +1438,16 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
          goto compile_fail;
    }
 
-   if (stages[MESA_SHADER_GEOMETRY].entrypoint) {
+   if (stages[MESA_SHADER_GEOMETRY].entrypoint &&
+       !pipeline->shaders[MESA_SHADER_GEOMETRY]) {
       result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
                                        &stages[MESA_SHADER_GEOMETRY]);
       if (result != VK_SUCCESS)
          goto compile_fail;
    }
 
-   if (stages[MESA_SHADER_FRAGMENT].entrypoint) {
+   if (stages[MESA_SHADER_FRAGMENT].entrypoint &&
+       !pipeline->shaders[MESA_SHADER_FRAGMENT]) {
       result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo,
                                        &stages[MESA_SHADER_FRAGMENT]);
       if (result != VK_SUCCESS)
author	Jason Ekstrand <[email protected]>	2017-10-26 19:24:28 -0700
committer	Jason Ekstrand <[email protected]>	2018-08-02 10:29:20 -0700
commit	bdc3565c8c513dfcfef185da5561dc53dc053787 (patch)
tree	042a37fa6463f2d603ef013cd6eca32ebe7f5baf /src/intel
parent	4a8236ae17ff58de238678b5bb78833ed35cba37 (diff)