anv: Completely rework shader compilation

Now that we have a decent interface in upstream mesa, we can get rid of all our hacks. As of this commit, we no longer use any fake GL state objects and all of shader compilation is moved into anv_pipeline.c. This should make way for actually implementing a shader cache one of these days. As a nice side-benefit, this commit also gains us an extra 300 passing CTS tests because we're actually filling out the texture swizzle information for vertex shaders.
author: Jason Ekstrand <[email protected]> 2015-10-19 22:06:59 -0700
committer: Jason Ekstrand <[email protected]> 2015-10-20 13:02:03 -0700
commit: a71e614d33e8d869bbaced8948349a7180783ab7 (patch)
tree: 0194c7cf955a3c4f15c67cab53b388389a828574 /src/vulkan/anv_pipeline.c
parent: 2d9e899e3576120f1a671c6cc38835b41269e607 (diff)
1 files changed, 619 insertions, 27 deletions
diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c
index fda382eee19..a923017310a 100644
--- a/src/vulkan/anv_pipeline.c
+++ b/src/vulkan/anv_pipeline.c
@@ -28,6 +28,12 @@
 #include <fcntl.h>
 
 #include "anv_private.h"
+#include "brw_nir.h"
+#include "anv_nir.h"
+#include "glsl/nir/nir_spirv.h"
+
+/* Needed for SWIZZLE macros */
+#include "program/prog_instruction.h"
 
 // Shader functions
 
@@ -81,16 +87,12 @@ VkResult anv_CreateShader(
    const char *name = pCreateInfo->pName ? pCreateInfo->pName : "main";
    size_t name_len = strlen(name);
 
-   if (strcmp(name, "main") != 0) {
-      anv_finishme("Multiple shaders per module not really supported");
-   }
-
    shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8,
                              VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
    if (shader == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   shader->module = module;
+   shader->module = module,
    memcpy(shader->entrypoint, name, name_len + 1);
 
    *pShader = anv_shader_to_handle(shader);
@@ -108,6 +110,86 @@ void anv_DestroyShader(
    anv_device_free(device, shader);
 }
 
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = {
+   [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX,
+   [VK_SHADER_STAGE_TESS_CONTROL] = -1,
+   [VK_SHADER_STAGE_TESS_EVALUATION] = -1,
+   [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY,
+   [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT,
+   [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE,
+};
+
+static bool
+is_scalar_shader_stage(const struct brw_compiler *compiler, VkShaderStage stage)
+{
+   switch (stage) {
+   case VK_SHADER_STAGE_VERTEX:
+      return compiler->scalar_vs;
+   case VK_SHADER_STAGE_GEOMETRY:
+      return false;
+   case VK_SHADER_STAGE_FRAGMENT:
+   case VK_SHADER_STAGE_COMPUTE:
+      return true;
+   default:
+      unreachable("Unsupported shader stage");
+   }
+}
+
+/* Turn a shader's module (SPIR-V, or ready-made NIR) into preprocessed NIR.
+ * Eventually this belongs in anv_CreateShader, but NIR cannot be copied yet.
+ */
+static nir_shader *
+anv_shader_compile_to_nir(struct anv_device *device,
+                          struct anv_shader *shader, VkShaderStage vk_stage)
+{
+   if (strcmp(shader->entrypoint, "main") != 0) {
+      anv_finishme("Multiple shaders per module not really supported");
+   }
+
+   gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage];
+   const struct brw_compiler *compiler =
+      device->instance->physicalDevice.compiler;
+   const nir_shader_compiler_options *nir_options =
+      compiler->glsl_compiler_options[stage].NirOptions;
+
+   nir_shader *nir;
+   if (shader->module->nir) {
+      /* Some things such as our meta clear/blit code will give us a NIR
+       * shader directly.  In that case, we ignore the SPIR-V entirely and
+       * use the module's NIR shader in place, overriding its options. */
+      nir = shader->module->nir;
+      nir->options = nir_options;
+   } else {
+      uint32_t *spirv = (uint32_t *) shader->module->data;
+      assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
+      assert(shader->module->size % 4 == 0);
+
+      nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options);
+   }
+   nir_validate_shader(nir);
+
+   /* Find the single overload that has an implementation and whose name
+    * matches the VkShader entrypoint.  Both conditions are assert-only.
+    */
+   nir_function_impl *entrypoint = NULL;
+   nir_foreach_overload(nir, overload) {
+      if (strcmp(shader->entrypoint, overload->function->name) == 0 &&
+          overload->impl) {
+         assert(entrypoint == NULL);
+         entrypoint = overload->impl;
+      }
+   }
+   assert(entrypoint != NULL);
+
+   brw_preprocess_nir(nir, &device->info,
+                      is_scalar_shader_stage(compiler, vk_stage));
+
+   nir_shader_gather_info(nir, entrypoint);
+
+   return nir;
+}
 
 VkResult anv_CreatePipelineCache(
     VkDevice                                    device,
@@ -156,7 +238,6 @@ void anv_DestroyPipeline(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
 
-   anv_compiler_free(pipeline);
    anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device);
    anv_state_stream_finish(&pipeline->program_stream);
    anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
@@ -178,6 +259,506 @@ static const uint32_t vk_to_gen_primitive_type[] = {
 };
 
 static void
+populate_sampler_prog_key(const struct brw_device_info *devinfo,
+                          struct brw_sampler_prog_key_data *key)
+{
+   /* XXX: Handle texture swizzle on HSW- */
+   for (int i = 0; i < MAX_SAMPLERS; i++) {
+      /* Assume color sampler, no swizzling. (Works for BDW+) */
+      key->swizzles[i] = SWIZZLE_XYZW;
+   }
+}
+
+static void
+populate_vs_prog_key(const struct brw_device_info *devinfo,
+                     struct brw_vs_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+
+   /* XXX: Handle vertex input work-arounds */
+
+   /* XXX: Handle sampler_prog_key */
+}
+
+static void
+populate_wm_prog_key(const struct brw_device_info *devinfo,
+                     const VkGraphicsPipelineCreateInfo *info,
+                     struct brw_wm_prog_key *key)
+{
+   ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
+
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+
+   /* Vulkan doesn't specify a default */
+   key->high_quality_derivatives = false;
+
+   /* XXX Vulkan doesn't appear to specify */
+   key->clamp_fragment_color = false;
+
+   /* XXX: These are needed for flipping the coordinates.  Do we need to do
+    * this in Vulkan?
+    */
+   key->drawable_height = 0;
+   key->render_to_fbo = true; /* XXX really? */
+
+   key->nr_color_regions = render_pass->subpasses[info->subpass].color_count;
+
+   key->replicate_alpha = key->nr_color_regions > 1 &&
+                          info->pColorBlendState->alphaToCoverageEnable;
+
+   if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) {
+      /* Ideally derived from the shader; computing it is harmless as dead-code
+       * elimination drops it.  NOTE(review): overwritten a few lines below.
+       */
+      key->compute_sample_id = true;
+      key->persample_shading = info->pMultisampleState->sampleShadingEnable;
+      if (key->persample_shading)
+         key->persample_2x = info->pMultisampleState->rasterSamples == 2;
+
+      key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
+      key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
+   }
+}
+
+static void
+populate_cs_prog_key(const struct brw_device_info *devinfo,
+                     struct brw_cs_prog_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   populate_sampler_prog_key(devinfo, &key->tex);
+}
+
+static nir_shader *
+anv_pipeline_compile(struct anv_pipeline *pipeline,
+                     struct anv_shader *shader,
+                     VkShaderStage stage,
+                     struct brw_stage_prog_data *prog_data)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+
+   nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage);
+   if (nir == NULL)
+      return NULL;
+
+   bool have_push_constants = false;
+   nir_foreach_variable(var, &nir->uniforms) {
+      if (!glsl_type_is_sampler(var->type)) {
+         have_push_constants = true;
+         break;
+      }
+   }
+
+   /* Figure out the number of parameters */
+   prog_data->nr_params = 0;
+
+   if (have_push_constants) {
+      /* If the shader uses any push constants at all, we'll just give
+       * them the maximum possible number
+       */
+      prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
+   }
+
+   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
+      prog_data->nr_params += MAX_DYNAMIC_BUFFERS;
+
+   if (prog_data->nr_params > 0) {
+      prog_data->param = (const gl_constant_value **)
+         anv_device_alloc(pipeline->device,
+                          prog_data->nr_params * sizeof(gl_constant_value *),
+                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
+      if (prog_data->param == NULL) {
+         if (shader->module->nir == NULL)
+            ralloc_free(nir); /* NIR built from SPIR-V is ours to free */
+         return NULL;
+      }
+      /* Param values are offsets into struct anv_push_constants, encoded as
+       * pointers off a NULL base; the compiler never dereferences them. */
+      struct anv_push_constants *null_data = NULL;
+      if (have_push_constants) {
+         for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
+            prog_data->param[i] = (const gl_constant_value *)
+               &null_data->client_data[i * sizeof(float)];
+      }
+   }
+
+   /* Set up dynamic offsets */
+   anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
+
+   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
+   anv_nir_apply_pipeline_layout(nir, pipeline->layout);
+
+   /* All binding table offsets provided by apply_pipeline_layout() are
+    * relative to the start of the binding table (plus MAX_RTS for FS).
+    */
+   unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0;
+   prog_data->binding_table.size_bytes = 0;
+   prog_data->binding_table.texture_start = bias;
+   prog_data->binding_table.ubo_start = bias;
+   prog_data->binding_table.image_start = bias;
+
+   /* Finish the optimization and compilation process */
+   brw_postprocess_nir(nir, &pipeline->device->info,
+                       is_scalar_shader_stage(compiler, stage));
+
+   /* nir_lower_io will only handle the push constants; we need to set this
+    * to the full number of possible uniforms.
+    */
+   nir->num_uniforms = prog_data->nr_params;
+
+   return nir;
+}
+
+static uint32_t
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           const void *data, size_t size)
+{
+   /* The kernel must be smaller than one block; check before allocating. */
+   assert(size < pipeline->program_stream.block_pool->block_size);
+   struct anv_state state =
+      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
+
+   memcpy(state.map, data, size);
+
+   return state.offset;
+}
+static void
+anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
+                                VkShaderStage stage,
+                                struct brw_stage_prog_data *prog_data)
+{
+   struct brw_device_info *devinfo = &pipeline->device->info;
+   uint32_t max_threads[] = {
+      [VK_SHADER_STAGE_VERTEX]                  = devinfo->max_vs_threads,
+      [VK_SHADER_STAGE_TESS_CONTROL]            = 0,
+      [VK_SHADER_STAGE_TESS_EVALUATION]         = 0,
+      [VK_SHADER_STAGE_GEOMETRY]                = devinfo->max_gs_threads,
+      [VK_SHADER_STAGE_FRAGMENT]                = devinfo->max_wm_threads,
+      [VK_SHADER_STAGE_COMPUTE]                 = devinfo->max_cs_threads,
+   };
+
+   pipeline->prog_data[stage] = prog_data;
+   pipeline->active_stages |= 1 << stage;
+   pipeline->scratch_start[stage] = pipeline->total_scratch;
+   pipeline->total_scratch =
+      align_u32(pipeline->total_scratch, 1024) +
+      prog_data->total_scratch * max_threads[stage];
+}
+
+static VkResult
+anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
+                        const VkGraphicsPipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+   struct brw_vs_prog_key key;
+
+   populate_vs_prog_key(&pipeline->device->info, &key);
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_VERTEX,
+                                          &prog_data->base.base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   prog_data->inputs_read = nir->info.inputs_read;
+   pipeline->writes_point_size = nir->info.outputs_written & VARYING_BIT_PSIZ;
+
+   brw_compute_vue_map(&pipeline->device->info,
+                       &prog_data->base.vue_map,
+                       nir->info.outputs_written,
+                       false /* XXX: Do SSO? */);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     NULL, false, -1, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   const uint32_t offset =
+      anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
+   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+      pipeline->vs_simd8 = offset;
+      pipeline->vs_vec4 = NO_KERNEL;
+   } else {
+      pipeline->vs_simd8 = NO_KERNEL;
+      pipeline->vs_vec4 = offset;
+   }
+
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
+                                   &prog_data->base.base);
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
+                        const VkGraphicsPipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+   struct brw_wm_prog_key key;
+
+   populate_wm_prog_key(&pipeline->device->info, info, &key);
+
+   if (pipeline->use_repclear)
+      key.nr_color_regions = 1;
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   prog_data->binding_table.render_target_start = 0;
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_FRAGMENT,
+                                          &prog_data->base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   uint32_t offset = anv_pipeline_upload_kernel(pipeline,
+                                                shader_code, code_size);
+   if (prog_data->no_8)
+      pipeline->ps_simd8 = NO_KERNEL;
+   else
+      pipeline->ps_simd8 = offset;
+
+   if (prog_data->no_8 || prog_data->prog_offset_16) {
+      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+   } else {
+      pipeline->ps_simd16 = NO_KERNEL;
+   }
+
+   pipeline->ps_ksp2 = 0;
+   pipeline->ps_grf_start2 = 0;
+   if (pipeline->ps_simd8 != NO_KERNEL) {
+      pipeline->ps_ksp0 = pipeline->ps_simd8;
+      pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
+      if (pipeline->ps_simd16 != NO_KERNEL) {
+         pipeline->ps_ksp2 = pipeline->ps_simd16;
+         pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
+      }
+   } else if (pipeline->ps_simd16 != NO_KERNEL) {
+      pipeline->ps_ksp0 = pipeline->ps_simd16;
+      pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
+   }
+
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
+                                   &prog_data->base);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+                        const VkComputePipelineCreateInfo *info,
+                        struct anv_shader *shader)
+{
+   const struct brw_compiler *compiler =
+      pipeline->device->instance->physicalDevice.compiler;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   struct brw_cs_prog_key key;
+
+   populate_cs_prog_key(&pipeline->device->info, &key);
+
+   /* TODO: Look up shader in cache */
+
+   memset(prog_data, 0, sizeof(*prog_data));
+
+   nir_shader *nir = anv_pipeline_compile(pipeline, shader,
+                                          VK_SHADER_STAGE_COMPUTE,
+                                          &prog_data->base);
+   if (nir == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   if (shader->module->nir == NULL)
+      ralloc_steal(mem_ctx, nir);
+
+   unsigned code_size;
+   const unsigned *shader_code =
+      brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
+                     -1, &code_size, NULL);
+   if (shader_code == NULL) {
+      ralloc_free(mem_ctx);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline,
+                                                  shader_code, code_size);
+   ralloc_free(mem_ctx);
+
+   anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
+                                   &prog_data->base);
+
+   return VK_SUCCESS;
+}
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+   const struct brw_device_info *devinfo = &pipeline->device->info;
+   bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
+   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+   unsigned vs_entry_size_bytes = vs_size * 64;
+   bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
+   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+   unsigned gs_entry_size_bytes = gs_size * 64;
+
+   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+    *
+    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+    *     Allocation Size is less than 9 512-bit URB entries.
+    *
+    * Similar text exists for GS.
+    */
+   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+   /* URB allocations must be done in 8k chunks. */
+   unsigned chunk_size_bytes = 8192;
+
+   /* Determine the size of the URB in chunks. */
+   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+   /* Reserve space for push constants */
+   unsigned push_constant_bytes = gen8_push_size;
+   unsigned push_constant_chunks =
+      push_constant_bytes / chunk_size_bytes;
+
+   /* Initially, assign each stage the minimum amount of URB space it needs,
+    * and make a note of how much additional space it "wants" (the amount of
+    * additional space it could actually make use of).
+    */
+
+   /* VS has a lower limit on the number of URB entries */
+   unsigned vs_chunks =
+      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+            chunk_size_bytes) / chunk_size_bytes;
+   unsigned vs_wants =
+      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+   unsigned gs_chunks = 0;
+   unsigned gs_wants = 0;
+   if (gs_present) {
+      /* There are two constraints on the minimum amount of URB space we can
+       * allocate:
+       *
+       * (1) We need room for at least 2 URB entries, since we always operate
+       * the GS in DUAL_OBJECT mode.
+       *
+       * (2) We can't allocate less than nr_gs_entries_granularity.
+       */
+      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+                        chunk_size_bytes) / chunk_size_bytes;
+      gs_wants =
+         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+   }
+
+   /* There should always be enough URB space to satisfy the minimum
+    * requirements of each stage.
+    */
+   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+   assert(total_needs <= urb_chunks);
+
+   /* Mete out remaining space (if any) in proportion to "wants". */
+   unsigned total_wants = vs_wants + gs_wants;
+   unsigned remaining_space = urb_chunks - total_needs;
+   if (remaining_space > total_wants)
+      remaining_space = total_wants;
+   if (remaining_space > 0) {
+      unsigned vs_additional = (unsigned)
+         round(vs_wants * (((double) remaining_space) / total_wants));
+      vs_chunks += vs_additional;
+      remaining_space -= vs_additional;
+      gs_chunks += remaining_space;
+   }
+
+   /* Sanity check that we haven't over-allocated. */
+   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+   /* Finally, compute the number of entries that can fit in the space
+    * allocated to each stage.
+    */
+   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+   /* Since we rounded up when computing *_wants, this may be slightly more
+    * than the maximum allowed amount, so correct for that.
+    */
+   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+   /* Ensure that we program a multiple of the granularity. */
+   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+   /* Finally, sanity check to make sure we have at least the minimum number
+    * of entries needed for each stage.
+    */
+   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+   if (gs_present)
+      assert(nr_gs_entries >= 2);
+
+   /* Lay out the URB in the following order:
+    * - push constants
+    * - VS
+    * - GS
+    */
+   pipeline->urb.vs_start = push_constant_chunks;
+   pipeline->urb.vs_size = vs_size;
+   pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+   pipeline->urb.gs_size = gs_size;
+   pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
+
+static void
 anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
@@ -335,7 +916,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
 
    pipeline->device = device;
    pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
-   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    result = anv_reloc_list_init(&pipeline->batch_relocs, device);
    if (result != VK_SUCCESS) {
@@ -349,11 +929,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
    anv_state_stream_init(&pipeline->program_stream,
                          &device->instruction_block_pool);
 
-   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
-      pipeline->shaders[pCreateInfo->pStages[i].stage] =
-         anv_shader_from_handle(pCreateInfo->pStages[i].shader);
-   }
-
    anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
 
    if (pCreateInfo->pTessellationState)
@@ -363,27 +938,44 @@ anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
       anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
 
    pipeline->use_repclear = extra && extra->use_repclear;
+   pipeline->writes_point_size = false;
 
-   anv_compiler_run(device->compiler, pipeline);
+   /* When we free the pipeline, we detect stages based on the NULL status
+    * of various prog_data pointers.  Make them NULL by default.
+    */
+   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
 
-   const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
+   pipeline->vs_simd8 = NO_KERNEL;
+   pipeline->vs_vec4 = NO_KERNEL;
+   pipeline->gs_vec4 = NO_KERNEL;
 
-   pipeline->ps_ksp2 = 0;
-   pipeline->ps_grf_start2 = 0;
-   if (pipeline->ps_simd8 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd8;
-      pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg;
-      if (pipeline->ps_simd16 != NO_KERNEL) {
-         pipeline->ps_ksp2 = pipeline->ps_simd16;
-         pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16;
+   pipeline->active_stages = 0;
+   pipeline->total_scratch = 0;
+
+   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+      ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader);
+
+      switch (pCreateInfo->pStages[i].stage) {
+      case VK_SHADER_STAGE_VERTEX:
+         anv_pipeline_compile_vs(pipeline, pCreateInfo, shader);
+         break;
+      case VK_SHADER_STAGE_FRAGMENT:
+         anv_pipeline_compile_fs(pipeline, pCreateInfo, shader);
+         break;
+      default:
+         anv_finishme("Unsupported shader stage");
       }
-   } else if (pipeline->ps_simd16 != NO_KERNEL) {
-      pipeline->ps_ksp0 = pipeline->ps_simd16;
-      pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16;
-   } else {
-      unreachable("no ps shader");
    }
 
+   if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
+      /* Vertex is only optional if disable_vs is set */
+      assert(extra->disable_vs);
+      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+   }
+
+   gen7_compute_urb_partition(pipeline);
+
    const VkPipelineVertexInputStateCreateInfo *vi_info =
       pCreateInfo->pVertexInputState;
    pipeline->vb_used = 0;
author	Jason Ekstrand <[email protected]>	2015-10-19 22:06:59 -0700
committer	Jason Ekstrand <[email protected]>	2015-10-20 13:02:03 -0700
commit	a71e614d33e8d869bbaced8948349a7180783ab7 (patch)
tree	0194c7cf955a3c4f15c67cab53b388389a828574 /src/vulkan/anv_pipeline.c
parent	2d9e899e3576120f1a671c6cc38835b41269e607 (diff)