nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes

So far, input_reads was a bitmap tracking which vertex input locations were being used. In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4) consumes just one location, any other small attribute. So we mark the proper bit in inputs_read, and also the same bit in double_inputs_read if the attribute is a dvec3/dvec4. But in Vulkan, this is slightly different: a dvec3/dvec4 attribute consumes two locations, not just one. And hence two bits would be marked in inputs_read for the same vertex input attribute. To avoid handling two different situations in NIR, we just choose the latest one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4 vertex input attribute is marked with two bits in the inputs_read bitmap (and also in the double_inputs_read), and following attributes are adjusted accordingly. As example, if in our GLSL/IR shader we have three attributes: layout(location = 0) vec3 attr0; layout(location = 1) dvec4 attr1; layout(location = 2) dvec3 attr2; then in our NIR shader we put attr0 in location 0, attr1 in locations 1 and 2, and attr2 in location 3 and 4. Checking carefully, basically we are using slots rather than locations in NIR. When emitting the vertices, we do a inverse map to know the corresponding location for each slot. v2 (Jason): - use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR. v3 (Jason): - Fix commit log error. - Use ladder ifs and fix braces. - elements_double is divisible by 2, don't need DIV_ROUND_UP(). - Use if ladder instead of a switch. - Add comment about hardware restriction in 64bit vertex attributes. Reviewed-by: Jason Ekstrand <[email protected]>
author: Juan A. Suarez Romero <[email protected]> 2016-12-16 10:24:43 +0100
committer: Juan A. Suarez Romero <[email protected]> 2017-01-09 10:42:22 +0100
commit: c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree: a7c8890b9a6a1532b55170d531310382742cb9de /src/intel/vulkan/genX_pipeline.c
parent: 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)
1 files changed, 40 insertions, 6 deletions
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 9ff84cd2921..c3feb115bb2 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -42,9 +42,35 @@ vertex_element_comp_control(enum isl_format format, unsigned comp)
    default: unreachable("Invalid component");
    }
 
+   /*
+    * Take in account hardware restrictions when dealing with 64-bit floats.
+    *
+    * From Broadwell spec, command reference structures, page 586:
+    *  "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
+    *   64-bit components are stored * in the URB without any conversion. In
+    *   this case, vertex elements must be written as 128 or 256 bits, with
+    *   VFCOMP_STORE_0 being used to pad the output as required. E.g., if
+    *   R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
+    *   Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
+    *   set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
+    *   Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
+    *   a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
+    *   Component 3 to be specified as VFCOMP_STORE_0 in order to output a
+    *   256-bit vertex element."
+    */
    if (bits) {
       return VFCOMP_STORE_SRC;
-   } else if (comp < 3) {
+   } else if (comp >= 2 &&
+              !isl_format_layouts[format].channels.b.bits &&
+              isl_format_layouts[format].channels.r.type == ISL_RAW) {
+      /* When emitting 64-bit attributes, we need to write either 128 or 256
+       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
+       * VFCOMP_STORE_0 to pad the written chunk */
+      return VFCOMP_NOSTORE;
+   } else if (comp < 3 ||
+              isl_format_layouts[format].channels.r.type == ISL_RAW) {
+      /* Note we need to pad with value 0, not 1, due hardware restrictions
+       * (see comment above) */
       return VFCOMP_STORE_0;
    } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
             isl_format_layouts[format].channels.r.type == ISL_SINT) {
@@ -64,8 +90,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
 
    /* Pull inputs_read out of the VS prog data */
    const uint64_t inputs_read = vs_prog_data->inputs_read;
+   const uint64_t double_inputs_read = vs_prog_data->double_inputs_read;
    assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
    const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
+   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
 
 #if GEN_GEN >= 8
    /* On BDW+, we only need to allocate space for base ids.  Setting up
@@ -83,13 +111,16 @@ emit_vertex_input(struct anv_pipeline *pipeline,
                                 vs_prog_data->uses_baseinstance;
 #endif
 
-   uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
-   if (elem_count == 0)
+   uint32_t elem_count = __builtin_popcount(elements) -
+      __builtin_popcount(elements_double) / 2;
+
+   uint32_t total_elems = elem_count + needs_svgs_elem;
+   if (total_elems == 0)
       return;
 
    uint32_t *p;
 
-   const uint32_t num_dwords = 1 + elem_count * 2;
+   const uint32_t num_dwords = 1 + total_elems * 2;
    p = anv_batch_emitn(&pipeline->batch, num_dwords,
                        GENX(3DSTATE_VERTEX_ELEMENTS));
    memset(p + 1, 0, (num_dwords - 1) * 4);
@@ -107,7 +138,10 @@ emit_vertex_input(struct anv_pipeline *pipeline,
       if ((elements & (1 << desc->location)) == 0)
          continue; /* Binding unused */
 
-      uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1));
+      uint32_t slot =
+         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
+         DIV_ROUND_UP(__builtin_popcount(elements_double &
+                                        ((1 << desc->location) -1)), 2);
 
       struct GENX(VERTEX_ELEMENT_STATE) element = {
          .VertexBufferIndex = desc->binding,
@@ -137,7 +171,7 @@ emit_vertex_input(struct anv_pipeline *pipeline,
 #endif
    }
 
-   const uint32_t id_slot = __builtin_popcount(elements);
+   const uint32_t id_slot = elem_count;
    if (needs_svgs_elem) {
       /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
        *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
author	Juan A. Suarez Romero <[email protected]>	2016-12-16 10:24:43 +0100
committer	Juan A. Suarez Romero <[email protected]>	2017-01-09 10:42:22 +0100
commit	c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree	a7c8890b9a6a1532b55170d531310382742cb9de /src/intel/vulkan/genX_pipeline.c
parent	3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)