nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes

So far, input_reads was a bitmap tracking which vertex input locations were being used. In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4) consumes just one location, any other small attribute. So we mark the proper bit in inputs_read, and also the same bit in double_inputs_read if the attribute is a dvec3/dvec4. But in Vulkan, this is slightly different: a dvec3/dvec4 attribute consumes two locations, not just one. And hence two bits would be marked in inputs_read for the same vertex input attribute. To avoid handling two different situations in NIR, we just choose the latest one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4 vertex input attribute is marked with two bits in the inputs_read bitmap (and also in the double_inputs_read), and following attributes are adjusted accordingly. As example, if in our GLSL/IR shader we have three attributes: layout(location = 0) vec3 attr0; layout(location = 1) dvec4 attr1; layout(location = 2) dvec3 attr2; then in our NIR shader we put attr0 in location 0, attr1 in locations 1 and 2, and attr2 in location 3 and 4. Checking carefully, basically we are using slots rather than locations in NIR. When emitting the vertices, we do a inverse map to know the corresponding location for each slot. v2 (Jason): - use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR. v3 (Jason): - Fix commit log error. - Use ladder ifs and fix braces. - elements_double is divisible by 2, don't need DIV_ROUND_UP(). - Use if ladder instead of a switch. - Add comment about hardware restriction in 64bit vertex attributes. Reviewed-by: Jason Ekstrand <[email protected]>
author: Juan A. Suarez Romero <[email protected]> 2016-12-16 10:24:43 +0100
committer: Juan A. Suarez Romero <[email protected]> 2017-01-09 10:42:22 +0100
commit: c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree: a7c8890b9a6a1532b55170d531310382742cb9de /src/compiler
parent: 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)
2 files changed, 51 insertions, 25 deletions
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 69d4c2b20c6..33f71bf416a 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -129,6 +129,19 @@ private:
 
 } /* end of anonymous namespace */
 
+static void
+nir_remap_attributes(nir_shader *shader)
+{
+   nir_foreach_variable(var, &shader->inputs) {
+      var->data.location += _mesa_bitcount_64(shader->info->double_inputs_read &
+                                              BITFIELD64_MASK(var->data.location));
+   }
+
+   /* Once the remap is done, reset double_inputs_read, so later it will have
+    * which location/slots are doubles */
+   shader->info->double_inputs_read = 0;
+}
+
 nir_shader *
 glsl_to_nir(const struct gl_shader_program *shader_prog,
             gl_shader_stage stage,
@@ -146,6 +159,13 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
 
    nir_lower_constant_initializers(shader, (nir_variable_mode)~0);
 
+   /* Remap the locations to slots so those requiring two slots will occupy
+    * two locations. For instance, if we have in the IR code a dvec3 attr0 in
+    * location 0 and vec4 attr1 in location 1, in NIR attr0 will use
+    * locations/slots 0 and 1, and attr1 will use location/slot 2 */
+   if (shader->stage == MESA_SHADER_VERTEX)
+      nir_remap_attributes(shader);
+
    shader->info->name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
    if (shader_prog->Label)
       shader->info->label = ralloc_strdup(shader, shader_prog->Label);
@@ -322,6 +342,14 @@ nir_visitor::visit(ir_variable *ir)
             var->data.compact = ir->type->without_array()->is_scalar();
          }
       }
+
+      /* Mark all the locations that require two slots */
+      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
+         for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); i++) {
+            uint64_t bitfield = BITFIELD64_BIT(var->data.location + i);
+            shader->info->double_inputs_read |= bitfield;
+         }
+      }
       break;
 
    case ir_var_shader_out:
diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c
index 07c99497146..35a1ce4dec6 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -53,11 +53,6 @@ set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len)
          else
             shader->info->inputs_read |= bitfield;
 
-         /* double inputs read is only for vertex inputs */
-         if (shader->stage == MESA_SHADER_VERTEX &&
-             glsl_type_is_dual_slot(glsl_without_array(var->type)))
-            shader->info->double_inputs_read |= bitfield;
-
          if (shader->stage == MESA_SHADER_FRAGMENT) {
             shader->info->fs.uses_sample_qualifier |= var->data.sample;
          }
@@ -83,26 +78,21 @@ static void
 mark_whole_variable(nir_shader *shader, nir_variable *var)
 {
    const struct glsl_type *type = var->type;
-   bool is_vertex_input = false;
 
    if (nir_is_per_vertex_io(var, shader->stage)) {
       assert(glsl_type_is_array(type));
       type = glsl_get_array_element(type);
    }
 
-   if (shader->stage == MESA_SHADER_VERTEX &&
-       var->data.mode == nir_var_shader_in)
-      is_vertex_input = true;
-
    const unsigned slots =
       var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
-                        : glsl_count_attribute_slots(type, is_vertex_input);
+                        : glsl_count_attribute_slots(type, false);
 
    set_io_mask(shader, var, 0, slots);
 }
 
 static unsigned
-get_io_offset(nir_deref_var *deref, bool is_vertex_input)
+get_io_offset(nir_deref_var *deref)
 {
    unsigned offset = 0;
 
@@ -117,7 +107,7 @@ get_io_offset(nir_deref_var *deref, bool is_vertex_input)
             return -1;
          }
 
-         offset += glsl_count_attribute_slots(tail->type, is_vertex_input) *
+         offset += glsl_count_attribute_slots(tail->type, false) *
             deref_array->base_offset;
       }
       /* TODO: we can get the offset for structs here see nir_lower_io() */
@@ -163,12 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
       return false;
    }
 
-   bool is_vertex_input = false;
-   if (shader->stage == MESA_SHADER_VERTEX &&
-       var->data.mode == nir_var_shader_in)
-      is_vertex_input = true;
-
-   unsigned offset = get_io_offset(deref, is_vertex_input);
+   unsigned offset = get_io_offset(deref);
    if (offset == -1)
       return false;
 
@@ -184,8 +169,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
    }
 
    /* double element width for double types that takes two slots */
-   if (!is_vertex_input &&
-       glsl_type_is_dual_slot(glsl_without_array(type))) {
+   if (glsl_type_is_dual_slot(glsl_without_array(type))) {
       elem_width *= 2;
    }
 
@@ -220,13 +204,27 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
    case nir_intrinsic_interp_var_at_sample:
    case nir_intrinsic_interp_var_at_offset:
    case nir_intrinsic_load_var:
-   case nir_intrinsic_store_var:
-      if (instr->variables[0]->var->data.mode == nir_var_shader_in ||
-          instr->variables[0]->var->data.mode == nir_var_shader_out) {
+   case nir_intrinsic_store_var: {
+      nir_variable *var = instr->variables[0]->var;
+
+      if (var->data.mode == nir_var_shader_in ||
+          var->data.mode == nir_var_shader_out) {
          if (!try_mask_partial_io(shader, instr->variables[0]))
-            mark_whole_variable(shader, instr->variables[0]->var);
+            mark_whole_variable(shader, var);
+
+         /* We need to track which input_reads bits correspond to a
+          * dvec3/dvec4 input attribute */
+         if (shader->stage == MESA_SHADER_VERTEX &&
+             var->data.mode == nir_var_shader_in &&
+             glsl_type_is_dual_slot(glsl_without_array(var->type))) {
+            for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
+               int idx = var->data.location + i;
+               shader->info->double_inputs_read |= BITFIELD64_BIT(idx);
+            }
+         }
       }
       break;
+   }
 
    case nir_intrinsic_load_draw_id:
    case nir_intrinsic_load_front_face:
author	Juan A. Suarez Romero <[email protected]>	2016-12-16 10:24:43 +0100
committer	Juan A. Suarez Romero <[email protected]>	2017-01-09 10:42:22 +0100
commit	c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree	a7c8890b9a6a1532b55170d531310382742cb9de /src/compiler
parent	3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)