nir/i965: use two slots from inputs_read for dvec3/dvec4 vertex input attributes

So far, input_reads was a bitmap tracking which vertex input locations were being used. In OpenGL, an attribute bigger than a vec4 (like a dvec3 or dvec4) consumes just one location, any other small attribute. So we mark the proper bit in inputs_read, and also the same bit in double_inputs_read if the attribute is a dvec3/dvec4. But in Vulkan, this is slightly different: a dvec3/dvec4 attribute consumes two locations, not just one. And hence two bits would be marked in inputs_read for the same vertex input attribute. To avoid handling two different situations in NIR, we just choose the latest one: in OpenGL, when creating NIR from GLSL/IR, any dvec3/dvec4 vertex input attribute is marked with two bits in the inputs_read bitmap (and also in the double_inputs_read), and following attributes are adjusted accordingly. As example, if in our GLSL/IR shader we have three attributes: layout(location = 0) vec3 attr0; layout(location = 1) dvec4 attr1; layout(location = 2) dvec3 attr2; then in our NIR shader we put attr0 in location 0, attr1 in locations 1 and 2, and attr2 in location 3 and 4. Checking carefully, basically we are using slots rather than locations in NIR. When emitting the vertices, we do a inverse map to know the corresponding location for each slot. v2 (Jason): - use two slots from inputs_read for dvec3/dvec4 NIR from GLSL/IR. v3 (Jason): - Fix commit log error. - Use ladder ifs and fix braces. - elements_double is divisible by 2, don't need DIV_ROUND_UP(). - Use if ladder instead of a switch. - Add comment about hardware restriction in 64bit vertex attributes. Reviewed-by: Jason Ekstrand <[email protected]>
author: Juan A. Suarez Romero <[email protected]> 2016-12-16 10:24:43 +0100
committer: Juan A. Suarez Romero <[email protected]> 2017-01-09 10:42:22 +0100
commit: c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree: a7c8890b9a6a1532b55170d531310382742cb9de /src/mesa/drivers
parent: 3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)
6 files changed, 16 insertions, 29 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 57815645924..b7527f2cd9b 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -481,11 +481,16 @@ brw_prepare_vertices(struct brw_context *brw)
    /* Accumulate the list of enabled arrays. */
    brw->vb.nr_enabled = 0;
    while (vs_inputs) {
-      GLuint index = ffsll(vs_inputs) - 1;
+      GLuint first = ffsll(vs_inputs) - 1;
+      GLuint index =
+         first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read &
+                                                BITFIELD64_MASK(first)), 2);
       struct brw_vertex_element *input = &brw->vb.inputs[index];
       input->is_dual_slot = brw->gen >= 8 &&
-         (vs_prog_data->double_inputs_read & BITFIELD64_BIT(index)) != 0;
-      vs_inputs &= ~BITFIELD64_BIT(index);
+         (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0;
+      vs_inputs &= ~BITFIELD64_BIT(first);
+      if (input->is_dual_slot)
+         vs_inputs &= ~BITFIELD64_BIT(first + 1);
       brw->vb.enabled[brw->vb.nr_enabled++] = input;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c8a069386dd..03f9c24d151 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -492,19 +492,6 @@ type_size_scalar(const struct glsl_type *type)
    return 0;
 }
 
-/* Attribute arrays are loaded as one vec4 per element (or matrix column),
- * except for double-precision types, which are loaded as one dvec4.
- */
-extern "C" int
-type_size_vs_input(const struct glsl_type *type)
-{
-   if (type->is_double()) {
-      return type_size_dvec4(type);
-   } else {
-      return type_size_vec4(type);
-   }
-}
-
 /**
  * Create a MOV to read the timestamp register.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3775e6c4a09..cea38d86237 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -36,8 +36,7 @@ fs_reg *
 fs_visitor::emit_vs_system_value(int location)
 {
    fs_reg *reg = new(this->mem_ctx)
-      fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) +
-                        _mesa_bitcount_64(nir->info->double_inputs_read)),
+      fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info->inputs_read),
              BRW_REGISTER_TYPE_D);
    struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(prog_data);
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 3645f48777a..2d2fce28eef 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -113,9 +113,7 @@ remap_vs_attrs(nir_block *block, shader_info *nir_info)
          int attr = intrin->const_index[0];
          int slot = _mesa_bitcount_64(nir_info->inputs_read &
                                       BITFIELD64_MASK(attr));
-         int dslot = _mesa_bitcount_64(nir_info->double_inputs_read &
-                                       BITFIELD64_MASK(attr));
-         intrin->const_index[0] = 4 * (slot + dslot);
+         intrin->const_index[0] = 4 * slot;
       }
    }
    return true;
@@ -268,7 +266,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
     * loaded as one vec4 or dvec4 per element (or matrix column), depending on
     * whether it is a double-precision type or not.
     */
-   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0);
+   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
 
    /* This pass needs actual constants */
    nir_opt_constant_folding(nir);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index f713d47b40e..ecb41189806 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -34,7 +34,6 @@ extern "C" {
 int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
 int type_size_dvec4(const struct glsl_type *type);
-int type_size_vs_input(const struct glsl_type *type);
 
 static inline int
 type_size_scalar_bytes(const struct glsl_type *type)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b5e846d7cf5..5ddbe580d5a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2737,7 +2737,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
       ((1 << shader->info->cull_distance_array_size) - 1) <<
       shader->info->clip_distance_array_size;
 
-   unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
+   unsigned nr_attribute_slots = _mesa_bitcount_64(prog_data->inputs_read);
 
    /* gl_VertexID and gl_InstanceID are system values, but arrive via an
     * incoming vertex attribute.  So, add an extra slot.
@@ -2747,18 +2747,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
         BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
         BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
-      nr_attributes++;
+      nr_attribute_slots++;
    }
 
    /* gl_DrawID has its very own vec4 */
    if (shader->info->system_values_read &
        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
-      nr_attributes++;
+      nr_attribute_slots++;
    }
 
-   unsigned nr_attribute_slots =
-      nr_attributes +
-      _mesa_bitcount_64(shader->info->double_inputs_read);
+   unsigned nr_attributes = nr_attribute_slots -
+      DIV_ROUND_UP(_mesa_bitcount_64(shader->info->double_inputs_read), 2);
 
    /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
     * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.  Empirically, in
author	Juan A. Suarez Romero <[email protected]>	2016-12-16 10:24:43 +0100
committer	Juan A. Suarez Romero <[email protected]>	2017-01-09 10:42:22 +0100
commit	c2acf97fcc9b32eaa9778771282758e5652a8ad4 (patch)
tree	a7c8890b9a6a1532b55170d531310382742cb9de /src/mesa/drivers
parent	3551a2d3ad2661477ba3b0a36a13eeb68e28fe85 (diff)