i965: perform 2 uploads with dual slot *64*PASSTHRU formats on gen<8

The emission of vertex attributes corresponding to dvec3 and dvec4 vertex shader input variables was not correct when the <size> passed to the VertexAttribL* commands was <= 2. In 61a8a55f557 ("i965/gen8: Fix vertex attrib upload for dvec3/4 shader inputs"), for gen8+ we needed to determine if the attrib was dual slot to emit 128 or 256-bit, independently of the VAO size. Similarly, for gen < 8 we also need to determine whether the attrib is dual slot to force the emission of 256-bits through 2 uploads. Additionally, we make use of the ISL_FORMAT_R32_FLOAT format in this second upload to fill these unspecified components with zeros, as we also do for gen8+. Fixes the following test on Haswell: KHR-GL46.vertex_attrib_binding.basic-inputL-case1 v2: Added more inline comments to explain why we are using ISL_FORMAT_R32_FLOAT and its consequences, as requested by Alejandro and Antía. Fixes: 75968a668e4 ("i965/gen7: expose OpenGL 4.2 on Haswell when supported") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103006 Cc: Alejandro Piñeiro <[email protected]> Cc: Juan A. Suarez Romero <[email protected]> Cc: Antia Puentes <[email protected]> Cc: Rafael Antognolli <[email protected]> Cc: Kenneth Graunke <[email protected]> Signed-off-by: Andres Gomez <[email protected]> Reviewed-by: Alejandro Piñeiro <[email protected]> Reviewed-by: Antia Puentes <[email protected]> Reviewed-by: Rafael Antognolli <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
author: Andres Gomez <[email protected]> 2018-01-29 18:25:30 +0200
committer: Andres Gomez <[email protected]> 2018-01-31 22:50:06 +0200
commit: 5a7aba2e0a7fb3414a94d04d5970a2ed10c1f63e (patch)
tree: 9760eb0b18b93283267037b8de2d9a8059fd92ae /src/mesa
parent: ab1f2e6bc401636202f93efad3677f26d028e412 (diff)
1 files changed, 27 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index aa4d64d08e2..a39a254dacd 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -364,11 +364,15 @@ is_passthru_format(uint32_t format)
 }
 
 UNUSED static int
-uploads_needed(uint32_t format)
+uploads_needed(uint32_t format,
+	       bool is_dual_slot)
 {
    if (!is_passthru_format(format))
       return 1;
 
+   if (is_dual_slot)
+      return 2;
+
    switch (format) {
    case ISL_FORMAT_R64_PASSTHRU:
    case ISL_FORMAT_R64G64_PASSTHRU:
@@ -397,11 +401,19 @@ downsize_format_if_needed(uint32_t format,
    if (!is_passthru_format(format))
       return format;
 
+   /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload ==
+    * 1 means that we have been forced to do 2 uploads for a size <= 2. This
+    * happens with gen < 8 and dvec3 or dvec4 vertex shader input
+    * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
+    * flagging that we want to fill with zeroes this second forced upload.
+    */
    switch (format) {
    case ISL_FORMAT_R64_PASSTHRU:
-      return ISL_FORMAT_R32G32_FLOAT;
+      return !upload ? ISL_FORMAT_R32G32_FLOAT
+                     : ISL_FORMAT_R32_FLOAT;
    case ISL_FORMAT_R64G64_PASSTHRU:
-      return ISL_FORMAT_R32G32B32A32_FLOAT;
+      return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
+                     : ISL_FORMAT_R32_FLOAT;
    case ISL_FORMAT_R64G64B64_PASSTHRU:
       return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
                      : ISL_FORMAT_R32G32_FLOAT;
@@ -420,6 +432,15 @@ static int
 upload_format_size(uint32_t upload_format)
 {
    switch (upload_format) {
+   case ISL_FORMAT_R32_FLOAT:
+
+      /* downsized_format has returned this one in order to flag that we are
+       * performing a second upload which we want to have filled with
+       * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4
+       * vertex shader input variables.
+       */
+
+      return 0;
    case ISL_FORMAT_R32G32_FLOAT:
       return 2;
    case ISL_FORMAT_R32G32B32A32_FLOAT:
@@ -517,7 +538,7 @@ genX(emit_vertices)(struct brw_context *brw)
       struct brw_vertex_element *input = brw->vb.enabled[i];
       uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
 
-      if (uploads_needed(format) > 1)
+      if (uploads_needed(format, input->is_dual_slot) > 1)
          nr_elements++;
    }
 #endif
@@ -613,7 +634,8 @@ genX(emit_vertices)(struct brw_context *brw)
       uint32_t comp1 = VFCOMP_STORE_SRC;
       uint32_t comp2 = VFCOMP_STORE_SRC;
       uint32_t comp3 = VFCOMP_STORE_SRC;
-      const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;
+      const unsigned num_uploads = GEN_GEN < 8 ?
+	 uploads_needed(format, input->is_dual_slot) : 1;
 
 #if GEN_GEN >= 8
       /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
author	Andres Gomez <[email protected]>	2018-01-29 18:25:30 +0200
committer	Andres Gomez <[email protected]>	2018-01-31 22:50:06 +0200
commit	5a7aba2e0a7fb3414a94d04d5970a2ed10c1f63e (patch)
tree	9760eb0b18b93283267037b8de2d9a8059fd92ae /src/mesa
parent	ab1f2e6bc401636202f93efad3677f26d028e412 (diff)