3 files changed, 38 insertions, 14 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 6753a8daf08..038f3f95512 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1222,6 +1222,35 @@ brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
    }
 }
 
+/**
+ * Computes the first varying slot in the URB produced by the previous stage
+ * that is used in the next stage. We do this by testing the varying slots in
+ * the previous stage's vue map against the inputs read in the next stage.
+ *
+ * Note that:
+ *
+ * - Each URB offset contains two varying slots and we can only skip a
+ *   full offset if both slots are unused, so the value we return here is always
+ *   rounded down to the closest multiple of two.
+ *
+ * - gl_Layer and gl_ViewportIndex don't have their own varying slots, they are
+ *   part of the vue header, so if these are read we can't skip anything.
+ */
+static inline int
+brw_compute_first_urb_slot_required(uint64_t inputs_read,
+                                    const struct brw_vue_map *prev_stage_vue_map)
+{
+   if ((inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) == 0) {
+      for (int i = 0; i < prev_stage_vue_map->num_slots; i++) {
+         int varying = prev_stage_vue_map->slot_to_varying[i];
+         if (varying > 0 && (inputs_read & BITFIELD64_BIT(varying)) != 0)
+            return ROUND_DOWN_TO(i, 2);
+      }
+   }
+
+   return 0;
+}
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index e33cb0e1186..a40b910c1a0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1481,9 +1481,6 @@ fs_visitor::calculate_urb_setup()
             }
          }
       } else {
-         bool include_vue_header =
-            nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
-
          /* We have enough input varyings that the SF/SBE pipeline stage can't
           * arbitrarily rearrange them to suit our whim; we have to put them
           * in an order that matches the output of the previous pipeline stage
@@ -1493,8 +1490,10 @@ fs_visitor::calculate_urb_setup()
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                              key->input_slots_valid,
                              nir->info.separate_shader);
+
          int first_slot =
-            include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+            brw_compute_first_urb_slot_required(nir->info.inputs_read,
+                                                &prev_stage_vue_map);
 
          assert(prev_stage_vue_map.num_slots <= first_slot + 32);
          for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 2a99376e3c2..ecf5a9ae68d 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1029,17 +1029,13 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
 
    *point_sprite_enables = 0;
 
-   /* If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
-    * the full vertex header.  Otherwise, we can program the SF to start
-    * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
-    * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
-    * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
-    */
-
-   bool fs_needs_vue_header = fp->info.inputs_read &
-      (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+   int first_slot =
+      brw_compute_first_urb_slot_required(fp->info.inputs_read,
+                                          &brw->vue_map_geom_out);
 
-   *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+   /* Each URB offset packs two varying slots */
+   assert(first_slot % 2 == 0);
+   *urb_entry_read_offset = first_slot / 2;
 
    /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
     * description of dw10 Point Sprite Texture Coordinate Enable: