Diffstat (limited to 'src')
-rw-r--r--   src/intel/compiler/brw_compiler.h               29
-rw-r--r--   src/intel/compiler/brw_fs.cpp                    7
-rw-r--r--   src/mesa/drivers/dri/i965/genX_state_upload.c   16
3 files changed, 38 insertions, 14 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 6753a8daf08..038f3f95512 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1222,6 +1222,35 @@ brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
   }
}
+/**
+ * Computes the first varying slot in the URB produced by the previous stage
+ * that is used in the next stage. We do this by testing the varying slots in
+ * the previous stage's vue map against the inputs read in the next stage.
+ *
+ * Note that:
+ *
+ * - Each URB offset contains two varying slots and we can only skip a
+ *   full offset if both slots are unused, so the value we return here is always
+ *   rounded down to the closest multiple of two.
+ *
+ * - gl_Layer and gl_ViewportIndex don't have their own varying slots; they are
+ *   part of the vue header, so if these are read we can't skip anything.
+ */
+static inline int
+brw_compute_first_urb_slot_required(uint64_t inputs_read,
+                                    const struct brw_vue_map *prev_stage_vue_map)
+{
+   if ((inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) == 0) {
+      for (int i = 0; i < prev_stage_vue_map->num_slots; i++) {
+         int varying = prev_stage_vue_map->slot_to_varying[i];
+         if (varying > 0 && (inputs_read & BITFIELD64_BIT(varying)) != 0)
+            return ROUND_DOWN_TO(i, 2);
+      }
+   }
+
+   return 0;
+}
+
#ifdef __cplusplus
} /* extern "C" */
#endif
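
To make the behaviour of the new helper concrete, here is a standalone sketch that mirrors its logic on made-up data. Everything prefixed mock_ (the varying slot numbers, the bit macro and the VUE map layout) is invented for illustration and does not match Mesa's real definitions; only the control flow corresponds to brw_compute_first_urb_slot_required() above.

/* Standalone sketch, not Mesa code: mock slot numbers and a mock VUE map. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum {                        /* hypothetical varying slot numbers */
   MOCK_SLOT_POS = 1,
   MOCK_SLOT_PSIZ,
   MOCK_SLOT_LAYER,
   MOCK_SLOT_VIEWPORT,
   MOCK_SLOT_COL0,
   MOCK_SLOT_TEX0,
};
#define MOCK_BIT(s) ((uint64_t)1 << (s))

/* Same shape as the helper above: early-out for the VUE header varyings,
 * then find the first used slot and round it down to an even index. */
static int
mock_first_urb_slot(uint64_t inputs_read, const int *slot_to_varying, int num_slots)
{
   if ((inputs_read & (MOCK_BIT(MOCK_SLOT_LAYER) | MOCK_BIT(MOCK_SLOT_VIEWPORT))) == 0) {
      for (int i = 0; i < num_slots; i++) {
         int varying = slot_to_varying[i];
         if (varying > 0 && (inputs_read & MOCK_BIT(varying)) != 0)
            return i & ~1;   /* ROUND_DOWN_TO(i, 2): one URB offset packs two slots */
      }
   }
   return 0;
}

int main(void)
{
   /* Mock VUE map: two header slots (0), then POS, PSIZ, COL0, TEX0. */
   const int vue[] = { 0, 0, MOCK_SLOT_POS, MOCK_SLOT_PSIZ,
                       MOCK_SLOT_COL0, MOCK_SLOT_TEX0 };

   /* Only TEX0 (slot index 5) is read: rounded down to 4. */
   assert(mock_first_urb_slot(MOCK_BIT(MOCK_SLOT_TEX0), vue, 6) == 4);

   /* COL0 (slot index 4) is also read: already even, stays 4. */
   assert(mock_first_urb_slot(MOCK_BIT(MOCK_SLOT_COL0) |
                              MOCK_BIT(MOCK_SLOT_TEX0), vue, 6) == 4);

   /* Reading gl_Layer means the VUE header is needed: nothing is skipped. */
   assert(mock_first_urb_slot(MOCK_BIT(MOCK_SLOT_LAYER), vue, 6) == 0);

   printf("mock helper behaves as the comment above describes\n");
   return 0;
}
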
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index e33cb0e1186..a40b910c1a0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1481,9 +1481,6 @@ fs_visitor::calculate_urb_setup()
            }
         }
      } else {
-         bool include_vue_header =
-            nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
-
         /* We have enough input varyings that the SF/SBE pipeline stage can't
          * arbitrarily rearrange them to suit our whim; we have to put them
          * in an order that matches the output of the previous pipeline stage
@@ -1493,8 +1490,10 @@ fs_visitor::calculate_urb_setup()
         brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                             key->input_slots_valid,
                             nir->info.separate_shader);
+
         int first_slot =
-            include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+            brw_compute_first_urb_slot_required(nir->info.inputs_read,
+                                                &prev_stage_vue_map);

         assert(prev_stage_vue_map.num_slots <= first_slot + 32);
         for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
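
For reference, the state-upload hunk below turns this slot index into the value the SF/SBE hardware actually consumes: each URB entry read offset covers two varying slots, so the offset is simply first_slot / 2. A minimal sketch of that conversion follows; the read_offset_for_first_slot helper name and the sample values are made up for illustration and are not part of this patch.

/* Sketch only: the slot-to-offset arithmetic used in the hunk below. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: one URB read offset spans two varying slots, and
 * brw_compute_first_urb_slot_required() already rounds down to an even
 * slot, so the division below is always exact. */
static uint32_t
read_offset_for_first_slot(int first_slot)
{
   assert(first_slot % 2 == 0);
   return (uint32_t)first_slot / 2;
}

int main(void)
{
   /* first_slot == 0: the FS reads the VUE header (e.g. gl_Layer), skip nothing. */
   printf("first_slot 0 -> read offset %u\n", read_offset_for_first_slot(0));
   /* first_slot == 2: the old behaviour of always skipping the two header slots. */
   printf("first_slot 2 -> read offset %u\n", read_offset_for_first_slot(2));
   /* first_slot == 4: the new helper can also skip unused leading varyings. */
   printf("first_slot 4 -> read offset %u\n", read_offset_for_first_slot(4));
   return 0;
}
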
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 2a99376e3c2..ecf5a9ae68d 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1029,17 +1029,13 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
   *point_sprite_enables = 0;
-   /* If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
-    * the full vertex header. Otherwise, we can program the SF to start
-    * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
-    * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
-    * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
-    */
-
-   bool fs_needs_vue_header = fp->info.inputs_read &
-      (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+   int first_slot =
+      brw_compute_first_urb_slot_required(fp->info.inputs_read,
+                                          &brw->vue_map_geom_out);
-   *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+   /* Each URB offset packs two varying slots */
+   assert(first_slot % 2 == 0);
+   *urb_entry_read_offset = first_slot / 2;
   /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
    * description of dw10 Point Sprite Texture Coordinate Enable: