diff options
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 29 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_state_upload.c | 16 |
3 files changed, 38 insertions, 14 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 6753a8daf08..038f3f95512 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1222,6 +1222,35 @@ brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo, } } +/** + * Computes the first varying slot in the URB produced by the previous stage + * that is used in the next stage. We do this by testing the varying slots in + * the previous stage's vue map against the inputs read in the next stage. + * + * Note that: + * + * - Each URB offset contains two varying slots and we can only skip a + * full offset if both slots are unused, so the value we return here is always + * rounded down to the closest multiple of two. + * + * - gl_Layer and gl_ViewportIndex don't have their own varying slots, they are + * part of the vue header, so if these are read we can't skip anything. + */ +static inline int +brw_compute_first_urb_slot_required(uint64_t inputs_read, + const struct brw_vue_map *prev_stage_vue_map) +{ + if ((inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)) == 0) { + for (int i = 0; i < prev_stage_vue_map->num_slots; i++) { + int varying = prev_stage_vue_map->slot_to_varying[i]; + if (varying > 0 && (inputs_read & BITFIELD64_BIT(varying)) != 0) + return ROUND_DOWN_TO(i, 2); + } + } + + return 0; +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index e33cb0e1186..a40b910c1a0 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1481,9 +1481,6 @@ fs_visitor::calculate_urb_setup() } } } else { - bool include_vue_header = - nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); - /* We have enough input varyings that the SF/SBE pipeline stage can't * arbitrarily rearrange them to suit our whim; we have to put them * in an order that matches the output of the previous pipeline stage @@ -1493,8 +1490,10 @@ fs_visitor::calculate_urb_setup() brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, nir->info.separate_shader); + int first_slot = - include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + brw_compute_first_urb_slot_required(nir->info.inputs_read, + &prev_stage_vue_map); assert(prev_stage_vue_map.num_slots <= first_slot + 32); for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 2a99376e3c2..ecf5a9ae68d 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -1029,17 +1029,13 @@ genX(calculate_attr_overrides)(const struct brw_context *brw, *point_sprite_enables = 0; - /* If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in - * the full vertex header. Otherwise, we can program the SF to start - * reading at an offset of 1 (2 varying slots) to skip unnecessary data: - * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5 - * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+ - */ - - bool fs_needs_vue_header = fp->info.inputs_read & - (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + int first_slot = + brw_compute_first_urb_slot_required(fp->info.inputs_read, + &brw->vue_map_geom_out); - *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1; + /* Each URB offset packs two varying slots */ + assert(first_slot % 2 == 0); + *urb_entry_read_offset = first_slot / 2; /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, * description of dw10 Point Sprite Texture Coordinate Enable: |