summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
author Chris Forbes <[email protected]> 2013-06-08 06:11:44 +1200
committer Chris Forbes <[email protected]> 2013-06-16 01:05:41 +1200
commit def84d8014e334e00b0a76f7bb635ccc62e3b67e (patch)
tree 7bcf496e68de7d611788ca9da7ffd6eaa1b8f6b1 /src/mesa
parent 1b77d2133c41c4eb475b81967a3e4d39196c7fe1 (diff)
i965: Shrink Gen5 VUE map layout to be the same as Gen4.
The PRM suggests a larger layout, mostly to support having gl_ClipDistance[] somewhere predictable for the fixed-function clipper -- but it didn't actually arrive in Gen5.

Just use the same layout for both Gen4 and Gen5.

No Piglit regressions. Improves performance in CS:S Video Stress Test by ~3%.

V2:
- Remove now-useless function for determining the SF URB read offset
- Remove now-unused BRW_VARYING_SLOT_POS_DUPLICATE

Signed-off-by: Chris Forbes <[email protected]>
Reviewed-by: Paul Berry <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.h 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_state.c 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 23
6 files changed, 7 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index ae6f81ae6c4..0db1a1802b8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -333,7 +333,6 @@ struct brw_wm_prog_data {
typedef enum
{
BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_POS_DUPLICATE,
BRW_VARYING_SLOT_PAD,
/**
* Technically this is not a varying but just a placeholder that
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 5870b60d38e..ba8782b6d0c 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -76,7 +76,7 @@ static void compile_sf_prog( struct brw_context *brw,
c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
}
- c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
+ c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
c.nr_setup_regs = c.nr_attr_regs;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index f908fc0667b..caeb0d06b1c 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -103,6 +103,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_point_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_anyprim_setup( struct brw_sf_compile *c );
-int brw_sf_compute_urb_entry_read_offset(struct intel_context *intel);
+
+#define BRW_SF_URB_ENTRY_READ_OFFSET 1
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 7c29ba27d1a..4b5e7cc93f1 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -124,19 +124,6 @@ const struct brw_tracked_state brw_sf_vp = {
.emit = upload_sf_vp
};
-/**
- * Compute the offset within the URB (expressed in 256-bit register
- * increments) that should be used to read the VUE in th efragment shader.
- */
-int
-brw_sf_compute_urb_entry_read_offset(struct intel_context *intel)
-{
- if (intel->gen == 5)
- return 3;
- else
- return 1;
-}
-
static void upload_sf_unit( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
@@ -163,9 +150,7 @@ static void upload_sf_unit( struct brw_context *brw )
sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
sf->thread3.dispatch_grf_start_reg = 3;
-
- sf->thread3.urb_entry_read_offset =
- brw_sf_compute_urb_entry_read_offset(intel);
+ sf->thread3.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
/* CACHE_NEW_SF_PROG */
sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 02ba603d018..162fd55f429 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2746,7 +2746,6 @@ vec4_visitor::emit_urb_slot(int mrf, int varying)
current_annotation = "NDC";
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
break;
- case BRW_VARYING_SLOT_POS_DUPLICATE:
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 720325dec5c..d173d2e31b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -85,34 +85,17 @@ brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
*/
switch (intel->gen) {
case 4:
+ case 5:
/* There are 8 dwords in VUE header pre-Ironlake:
* dword 0-3 is indices, point width, clip flags.
* dword 4-7 is ndc position
* dword 8-11 is the first vertex data.
- */
- assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
- assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
- assign_vue_slot(vue_map, VARYING_SLOT_POS);
- break;
- case 5:
- /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
- * dword 0-3 of the header is indices, point width, clip flags.
- * dword 4-7 is the ndc position
- * dword 8-11 of the vertex header is the 4D space position
- * dword 12-19 of the vertex header is the user clip distance.
- * dword 20-23 is a pad so that the vertex element data is aligned
- * dword 24-27 is the first vertex data we fill.
*
- * Note: future pipeline stages expect 4D space position to be
- * contiguous with the other varyings, so we make dword 24-27 a
- * duplicate copy of the 4D space position.
+ * On Ironlake the VUE header is nominally 20 dwords, but the hardware
+ * will accept the same header layout as Gen4 (and should be a bit faster).
*/
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
- assign_vue_slot(vue_map, BRW_VARYING_SLOT_POS_DUPLICATE);
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
- assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
- assign_vue_slot(vue_map, BRW_VARYING_SLOT_PAD);
assign_vue_slot(vue_map, VARYING_SLOT_POS);
break;
case 6: