summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2011-11-01 10:54:08 -0700
committerKenneth Graunke <[email protected]>2011-11-10 22:51:19 -0800
commit6ba9090ea05e817bd38c1fcc63c53168b16593c7 (patch)
treefefb9ecff7cd5ef9227228fae00bb664fc86acdc /src
parent0983c6869bead0c31c62e5b1dda7f70898d43971 (diff)
i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM).
These fields control how many entries the hardware prefetches into the state cache, so they only impact performance, not correctness. However, it's not clear how to use this in a way that's beneficial. According to the documentation, kernels "using a large number" of entries may wish to program this to zero to avoid thrashing the cache; it's unclear how many is too many. Also, Ironlake's WM was missing this feature entirely---the count had to be zero. The dirty bit tracking to handle this complicates the surface state and binding table setup; removing it should simplify things and make future refactoring easier. So just set 0 for the number of entries rather than trying to compute and track it. Appears to have no impact on Nexuiz and OpenArena on Sandybridge. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Eric Anholt <[email protected]> Reviewed-by: Paul Berry <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c4
10 files changed, 6 insertions, 50 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 3e05e36d34a..f8e6f81d7cf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -169,8 +169,6 @@ enum brw_state_id {
*/
#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
/** \see brw.state.depth_region */
-#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
@@ -739,7 +737,6 @@ struct brw_context
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
- GLuint nr_surfaces;
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
@@ -810,7 +807,6 @@ struct brw_context
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
GLuint render_surf;
- GLuint nr_surfaces;
drm_intel_bo *scratch_bo;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 2f16891395b..862c5a658fa 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
- DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
- DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 5a9032caf4e..631e1828eb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw)
*/
vs->thread1.single_program_flow = (intel->gen == 5);
- /* BRW_NEW_NR_VS_SURFACES */
- if (intel->gen == 5)
- vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
- else
- vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+ vs->thread1.binding_table_entry_count = 0;
if (brw->vs.prog_data->total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
@@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = {
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_VERTEX_PROGRAM),
.cache = CACHE_NEW_VS_PROG
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 84d3101f1fe..4c99185010f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
/**
* Vertex shader surfaces (constant buffer).
- *
- * This consumes the state updates for the constant buffer needing
- * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit.
*/
static void
brw_upload_vs_surfaces(struct brw_context *brw)
@@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw)
brw->vs.bind_bo_offset = 0;
}
}
-
- if (brw->vs.nr_surfaces != nr_surfaces) {
- brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
- brw->vs.nr_surfaces = nr_surfaces;
- }
}
const struct brw_tracked_state brw_vs_surfaces = {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 51ef745b241..69d7a769e54 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw)
wm->thread1.depth_coef_urb_read_offset = 1;
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (intel->gen == 5)
- wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
- else {
- /* BRW_NEW_NR_SURFACES */
- wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
- }
+ wm->thread1.binding_table_entry_count = 0;
if (brw->wm.prog_data->total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
@@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_WM_SURFACES),
+ BRW_NEW_CURBE_OFFSETS),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index d1203134b81..df7a0ca5c67 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -624,11 +624,6 @@ brw_upload_wm_surfaces(struct brw_context *brw)
}
}
- if (brw->wm.nr_surfaces != nr_surfaces) {
- brw->wm.nr_surfaces = nr_surfaces;
- brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
- }
-
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e22fd393ef5..7ea7e21cb63 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
- OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
- floating_point_mode |
- (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT));
if (brw->vs.prog_data->total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
@@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw)
const struct brw_tracked_state gen6_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
- .brw = (BRW_NEW_NR_VS_SURFACES |
- BRW_NEW_URB_FENCE |
+ .brw = (BRW_NEW_URB_FENCE |
BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_BATCH),
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 714d59492ca..b98516e848f 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw)
if (ctx->Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
- /* BRW_NEW_NR_WM_SURFACES */
- dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
@@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = {
_NEW_PROGRAM_CONSTANTS |
_NEW_POLYGON),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_NR_WM_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_SAMPLER |
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index dbf93465e49..462db5bf194 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
- OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
- floating_point_mode |
- (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode);
if (brw->vs.prog_data->total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
@@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 2dce359ea94..8b79663da3f 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw)
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
- /* BRW_NEW_NR_WM_SURFACES */
- dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1.
*/
@@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_NR_WM_SURFACES |
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),