aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_sf_state.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2011-03-09 13:18:30 -0800
committer: Eric Anholt <[email protected]> 2011-04-11 11:55:25 -0700
commit: 88022278f71ed3ea9613a7fa72a03367f75443d3 (patch)
tree: d6d254d90d59e52527e7c70edce785d7d4c8d592 /src/mesa/drivers/dri/i965/brw_sf_state.c
parent: b1be5bd205d3efcaf4012d2c9a12831da57fc7fb (diff)
i965: Move the SF VP from state caching to state streaming.
This is a 49.6% +/- 2.0% (n=9, IPS outlier removed) performance improvement for the hacked-up-for-cache-misses scissor-many, and no statistically significant performance difference for the hacked-up-for-cache-hits version (n=9, IPS outlier removed). There is no statistically significant performance difference in ETQW (n=5) from these last two commits.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_sf_state.c')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf_state.c 20
1 files changed, 14 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index f8b5275561d..66d91a0bde7 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -38,13 +38,15 @@
static void upload_sf_vp(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
- struct brw_sf_viewport sfv_stack, *sfv = &sfv_stack;
+ struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
const GLfloat *v = ctx->Viewport._WindowMap.m;
+ sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset);
memset(sfv, 0, sizeof(*sfv));
if (render_to_fbo) {
@@ -104,8 +106,12 @@ static void upload_sf_vp(struct brw_context *brw)
sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
+ /* Keep a pointer to it for brw_state_dump.c */
drm_intel_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, sfv, sizeof(*sfv));
+ drm_intel_bo_reference(intel->batch.bo);
+ brw->sf.vp_bo = intel->batch.bo;
+
+ brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
const struct brw_tracked_state brw_sf_vp = {
@@ -113,7 +119,7 @@ const struct brw_tracked_state brw_sf_vp = {
.mesa = (_NEW_VIEWPORT |
_NEW_SCISSOR |
_NEW_BUFFERS),
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0
},
.prepare = upload_sf_vp
@@ -171,7 +177,8 @@ static void upload_sf_unit( struct brw_context *brw )
sf->thread4.stats_enable = 1;
/* CACHE_NEW_SF_VP */
- sf->sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+ sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset +
+ brw->sf.vp_offset) >> 5; /* reloc */
sf->sf5.viewport_transform = 1;
@@ -289,8 +296,9 @@ static void upload_sf_unit( struct brw_context *brw )
/* Emit SF viewport relocation */
drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
offsetof(struct brw_sf_unit_state, sf5)),
- brw->sf.vp_bo, (sf->sf5.front_winding |
- (sf->sf5.viewport_transform << 1)),
+ intel->batch.bo, (brw->sf.vp_offset |
+ sf->sf5.front_winding |
+ (sf->sf5.viewport_transform << 1)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
brw->state.dirty.cache |= CACHE_NEW_SF_UNIT;