summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2011-04-21 19:03:18 -0700
committer: Eric Anholt <[email protected]> 2011-04-29 15:25:30 -0700
commit: 90c70123b830bead0ac622df94f2809ac056af95 (patch)
tree: b81f57e262039ea8df633c437fbea02b695716a2 /src
parent: 530de3a2f5f6c1f1a6ec7a5f781d90f7229cab2a (diff)
i965/gen6: Stream the WM push constants.
Improves 3DMMES taiji demo performance by 5.1% +/- 1.9% (n=15), by reducing CPU time spent thrashing around those tiny little constant BOs.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vtbl.c 1
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_wm_state.c 24
4 files changed, 19 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 6bf8a1c83c7..6a244984e91 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -698,11 +698,11 @@ struct brw_context
drm_intel_bo *state_bo;
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
- * This is the push constant BO on gen6.
+ * This is the offset in the batch to the push constants on gen6.
*
* Pre-gen6, push constants live in the CURBE.
*/
- drm_intel_bo *push_const_bo;
+ uint32_t push_const_offset;
} wm;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a22e63c9caa..321dbc19ede 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -364,15 +364,17 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
for (i = 0; i < nr_prims; i++) {
uint32_t hw_prim;
+ int estimated_max_prim_size;
+
+ estimated_max_prim_size = 512; /* batchbuffer commands */
+ estimated_max_prim_size += 1024; /* gen6 WM push constants */
+ estimated_max_prim_size += 512; /* misc. pad */
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
+ * primitives.
*/
- intel_batchbuffer_require_space(intel, 1024, false);
+ intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
hw_prim = brw_set_prim(brw, &prim[i]);
if (brw->state.dirty.brw) {
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index ce8712a260f..4961b0449df 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -93,7 +93,6 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->wm.const_bo);
- dri_bo_release(&brw->wm.push_const_bo);
dri_bo_release(&brw->cc.prog_bo);
dri_bo_release(&brw->cc.vp_bo);
dri_bo_release(&brw->cc.blend_state_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index c4b57fe1f95..fe5c7a1120b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -42,9 +42,6 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
- drm_intel_bo_unreference(brw->wm.push_const_bo);
- brw->wm.push_const_bo = NULL;
-
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
@@ -55,13 +52,11 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
float *constants;
unsigned int i;
- brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr,
- "WM constant_bo",
- brw->wm.prog_data->nr_params *
- sizeof(float),
- 4096);
- drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo);
- constants = brw->wm.push_const_bo->virtual;
+ constants = brw_state_batch(brw,
+ brw->wm.prog_data->nr_params *
+ sizeof(float),
+ 32, &brw->wm.push_const_offset);
+
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
*brw->wm.prog_data->param[i]);
@@ -80,15 +75,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
printf("\n");
printf("\n");
}
-
- drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
}
const struct brw_tracked_state gen6_wm_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_FRAGMENT_PROGRAM,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0,
},
.prepare = gen6_prepare_wm_push_constants,
@@ -118,8 +112,10 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
- OUT_RELOC(brw->wm.push_const_bo,
+ /* This is also the set of state flags from gen6_prepare_wm_constants */
+ OUT_RELOC(intel->batch.bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+ brw->wm.push_const_offset +
ALIGN(brw->wm.prog_data->nr_params,
brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);