diff options
author | Eric Anholt <[email protected]> | 2011-04-27 13:33:10 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2011-06-18 16:00:45 -0700 |
commit | c173541d9769d41a85cc899bc49699a3587df4bf (patch) | |
tree | 0b445fd0db1f9eb806b7fe48fa8ac4fced4baa8a /src/mesa/drivers/dri/i965/brw_gs_state.c | |
parent | 962dab948609c97c1c01fde6a27e19307948d302 (diff) |
i965: Use state streaming on programs, and state base address on gen5+.
There will be a little bit of thrashing of the program cache BO as the
cache warms up, but once the application is in steady state, this
reduces relocations on gen5 and later.
On my T420 laptop, cairogl firefox-talos-gfx performance improves 2.6%
+/- 1.3% (n=6). No statistically significant performance difference
on nexuiz (n=5).
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_gs_state.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs_state.c | 19 |
1 files changed, 9 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 542874b7706..bbfefcd816a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -45,12 +45,17 @@ brw_prepare_gs_unit(struct brw_context *brw) memset(gs, 0, sizeof(*gs)); - /* CACHE_NEW_GS_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->gs.state_offset + + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_offset + + (gs->thread0.grf_reg_count << 1)) >> 6; gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs->thread1.single_program_flow = 1; @@ -69,13 +74,6 @@ brw_prepare_gs_unit(struct brw_context *brw) gs->thread4.max_threads = 1; else gs->thread4.max_threads = 0; - - /* Emit GS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->gs.state_offset + - offsetof(struct brw_gs_unit_state, thread0)), - brw->gs.prog_bo, gs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); } if (intel->gen == 5) @@ -91,6 +89,7 @@ const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG |