From 588cebce2d5b6afd24b72603d744d390481310dd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 24 Apr 2011 19:47:37 -0700 Subject: i965/gen4: Move VS state to state streaming. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- src/mesa/drivers/dri/i965/brw_state_dump.c | 3 +- src/mesa/drivers/dri/i965/brw_vs_state.c | 130 +++++++++++------------------ src/mesa/drivers/dri/i965/brw_vtbl.c | 2 - 5 files changed, 53 insertions(+), 88 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0876c3e94e6..a81d6157390 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -631,8 +631,8 @@ struct brw_context int8_t *constant_map; /* variable array following prog_data */ drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; drm_intel_bo *const_bo; + uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 7325a2b6c8d..b61a7ad099e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -143,7 +143,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->vs.state_offset); if (brw->gs.prog_active) OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else @@ -163,7 +164,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) static void prepare_psp_urb_cbs(struct brw_context *brw) { - brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); brw_add_validated_bo(brw, brw->clip.state_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 930eb5155db..4313b2aaa98 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -388,7 +388,8 @@ void brw_debug_batch(struct intel_context *intel) dump_wm_sampler_state(brw); if (intel->gen < 6) - state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state)); + state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, + sizeof(struct brw_vs_unit_state)); brw_debug_prog("VS prog", brw->vs.prog_bo); if (intel->gen < 6) diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c3a7cc247c5..1eee5b7e5de 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -49,48 +49,19 @@ struct brw_vs_unit_key { }; static void -vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_VS_PROG */ - key->total_grf = brw->vs.prog_data->total_grf; - key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; - key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_vs_entries; - key->urb_size = brw->urb.vsize; - - /* BRW_NEW_NR_VS_SURFACES */ - key->nr_surfaces = brw->vs.nr_surfaces; - - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { - /* Note that we read in the userclip planes as well, hence - * clip_start: - */ - key->curbe_offset = brw->curbe.clip_start; - } - else { - key->curbe_offset = brw->curbe.vs_start; - } -} - -static drm_intel_bo * -vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +brw_prepare_vs_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_vs_unit_state vs; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + struct brw_vs_unit_state *vs; - memset(&vs, 0, sizeof(vs)); + vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + memset(vs, 0, sizeof(*vs)); - vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ - vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + /* CACHE_NEW_VS_PROG */ + vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, * and those dwords will be written to the second URB handle when we @@ -103,21 +74,34 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * The most notable and reliably failing application is the Humus * demo "CelShading" */ - vs.thread1.single_program_flow = (intel->gen == 5); + vs->thread1.single_program_flow = (intel->gen == 5); + /* BRW_NEW_NR_VS_SURFACES */ if (intel->gen == 5) - vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + vs->thread1.binding_table_entry_count = 0; /* hardware requirement */ else - vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs->thread3.dispatch_grf_start_reg = 1; + vs->thread3.urb_entry_read_offset = 0; - vs.thread3.urb_entry_read_length = key->urb_entry_read_length; - vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - vs.thread3.dispatch_grf_start_reg = 1; - vs.thread3.urb_entry_read_offset = 0; - vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + /* BRW_NEW_URB_FENCE */ if (intel->gen == 5) { - switch (key->nr_urb_entries) { + switch (brw->urb.nr_vs_entries) { case 8: case 12: case 16: @@ -129,13 +113,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) case 192: case 224: case 256: - vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2; break; default: assert(0); } } else { - switch (key->nr_urb_entries) { + switch (brw->urb.nr_vs_entries) { case 8: case 12: case 16: @@ -147,63 +131,45 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) default: assert(0); } - vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries; } - vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, - 1, brw->vs_max_threads) - 1; + vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2, + 1, brw->vs_max_threads) - 1; /* No samplers for ARB_vp programs: */ /* It has to be set to 0 for Ironlake */ - vs.vs5.sampler_count = 0; + vs->vs5.sampler_count = 0; if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - vs.thread4.stats_enable = 1; + vs->thread4.stats_enable = 1; /* Vertex program always enabled: */ - vs.vs6.vs_enable = 1; - - bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, - key, sizeof(*key), - &brw->vs.prog_bo, 1, - &vs, sizeof(vs)); + vs->vs6.vs_enable = 1; /* Emit VS program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo, vs.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset + + offsetof(struct brw_vs_unit_state, + thread0)), + brw->vs.prog_bo, vs->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); - return bo; -} - -static void prepare_vs_unit(struct brw_context *brw) -{ - struct brw_vs_unit_key key; - - vs_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->vs.state_bo); - brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, - &key, sizeof(key), - &brw->vs.prog_bo, 1, - NULL); - if (brw->vs.state_bo == NULL) { - brw->vs.state_bo = vs_unit_create_from_key(brw, &key); - } + brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; } const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | + .brw = (BRW_NEW_BATCH | + BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .prepare = prepare_vs_unit, + .prepare = brw_prepare_vs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index b1b2e42d3de..8d1497c8215 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -60,7 +60,6 @@ dri_bo_release(drm_intel_bo **bo) static void brw_destroy_context( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - int i; brw_destroy_state(brw); brw_draw_destroy( brw ); @@ -77,7 +76,6 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); - dri_bo_release(&brw->vs.state_bo); dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); -- cgit v1.2.3