author     Eric Anholt <[email protected]>  2010-06-07 09:25:10 -0700
committer  Eric Anholt <[email protected]>  2010-06-11 00:15:56 -0700
commit     73de09f265cb1c66d70fd9eb92021882bfbbbef6 (patch)
tree       999c40cc2b004ec8cc4670f21fbe72c485884821 /src/mesa/drivers/dri/i965/brw_vs_surface_state.c
parent     118a47623a11a374df371d52ed0294224e6a62dc (diff)
i965: Convert the binding table to streamed indirect state.
This slightly reduces cairo-gl firefox-talos-gfx runtime on my
Ironlake:
before:
[ # ]  backend  test               min(s)  median(s)  stddev.  count
[ 0]   gl       firefox-talos-gfx  38.236  38.383     0.43%    5/6
after:
[ 0]   gl       firefox-talos-gfx  37.799  38.203     0.39%    6/6
It turns out the cost of caching these objects and looking them up in
the cache again is greater than the cost of just computing the object
again, particularly when the overhead of having a separate BO to pin
is removed.
(Those who are paying close attention will note that this is a
reversal of the direction I was moving the driver in a couple of
years ago.  The major thing that has changed is that back then all
state was recomputed when we wrapped the streaming state buffer,
including recompiling our precious programs.  Now, we're uncaching
just the objects that are cheap to compute, and retaining caching of
expensive objects.)
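
To make that trade-off concrete, here is a minimal, self-contained C sketch of the streamed approach described above. It is illustrative only: stream_alloc, STATE_BUF_SIZE, and MAX_SURF are invented stand-ins for this example, not the driver's brw_state_batch API. The point is that rebuilding the binding table is just writing a few dwords into space carved out of a buffer that is already being validated, with no cache key to hash, no lookup, and no separate BO to pin.

/* Illustrative stand-in for the streamed state buffer: a simple bump
 * allocator.  None of these names exist in Mesa. */
#include <stdint.h>
#include <stdio.h>

#define STATE_BUF_SIZE 4096
#define MAX_SURF 4

static uint8_t state_buf[STATE_BUF_SIZE];
static uint32_t state_used;

/* Hand out 'size' bytes of the shared buffer, 'align'-aligned, and report
 * the offset so relocations can point at the new data. */
static void *stream_alloc(uint32_t size, uint32_t align, uint32_t *offset)
{
   state_used = (state_used + align - 1) & ~(align - 1);
   *offset = state_used;
   state_used += size;
   return state_buf + *offset;
}

int main(void)
{
   /* Pretend these are the offsets of the VS surface state objects. */
   uint32_t surf_offset[MAX_SURF] = { 0x100, 0, 0x300, 0 };
   uint32_t bind_offset;
   uint32_t *bind;
   int i;

   /* "Just computing the object again": write MAX_SURF dwords straight
    * into the streamed buffer instead of hashing a key, searching a
    * cache, and uploading a dedicated binding-table BO on a miss. */
   bind = stream_alloc(MAX_SURF * sizeof(uint32_t), 32, &bind_offset);
   for (i = 0; i < MAX_SURF; i++)
      bind[i] = surf_offset[i];

   printf("binding table lives at offset 0x%x of the streamed buffer\n",
          bind_offset);
   return 0;
}

The cached path being removed below, by contrast, has to key a cache on the array of surface BOs, probe it, and allocate and relocate a dedicated binding-table BO on every miss.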
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_surface_state.c')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_surface_state.c  112
1 files changed, 50 insertions, 62 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 26164e907f4..d946756af70 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -151,49 +151,29 @@ brw_update_vs_constant_surface( GLcontext *ctx,
 
 }
 
-/**
- * Constructs the binding table for the VS surface state.
- */
-static drm_intel_bo *
-brw_vs_get_binding_table(struct brw_context *brw)
+static void
+prepare_vs_surfaces(struct brw_context *brw)
 {
-   drm_intel_bo *bind_bo;
-
-   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
-                              NULL, 0,
-                              brw->vs.surf_bo, BRW_VS_MAX_SURF,
-                              NULL);
-
-   if (bind_bo == NULL) {
-      GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
-      uint32_t data[BRW_VS_MAX_SURF];
-      int i;
-
-      for (i = 0; i < BRW_VS_MAX_SURF; i++)
-         if (brw->vs.surf_bo[i])
-            data[i] = brw->vs.surf_bo[i]->offset;
-         else
-            data[i] = 0;
-
-      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
-                                  NULL, 0,
-                                  brw->vs.surf_bo, BRW_VS_MAX_SURF,
-                                  data, data_size);
-
-      /* Emit binding table relocations to surface state */
-      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
-         if (brw->vs.surf_bo[i] != NULL) {
-            /* The presumed offsets were set in the data values for
-             * brw_upload_cache.
-             */
-            drm_intel_bo_emit_reloc(bind_bo, i * 4,
-                                    brw->vs.surf_bo[i], 0,
-                                    I915_GEM_DOMAIN_INSTRUCTION, 0);
-         }
+   GLcontext *ctx = &brw->intel.ctx;
+   int i;
+   int nr_surfaces = 0;
+
+   brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      if (brw->vs.surf_bo[i] != NULL) {
+         nr_surfaces = i + 1;
       }
    }
 
-   return bind_bo;
+   if (brw->vs.nr_surfaces != nr_surfaces) {
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+      brw->vs.nr_surfaces = nr_surfaces;
+   }
+
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      brw_add_validated_bo(brw, brw->vs.surf_bo[i]);
+   }
 }
 
 /**
@@ -203,43 +183,51 @@ brw_vs_get_binding_table(struct brw_context *brw)
  * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
  * CACHE_NEW_SURF_BIND for the binding table upload.
  */
-static void prepare_vs_surfaces(struct brw_context *brw )
+static void upload_vs_surfaces(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
+   uint32_t *bind;
    int i;
-   int nr_surfaces = 0;
-
-   brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
 
-   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
-      if (brw->vs.surf_bo[i] != NULL) {
-         nr_surfaces = i + 1;
+   /* BRW_NEW_NR_VS_SURFACES */
+   if (brw->vs.nr_surfaces == 0) {
+      if (brw->vs.bind_bo) {
+         drm_intel_bo_unreference(brw->vs.bind_bo);
+         brw->vs.bind_bo = NULL;
+         brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
       }
+      return;
    }
 
-   if (brw->vs.nr_surfaces != nr_surfaces) {
-      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-      brw->vs.nr_surfaces = nr_surfaces;
-   }
-
-   /* Note that we don't end up updating the bind_bo if we don't have a
-    * surface to be pointing at.  This should be relatively harmless, as it
-    * just slightly increases our working set size.
+   /* Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table.  (once we have vs samplers)
     */
-   if (brw->vs.nr_surfaces != 0) {
-      drm_intel_bo_unreference(brw->vs.bind_bo);
-      brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+   bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF,
+                          32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset);
+
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      /* BRW_NEW_VS_CONSTBUF */
+      if (brw->vs.surf_bo[i]) {
+         drm_intel_bo_emit_reloc(brw->vs.bind_bo,
+                                 brw->vs.bind_bo_offset + i * sizeof(uint32_t),
+                                 brw->vs.surf_bo[i], 0,
+                                 I915_GEM_DOMAIN_INSTRUCTION, 0);
+         bind[i] = brw->vs.surf_bo[i]->offset;
+      } else {
+         bind[i] = 0;
+      }
    }
+
+   brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
    .dirty = {
       .mesa = 0,
-      .brw = (BRW_NEW_VS_CONSTBUF),
+      .brw = (BRW_NEW_VS_CONSTBUF |
+              BRW_NEW_NR_VS_SURFACES |
+              BRW_NEW_BATCH),
       .cache = 0
    },
    .prepare = prepare_vs_surfaces,
+   .emit = upload_vs_surfaces,
 };
-
-
-